From 4420cff6547d44791e03a04a0606991d006e6ebc Mon Sep 17 00:00:00 2001
From: Meng Zhang
Date: Fri, 15 Sep 2023 13:52:43 +0800
Subject: [PATCH] fix vram calculation for starcoder

---
 llama.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index db74b6db9..3aa247d27 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2277,8 +2277,11 @@ static void llm_load_tensors(
                     if (backend == GGML_BACKEND_GPU) {
                         vram_weights +=
                             ggml_nbytes(layer.attn_norm) + ggml_nbytes(layer.attn_norm_b) +
-                            ggml_nbytes(layer.wqkv) + ggml_nbytes(layer.wo) +
-                            ggml_nbytes(layer.w2) + ggml_nbytes(layer.w3);
+                            ggml_nbytes(layer.wqkv) + ggml_nbytes(layer.bqkv) +
+                            ggml_nbytes(layer.wo) + ggml_nbytes(layer.bo) +
+                            ggml_nbytes(layer.ffn_norm) + ggml_nbytes(layer.ffn_norm_b) +
+                            ggml_nbytes(layer.w2) + ggml_nbytes(layer.b2) +
+                            ggml_nbytes(layer.w3) + ggml_nbytes(layer.b3);
                     }
                 }
             } break;