Mirror of https://github.com/ggerganov/llama.cpp.git, synced 2024-12-27 03:44:35 +00:00
fix vram calculation for starcoder
This commit is contained in:
parent
dac31da489
commit
4420cff654
@@ -2277,8 +2277,11 @@ static void llm_load_tensors(
                     if (backend == GGML_BACKEND_GPU) {
                         vram_weights +=
                             ggml_nbytes(layer.attn_norm) + ggml_nbytes(layer.attn_norm_b) +
-                            ggml_nbytes(layer.wqkv) + ggml_nbytes(layer.wo) +
-                            ggml_nbytes(layer.w2) + ggml_nbytes(layer.w3);
+                            ggml_nbytes(layer.wqkv) + ggml_nbytes(layer.bqkv) +
+                            ggml_nbytes(layer.wo) + ggml_nbytes(layer.bo) +
+                            ggml_nbytes(layer.ffn_norm) + ggml_nbytes(layer.ffn_norm_b) +
+                            ggml_nbytes(layer.w2) + ggml_nbytes(layer.b2) +
+                            ggml_nbytes(layer.w3) + ggml_nbytes(layer.b3);
                     }
                 }
             } break;
|
Loading…
Reference in New Issue
Block a user