llama : fix compatibility with old 2 expert models (#6735)
commit c71bfd736e
parent 3b8f1ec4b1
@@ -4592,7 +4592,7 @@ static bool llm_load_tensors(
     size_t ctx_size = ggml_tensor_overhead()*(ml.n_tensors + 1); // +1 for models where tok_embd is duplicated as output

     // for moe merged tensors
-    ctx_size += ggml_tensor_overhead()*hparams.n_expert*n_layer;
+    ctx_size += ggml_tensor_overhead()*n_layer*3;

     std::map<ggml_backend_buffer_type_t, ggml_context *> ctx_map;
     for (auto & it : buft_layer_count) {
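Context for the one-line change above: per the "for moe merged tensors" comment, the loader reserves metadata space for the merged MoE tensors, of which there are three per layer (ffn_gate_exps, ffn_down_exps, ffn_up_exps) regardless of n_expert. The old formula hparams.n_expert*n_layer only happens to reserve enough when n_expert >= 3; for old 2-expert models it falls one tensor short per layer. A minimal sketch of the arithmetic, with hypothetical n_layer and overhead values (the real code queries ggml_tensor_overhead() at runtime):

    // Sketch of the reservation arithmetic, not the actual loader code.
    #include <cstddef>
    #include <cstdio>

    int main() {
        const std::size_t overhead = 368; // illustrative stand-in for ggml_tensor_overhead()
        const int n_layer  = 32;          // hypothetical layer count
        const int n_expert = 2;           // old 2-expert models

        // old: one tensor overhead per expert per layer -> only 2 per layer here
        const std::size_t before = overhead * n_expert * n_layer;
        // new: one overhead per merged tensor per layer -> always 3
        // (ffn_gate_exps, ffn_down_exps, ffn_up_exps)
        const std::size_t after  = overhead * n_layer * 3;

        std::printf("before: %zu bytes, after: %zu bytes\n", before, after);
        // With n_expert == 2 the old formula reserves one tensor's metadata
        // too few per layer, so the context allocation can run out of space.
        return 0;
    }

With these hypothetical numbers the old formula reserves 368*2*32 = 23552 bytes against a requirement of 368*3*32 = 35328, a shortfall of exactly one tensor overhead per layer; for models with three or more experts the old formula over-reserves harmlessly, which is why only 2-expert models were affected.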