llama : fix compatibility with old 2 expert models (#6735)

This commit is contained in:
slaren 2024-04-18 09:04:47 +02:00 committed by GitHub
parent 3b8f1ec4b1
commit c71bfd736e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -4592,7 +4592,7 @@ static bool llm_load_tensors(
     size_t ctx_size = ggml_tensor_overhead()*(ml.n_tensors + 1); // +1 for models where tok_embd is duplicated as output
     // for moe merged tensors
-    ctx_size += ggml_tensor_overhead()*hparams.n_expert*n_layer;
+    ctx_size += ggml_tensor_overhead()*n_layer*3;
     std::map<ggml_backend_buffer_type_t, ggml_context *> ctx_map;
     for (auto & it : buft_layer_count) {