llama : model-based max number of graph nodes calculation (#8970)

* llama : model-based max number of graph nodes calculation

* Update src/llama.cpp

---------

Co-authored-by: slaren <slarengh@gmail.com>
This commit is contained in:
Nico Bosshard 2024-08-12 17:13:59 +02:00 committed by GitHub
parent 84eb2f4fad
commit 0fd93cdef5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -3575,13 +3575,8 @@ namespace GGUFMeta {
using llama_buf_map = std::unordered_map<uint32_t, ggml_backend_buffer_t>; using llama_buf_map = std::unordered_map<uint32_t, ggml_backend_buffer_t>;
// TODO: update when needed or think of some clever automatic way to do this static size_t llama_model_max_nodes(const llama_model & model) {
static size_t llama_model_max_nodes(const llama_model & /*model*/) { return std::max<size_t>(8192, model.tensors_by_name.size()*5);
//if (model.arch == LLM_ARCH_LLAMA && model.hparams.n_layer > ??) { // llama-3 405B
// return 32768;
//}
return 8192;
} }
struct llama_model_loader { struct llama_model_loader {