mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-11 19:21:46 +00:00
llama : model-based max number of graph nodes calculation (#8970)
* llama : model-based max number of graph nodes calculation
* Update src/llama.cpp

---------

Co-authored-by: slaren <slarengh@gmail.com>
This commit is contained in:
parent
84eb2f4fad
commit
0fd93cdef5
@@ -3575,13 +3575,8 @@ namespace GGUFMeta {
|
|||||||
|
|
||||||
using llama_buf_map = std::unordered_map<uint32_t, ggml_backend_buffer_t>;
|
using llama_buf_map = std::unordered_map<uint32_t, ggml_backend_buffer_t>;
|
||||||
|
|
||||||
// TODO: update when needed or think of some clever automatic way to do this
|
static size_t llama_model_max_nodes(const llama_model & model) {
|
||||||
static size_t llama_model_max_nodes(const llama_model & /*model*/) {
|
return std::max<size_t>(8192, model.tensors_by_name.size()*5);
|
||||||
//if (model.arch == LLM_ARCH_LLAMA && model.hparams.n_layer > ??) { // llama-3 405B
|
|
||||||
// return 32768;
|
|
||||||
//}
|
|
||||||
|
|
||||||
return 8192;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
struct llama_model_loader {
|
struct llama_model_loader {
|
||||||
|
Loading…
Reference in New Issue
Block a user