llama.cpp : print actual model size

2024-12-26 11:24:35 +00:00 · 2023-08-17 15:18:16 +02:00 · 2023-08-17 15:18:16 +02:00 · 5a0a2c5685
commit 5a0a2c5685
parent 42f8fe1927
1 changed file with 13 additions and 1 deletion
--- a/llama.cpp
+++ b/llama.cpp
@@ -1023,6 +1023,7 @@ struct llama_model_loader {
    int n_kv      = 0;
    int n_tensors = 0;
    int n_created = 0;
+    size_t n_tot_elements = 0;

    bool use_mmap = false;

@@ -1047,6 +1048,16 @@ struct llama_model_loader {

        file_version = (enum llama_file_version) gguf_get_version(ctx_gguf);

+        for (int i = 0; i < n_tensors; i++) {
+            const char * name = gguf_get_tensor_name(ctx_gguf, i);
+            struct ggml_tensor * t = ggml_get_tensor(ctx_meta, name);
+            size_t elem = 1;
+            for (int j = 0; j < t->n_dims; j++) {
+                elem *= t->ne[j];
+            }
+            n_tot_elements += elem;
+        }
+        
        // print meta data
        // TODO: make optional
        {
@@ -1413,7 +1424,8 @@ static void llama_model_load_internal(
        LLAMA_LOG_INFO("%s: freq_base  = %.1f\n", __func__, hparams.rope_freq_base);
        LLAMA_LOG_INFO("%s: freq_scale = %g\n",   __func__, hparams.rope_freq_scale);
        LLAMA_LOG_INFO("%s: ftype      = %u (%s)\n", __func__, hparams.ftype, llama_ftype_name(hparams.ftype));
-        LLAMA_LOG_INFO("%s: model size = %s\n",   __func__, llama_model_type_name(model.type));
+        LLAMA_LOG_INFO("%s: model size = %.2f B\n",   __func__, ml->n_tot_elements*1e-9);
+
    }

    if (vocab_only) {