diff --git a/llama.cpp b/llama.cpp
index 38a2d5ba8..5a1501651 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1023,6 +1023,7 @@ struct llama_model_loader {
     int n_kv      = 0;
     int n_tensors = 0;
     int n_created = 0;
+    size_t n_tot_elements = 0;

     bool use_mmap = false;

@@ -1047,6 +1048,16 @@ struct llama_model_loader {

         file_version = (enum llama_file_version) gguf_get_version(ctx_gguf);

+        for (int i = 0; i < n_tensors; i++) {
+            const char * name = gguf_get_tensor_name(ctx_gguf, i);
+            struct ggml_tensor * t = ggml_get_tensor(ctx_meta, name);
+            size_t elem = 1;
+            for (int j = 0; j < t->n_dims; j++) {
+                elem *= t->ne[j];
+            }
+            n_tot_elements += elem;
+        }
+
         // print meta data
         // TODO: make optional
         {
@@ -1413,7 +1424,8 @@ static void llama_model_load_internal(
         LLAMA_LOG_INFO("%s: freq_base  = %.1f\n", __func__, hparams.rope_freq_base);
         LLAMA_LOG_INFO("%s: freq_scale = %g\n", __func__, hparams.rope_freq_scale);
         LLAMA_LOG_INFO("%s: ftype      = %u (%s)\n", __func__, hparams.ftype, llama_ftype_name(hparams.ftype));
-        LLAMA_LOG_INFO("%s: model size = %s\n", __func__, llama_model_type_name(model.type));
+        LLAMA_LOG_INFO("%s: model size = %.2f B\n", __func__, ml->n_tot_elements*1e-9);
+
     }

     if (vocab_only) {
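
For illustration only (not part of the patch): a minimal standalone sketch of the same accumulation the loader now performs, i.e. multiplying each tensor's dimensions to get its element count, summing over all tensors, and scaling by 1e-9 for the "model size = X.XX B" log line. The tensor shapes below are hypothetical stand-ins for what gguf_get_tensor_name/ggml_get_tensor would yield for a real model file.

// sketch.cpp -- mirrors the element-count arithmetic from the diff above
#include <cstdio>
#include <cstdint>
#include <vector>

int main() {
    // Hypothetical per-tensor shapes (ne[0..n_dims-1]); real values would come
    // from the tensors enumerated in the GGUF metadata context.
    std::vector<std::vector<int64_t>> shapes = {
        {4096, 32000},   // e.g. token embedding matrix
        {4096,  4096},   // e.g. an attention projection
        {11008, 4096},   // e.g. an FFN projection
    };

    size_t n_tot_elements = 0;
    for (const auto & ne : shapes) {
        size_t elem = 1;
        for (int64_t d : ne) {
            elem *= (size_t) d;   // product of dimensions = element count of one tensor
        }
        n_tot_elements += elem;
    }

    // Same formatting idea as the new log line: total element count in billions.
    printf("model size = %.2f B\n", n_tot_elements * 1e-9);
    return 0;
}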