mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-31 22:04:35 +00:00
llama : print number of tensors per type + print arch + style
This commit is contained in:
parent
b275de745d
commit
aa3efe87c8
28
llama.cpp
28
llama.cpp
@@ -126,13 +126,6 @@ static void llama_log_callback_default(llama_log_level level, const char * text,
|
|||||||
// helpers
|
// helpers
|
||||||
//
|
//
|
||||||
|
|
||||||
template<typename T>
|
|
||||||
static std::string to_string(const T & val) {
|
|
||||||
std::stringstream ss;
|
|
||||||
ss << val;
|
|
||||||
return ss.str();
|
|
||||||
}
|
|
||||||
|
|
||||||
static void zeros(std::ofstream & file, size_t n) {
|
static void zeros(std::ofstream & file, size_t n) {
|
||||||
char zero = 0;
|
char zero = 0;
|
||||||
for (size_t i = 0; i < n; ++i) {
|
for (size_t i = 0; i < n; ++i) {
|
||||||
@@ -1070,10 +1063,14 @@ struct llama_model_loader {
|
|||||||
LLAMA_LOG_INFO("%s: loaded meta data with %d key-value pairs and %d tensors from %s (version %s)\n",
|
LLAMA_LOG_INFO("%s: loaded meta data with %d key-value pairs and %d tensors from %s (version %s)\n",
|
||||||
__func__, n_kv, n_tensors, fname.c_str(), llama_file_version_name(file_version));
|
__func__, n_kv, n_tensors, fname.c_str(), llama_file_version_name(file_version));
|
||||||
|
|
||||||
|
std::map<enum ggml_type, uint32_t> n_type;
|
||||||
|
|
||||||
for (int i = 0; i < n_tensors; i++) {
|
for (int i = 0; i < n_tensors; i++) {
|
||||||
const char * name = gguf_get_tensor_name(ctx_gguf, i);
|
const char * name = gguf_get_tensor_name(ctx_gguf, i);
|
||||||
struct ggml_tensor * meta = ggml_get_tensor(ctx_meta, name);
|
struct ggml_tensor * meta = ggml_get_tensor(ctx_meta, name);
|
||||||
|
|
||||||
|
n_type[meta->type]++;
|
||||||
|
|
||||||
LLAMA_LOG_INFO("%s: - tensor %4d: %32s %-8s [ %s ]\n", __func__, i, name, ggml_type_name(meta->type), llama_format_tensor_shape(meta).c_str());
|
LLAMA_LOG_INFO("%s: - tensor %4d: %32s %-8s [ %s ]\n", __func__, i, name, ggml_type_name(meta->type), llama_format_tensor_shape(meta).c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1083,6 +1080,15 @@ struct llama_model_loader {
|
|||||||
|
|
||||||
LLAMA_LOG_INFO("%s: - kv %3d: %42s %-8s\n", __func__, i, name, gguf_type_name(type));
|
LLAMA_LOG_INFO("%s: - kv %3d: %42s %-8s\n", __func__, i, name, gguf_type_name(type));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// print type counts
|
||||||
|
for (auto & kv : n_type) {
|
||||||
|
if (kv.second == 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
LLAMA_LOG_INFO("%s: - type %4s: %4d tensors\n", __func__, ggml_type_name(kv.first), kv.second);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!llama_mmap::SUPPORTED) {
|
if (!llama_mmap::SUPPORTED) {
|
||||||
@@ -1337,6 +1343,7 @@ static void llama_model_load_internal(
|
|||||||
auto & hparams = model.hparams;
|
auto & hparams = model.hparams;
|
||||||
|
|
||||||
std::string general_name = "n/a";
|
std::string general_name = "n/a";
|
||||||
|
std::string general_arch = "n/a";
|
||||||
|
|
||||||
// read hparams
|
// read hparams
|
||||||
{
|
{
|
||||||
@@ -1372,6 +1379,7 @@ static void llama_model_load_internal(
|
|||||||
|
|
||||||
// get general kv
|
// get general kv
|
||||||
GGUF_GET(general_name, gguf_get_val_str, GGUF_TYPE_STRING, false, "general.name");
|
GGUF_GET(general_name, gguf_get_val_str, GGUF_TYPE_STRING, false, "general.name");
|
||||||
|
GGUF_GET(general_arch, gguf_get_val_str, GGUF_TYPE_STRING, false, "general.architecture");
|
||||||
|
|
||||||
// special tokens
|
// special tokens
|
||||||
GGUF_GET(vocab.special_bos_id, gguf_get_val_u32, GGUF_TYPE_UINT32, false, "tokenizer.ggml.bos_token_id");
|
GGUF_GET(vocab.special_bos_id, gguf_get_val_u32, GGUF_TYPE_UINT32, false, "tokenizer.ggml.bos_token_id");
|
||||||
@@ -1440,16 +1448,17 @@ static void llama_model_load_internal(
|
|||||||
tok_score.tok = std::move(word);
|
tok_score.tok = std::move(word);
|
||||||
tok_score.score = scores[i];
|
tok_score.score = scores[i];
|
||||||
|
|
||||||
|
// determine the newline token: 0x0A == 10 == '\n'
|
||||||
if (tok_score.tok == "<0x0A>") {
|
if (tok_score.tok == "<0x0A>") {
|
||||||
vocab.linefeed_id = i;
|
vocab.linefeed_id = i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
// hparams
|
// hparams
|
||||||
LLAMA_LOG_INFO("%s: format = %s\n", __func__, llama_file_version_name(ml->file_version));
|
LLAMA_LOG_INFO("%s: format = %s\n", __func__, llama_file_version_name(ml->file_version));
|
||||||
|
LLAMA_LOG_INFO("%s: arch = %s\n", __func__, general_arch.c_str());
|
||||||
LLAMA_LOG_INFO("%s: n_vocab = %u\n", __func__, hparams.n_vocab);
|
LLAMA_LOG_INFO("%s: n_vocab = %u\n", __func__, hparams.n_vocab);
|
||||||
LLAMA_LOG_INFO("%s: n_ctx_train = %u\n", __func__, hparams.n_ctx_train);
|
LLAMA_LOG_INFO("%s: n_ctx_train = %u\n", __func__, hparams.n_ctx_train);
|
||||||
LLAMA_LOG_INFO("%s: n_ctx = %u\n", __func__, hparams.n_ctx);
|
LLAMA_LOG_INFO("%s: n_ctx = %u\n", __func__, hparams.n_ctx);
|
||||||
@@ -1466,8 +1475,6 @@ static void llama_model_load_internal(
|
|||||||
LLAMA_LOG_INFO("%s: model type = %s\n", __func__, llama_model_type_name(model.type));
|
LLAMA_LOG_INFO("%s: model type = %s\n", __func__, llama_model_type_name(model.type));
|
||||||
LLAMA_LOG_INFO("%s: model size = %.2f B\n", __func__, ml->n_elements*1e-9);
|
LLAMA_LOG_INFO("%s: model size = %.2f B\n", __func__, ml->n_elements*1e-9);
|
||||||
|
|
||||||
// TODO: print number of tensors for each quantization
|
|
||||||
|
|
||||||
// general kv
|
// general kv
|
||||||
LLAMA_LOG_INFO("%s: general.name = %s\n", __func__, general_name.c_str());
|
LLAMA_LOG_INFO("%s: general.name = %s\n", __func__, general_name.c_str());
|
||||||
|
|
||||||
@@ -1481,6 +1488,7 @@ static void llama_model_load_internal(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (vocab_only) {
|
if (vocab_only) {
|
||||||
|
LLAMA_LOG_INFO("%s: vocab only - skipping tensors\n", __func__);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user