From cfb8e35b73bd8daca6fa5179d30c48a5db43bc31 Mon Sep 17 00:00:00 2001
From: M. Yusuf Sarıgöz
Date: Thu, 10 Aug 2023 19:56:56 +0300
Subject: [PATCH] gguf : inference with 7B model working (WIP)

---
 gguf-llama.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/gguf-llama.cpp b/gguf-llama.cpp
index b88a2d8bf..0c4095714 100644
--- a/gguf-llama.cpp
+++ b/gguf-llama.cpp
@@ -493,6 +493,8 @@ struct ggml_context * ctx_data = NULL;
 
         gguf_ctx = gguf_init_from_file(fname, params);
 
+        read_hparams();
+        read_vocab();
         read_tensor_metadata(tensors_map);
     }
 
@@ -523,7 +525,7 @@ struct ggml_context * ctx_data = NULL;
             }
         }
 
-        throw std::runtime_error(format("failed to find n_mult for n_ff = %d and n_emb = %d\n", n_ff, n_embd));
+        throw std::runtime_error(format("failed to find n_mult for n_ff = %d and n_embd = %d\n", n_ff, n_embd));
     }
 
     void read_hparams() {
@@ -534,14 +536,14 @@ struct ggml_context * ctx_data = NULL;
         hparams.n_ctx = read_u32("llama.context_length");
         hparams.n_embd = read_u32("llama.embedding_length");
         uint32_t n_ff = read_u32("llama.feed_forward_length");
-        hparams.n_mult = find_n_mult(n_ff, hparams.n_embd);
+        //hparams.n_mult = find_n_mult(n_ff, hparams.n_embd);
         hparams.n_head = read_u32("llama.attention.head_count");
         hparams.n_layer = read_u32("llama.layer_count");
         hparams.n_rot = hparams.n_embd / hparams.n_head;
         //hparams.ftype = (enum llama_ftype) file.read_u32();
 
         // LLaMAv2
-        hparams.n_head_kv = read_u32("llama.attention.head_count_kv");
+        // hparams.n_head_kv = read_u32("llama.attention.head_count_kv");
     }
 
     void read_vocab() {
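
Note: the patch reads each hyperparameter through a read_u32(key) helper whose body is not part of this hunk. Below is a minimal sketch of how such a helper could be written on top of the public gguf API; the gguf_hparams_reader wrapper, the error handling, and the header location (ggml.h here, gguf.h in newer trees) are assumptions for illustration, not code from this commit.

#include <cstdint>
#include <stdexcept>
#include <string>

#include "ggml.h"   // gguf_* API; newer trees declare it in gguf.h instead

// Hypothetical standalone wrapper around a gguf_context, for illustration only.
struct gguf_hparams_reader {
    struct gguf_context * gguf_ctx = nullptr;

    uint32_t read_u32(const char * key) const {
        // gguf_find_key returns a negative index when the key is not present.
        const auto idx = gguf_find_key(gguf_ctx, key);
        if (idx < 0) {
            throw std::runtime_error(std::string("missing GGUF key: ") + key);
        }
        return gguf_get_val_u32(gguf_ctx, idx);
    }
};

A call such as reader.read_u32("llama.context_length") would then mirror the lookups made in read_hparams() above.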