From 1fc3d30b71a707187eb1f995c4776db7aaa6265a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=2E=20Yusuf=20Sar=C4=B1g=C3=B6z?= Date: Sat, 12 Aug 2023 16:09:47 +0300 Subject: [PATCH] gguf : start implementing quantization (WIP) --- examples/gguf/gguf.cpp | 2 +- gguf-llama.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/gguf/gguf.cpp b/examples/gguf/gguf.cpp index 6f454a204..08f2b6322 100644 --- a/examples/gguf/gguf.cpp +++ b/examples/gguf/gguf.cpp @@ -421,7 +421,7 @@ int main(int argc, char ** argv) { GGML_ASSERT(gguf_ex_write(fname) && "failed to write gguf file"); } else if (mode == "r") { GGML_ASSERT(gguf_ex_read_0(fname) && "failed to read gguf file"); - GGML_ASSERT(gguf_ex_read_1(fname) && "failed to read gguf file"); + //GGML_ASSERT(gguf_ex_read_1(fname) && "failed to read gguf file"); GGML_ASSERT(gguf_ex_read_2(fname) && "failed to read gguf file"); } else if (mode == "q") { llama_model_quantize_params params = llama_model_quantize_default_params(); diff --git a/gguf-llama.cpp b/gguf-llama.cpp index eecefc0f6..ea721a0c7 100644 --- a/gguf-llama.cpp +++ b/gguf-llama.cpp @@ -527,7 +527,7 @@ struct ggml_context * ctx_data = NULL; // TODO: read all hparams from file int q_ver_idx = gguf_find_key (gguf_ctx, "general.quantization_version"); if (q_ver_idx != -1) { - hparams.ftype = gguf_get_val_u32(gguf_ctx, q_ver_idx); + hparams.ftype = (enum llama_ftype) gguf_get_val_u32(gguf_ctx, q_ver_idx); } hparams.n_vocab = read_n_vocab();