gguf : start implementing quantization (WIP)

This commit is contained in:
M. Yusuf Sarıgöz 2023-08-12 16:09:47 +03:00
parent fa7c39540c
commit 1fc3d30b71
2 changed files with 2 additions and 2 deletions

View File

@@ -421,7 +421,7 @@ int main(int argc, char ** argv) {
GGML_ASSERT(gguf_ex_write(fname) && "failed to write gguf file"); GGML_ASSERT(gguf_ex_write(fname) && "failed to write gguf file");
} else if (mode == "r") { } else if (mode == "r") {
GGML_ASSERT(gguf_ex_read_0(fname) && "failed to read gguf file"); GGML_ASSERT(gguf_ex_read_0(fname) && "failed to read gguf file");
GGML_ASSERT(gguf_ex_read_1(fname) && "failed to read gguf file"); //GGML_ASSERT(gguf_ex_read_1(fname) && "failed to read gguf file");
GGML_ASSERT(gguf_ex_read_2(fname) && "failed to read gguf file"); GGML_ASSERT(gguf_ex_read_2(fname) && "failed to read gguf file");
} else if (mode == "q") { } else if (mode == "q") {
llama_model_quantize_params params = llama_model_quantize_default_params(); llama_model_quantize_params params = llama_model_quantize_default_params();

View File

@@ -527,7 +527,7 @@ struct ggml_context * ctx_data = NULL;
// TODO: read all hparams from file // TODO: read all hparams from file
int q_ver_idx = gguf_find_key (gguf_ctx, "general.quantization_version"); int q_ver_idx = gguf_find_key (gguf_ctx, "general.quantization_version");
if (q_ver_idx != -1) { if (q_ver_idx != -1) {
hparams.ftype = gguf_get_val_u32(gguf_ctx, q_ver_idx); hparams.ftype = (enum llama_ftype) gguf_get_val_u32(gguf_ctx, q_ver_idx);
} }
hparams.n_vocab = read_n_vocab(); hparams.n_vocab = read_n_vocab();