gguf : start implementing quantization (WIP)
parent fa7c39540c
commit 1fc3d30b71
@@ -421,7 +421,7 @@ int main(int argc, char ** argv) {
         GGML_ASSERT(gguf_ex_write(fname) && "failed to write gguf file");
     } else if (mode == "r") {
         GGML_ASSERT(gguf_ex_read_0(fname) && "failed to read gguf file");
-        GGML_ASSERT(gguf_ex_read_1(fname) && "failed to read gguf file");
+        //GGML_ASSERT(gguf_ex_read_1(fname) && "failed to read gguf file");
         GGML_ASSERT(gguf_ex_read_2(fname) && "failed to read gguf file");
     } else if (mode == "q") {
         llama_model_quantize_params params = llama_model_quantize_default_params();
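The "q" mode is where the example hooks into the llama quantization API via llama_model_quantize_default_params(). As a minimal sketch of what a complete call might look like, assuming the llama_model_quantize() entry point that pairs with the default-params helper; the filenames and the target ftype below are illustrative only:

    #include "llama.h"
    #include <cstdio>

    // Sketch only: quantize an f16 GGUF model down to Q4_0 using the llama.cpp
    // C API. The input/output filenames are made up for illustration.
    int main() {
        llama_model_quantize_params params = llama_model_quantize_default_params();
        params.ftype   = LLAMA_FTYPE_MOSTLY_Q4_0; // target file type
        params.nthread = 4;                       // <= 0 would mean "use hardware concurrency"

        // llama_model_quantize returns 0 on success
        if (llama_model_quantize("model-f16.gguf", "model-q4_0.gguf", &params) != 0) {
            fprintf(stderr, "quantization failed\n");
            return 1;
        }
        return 0;
    }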
@@ -527,7 +527,7 @@ struct ggml_context * ctx_data = NULL;
     // TODO: read all hparams from file
     int q_ver_idx = gguf_find_key (gguf_ctx, "general.quantization_version");
     if (q_ver_idx != -1) {
-        hparams.ftype = gguf_get_val_u32(gguf_ctx, q_ver_idx);
+        hparams.ftype = (enum llama_ftype) gguf_get_val_u32(gguf_ctx, q_ver_idx);
    }

     hparams.n_vocab = read_n_vocab();
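The second hunk adds an explicit cast: gguf_get_val_u32() returns a plain uint32_t, which C++ will not implicitly convert to the enum llama_ftype field. For illustration, a standalone sketch of reading the same key through the gguf key/value API; the filename is hypothetical, and depending on the ggml revision these declarations live in ggml.h or a separate gguf.h:

    #include "ggml.h"   // gguf_* API (split out into gguf.h in later ggml revisions)
    #include <cstdint>
    #include <cstdio>

    int main() {
        struct gguf_init_params params = {
            /*.no_alloc =*/ true,  // read metadata only, do not allocate tensor data
            /*.ctx      =*/ NULL,
        };
        struct gguf_context * gguf_ctx = gguf_init_from_file("model.gguf", params);
        if (!gguf_ctx) {
            fprintf(stderr, "failed to open gguf file\n");
            return 1;
        }

        // gguf_find_key returns the key index, or -1 if the key is absent
        const int q_ver_idx = gguf_find_key(gguf_ctx, "general.quantization_version");
        if (q_ver_idx != -1) {
            printf("quantization version: %u\n", gguf_get_val_u32(gguf_ctx, q_ver_idx));
        }

        gguf_free(gguf_ctx);
        return 0;
    }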