From c4f02b4f74c5fdb6493090c238891163709631e4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=2E=20Yusuf=20Sar=C4=B1g=C3=B6z?=
Date: Sat, 12 Aug 2023 12:01:17 +0300
Subject: [PATCH] gguf : start implementing quantization (WIP)

Add a write_raw(data, size) member to struct gguf_file that writes
`size` bytes starting at `data` to the file with a single fwrite call.

Use it in gguf_file_saver when writing a tensor: the tensor payload
(new_data, new_size) is now written to the file instead of being
discarded through GGML_UNUSED. The zero padding up to
GGUF_DEFAULT_ALIGNMENT that follows the payload is unchanged.

---
Review notes (this section is ignored by git am):

* write_raw() does not check the value returned by fwrite, so a short
  or failed write is not reported to the caller.
* Custom alignment is still not handled (see the existing TODO next to
  GGML_PAD).
* Leading whitespace inside the hunks was reconstructed assuming
  4-space indentation; confirm against the target tree before applying.

 gguf-llama.cpp | 3 +--
 gguf-util.h    | 4 ++++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/gguf-llama.cpp b/gguf-llama.cpp
index cf2c56955..defe26fe0 100644
--- a/gguf-llama.cpp
+++ b/gguf-llama.cpp
@@ -778,8 +778,7 @@ struct gguf_file_saver {
         }
 
         write_tensor_info(tensor);
-        // file.write_raw(new_data);
-        GGML_UNUSED(new_data);
+        file.write_raw(new_data, new_size);
         size_t padded_size = GGML_PAD(new_size, GGUF_DEFAULT_ALIGNMENT); // TODO: handle custom alignment
         size_t pad = padded_size - new_size;
         file.write_zeros(pad);
diff --git a/gguf-util.h b/gguf-util.h
index 0964e6d02..17f9dc968 100644
--- a/gguf-util.h
+++ b/gguf-util.h
@@ -123,6 +123,10 @@ struct gguf_file {
         return fwrite((const char *) &val, sizeof(val), 1, fp);
     }
 
+    void write_raw(const void * data, size_t size) {
+        fwrite(data, size, 1, fp);
+    }
+
     template<typename T>
     void write_val(const std::string & key, enum gguf_type type, const T & val) {
         write_str(key);