gguf : start implementing quantization (WIP)

2024-12-30 21:34:36 +00:00 · 2023-08-12 12:01:17 +03:00 · 2023-08-12 12:01:17 +03:00 · c4f02b4f74
commit c4f02b4f74
parent 0e1a3c7e7d
2 changed files with 5 additions and 2 deletions
--- a/gguf-llama.cpp
+++ b/gguf-llama.cpp
@ -778,8 +778,7 @@ struct gguf_file_saver {
        }
        write_tensor_info(tensor);
-        // file.write_raw(new_data);
+        file.write_raw(new_data, new_size);
        GGML_UNUSED(new_data);
        size_t padded_size = GGML_PAD(new_size, GGUF_DEFAULT_ALIGNMENT); // TODO: handle custom alignment
        size_t pad = padded_size - new_size;
        file.write_zeros(pad);
--- a/gguf-util.h
+++ b/gguf-util.h
@ -123,6 +123,10 @@ struct gguf_file {
        return fwrite((const char *) &val, sizeof(val), 1, fp);
    }
    // Writes `size` bytes starting at `data` to `fp`, issued as a single
    // fwrite item of `size` bytes.
    // NOTE(review): the fwrite result is discarded, so a failed or short
    // write is not reported to the caller — confirm this is intended.
    void write_raw(const void * data, size_t size) {
        const size_t n_items = 1; // the whole buffer counts as one item
        (void) fwrite(data, size, n_items, fp);
    }
    template<typename T>
    void write_val(const std::string & key, enum gguf_type type, const T & val) {
        write_str(key);