diff --git a/.gitignore b/.gitignore
index abe8e28cb..fd41f26cc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
 *.o
 *.a
 *.so
+*.gguf
 .DS_Store
 .build/
 .cache/
diff --git a/constants.py b/constants.py
index 3a97460e5..34880bb20 100644
--- a/constants.py
+++ b/constants.py
@@ -1,5 +1,6 @@
-GGUF_MAGIC = 0x47475546
-GGUF_VERSION = 1
+GGUF_MAGIC             = 0x47475546
+GGUF_VERSION           = 1
+GGUF_DEFAULT_ALIGNMENT = 32
 
 # general
 KEY_GENERAL_ARCHITECTURE = "general.architecture"
diff --git a/gguf.py b/gguf.py
index 764ae9a9d..c5b2174c9 100644
--- a/gguf.py
+++ b/gguf.py
@@ -1,14 +1,16 @@
 """TODOs
-1. Implement writing tensor data with alignment.
-2. Implement writers for known architectures, LLaMA in particular.
-3. Add docstrings from the format specs.
-4. After development is done, Convert it to a proper pip-installable Python package, and possibly move it to its own repo under ggml-org.
+1. Implement writers for known architectures, LLaMA in particular.
+2. Add docstrings from the format specs.
+3. After development is done, Convert it to a proper pip-installable Python package, and possibly move it to its own repo under ggml-org.
 """
 import struct
 import constants
 from enum import IntEnum
-from typing import List, Any
+from typing import Any, IO, List
+
+import numpy as np
+
 
 
 class GGMLQuantizationType(IntEnum):
     F32 = 0
@@ -54,15 +56,18 @@ class GGUFValueType(IntEnum):
         else:
             return GGUFValueType.INT32
 
+
 class GGUFWriter:
-    def __init__(self, buffered_writer):
-        self.buffered_writer = buffered_writer
+    def __init__(self, fout: IO):
+        self.fout = fout
+        self.offset_tensor = 0
+        self.tensors: List[np.ndarray] = []
 
     def write_header(self, tensor_count: int, metadata_kv_count: int):
-        self.buffered_writer.write(struct.pack("<I", constants.GGUF_MAGIC))
-        self.buffered_writer.write(struct.pack("<I", constants.GGUF_VERSION))
-        self.buffered_writer.write(struct.pack("<I", tensor_count))
-        self.buffered_writer.write(struct.pack("<I", metadata_kv_count))
+        self.fout.write(struct.pack("<I", constants.GGUF_MAGIC))
+        self.fout.write(struct.pack("<I", constants.GGUF_VERSION))
+        self.fout.write(struct.pack("<I", tensor_count))
+        self.fout.write(struct.pack("<I", metadata_kv_count))
 
     @classmethod
     def open(cls, path: str) -> "GGUFWriter":
@@ -119,40 +124,69 @@ class GGUFWriter:
         if vtype is None:
             vtype = GGUFValueType.get_type(val)
 
-        self.buffered_writer.write(struct.pack("<I", vtype))
+        self.fout.write(struct.pack("<I", vtype))
 
         if vtype == GGUFValueType.UINT8:
-            self.buffered_writer.write(struct.pack("<B", val))
+            self.fout.write(struct.pack("<B", val))
         elif vtype == GGUFValueType.INT8:
-            self.buffered_writer.write(struct.pack("<b", val))
+            self.fout.write(struct.pack("<b", val))
         elif vtype == GGUFValueType.UINT16:
-            self.buffered_writer.write(struct.pack("<H", val))
+            self.fout.write(struct.pack("<H", val))
         elif vtype == GGUFValueType.INT16:
-            self.buffered_writer.write(struct.pack("<h", val))
+            self.fout.write(struct.pack("<h", val))
         elif vtype == GGUFValueType.UINT32:
-            self.buffered_writer.write(struct.pack("<I", val))
+            self.fout.write(struct.pack("<I", val))
         elif vtype == GGUFValueType.INT32:
-            self.buffered_writer.write(struct.pack("<i", val))
+            self.fout.write(struct.pack("<i", val))
         elif vtype == GGUFValueType.FLOAT32:
-            self.buffered_writer.write(struct.pack("<f", val))
+            self.fout.write(struct.pack("<f", val))
        elif vtype == GGUFValueType.BOOL:
-            self.buffered_writer.write(struct.pack("?", val))
+            self.fout.write(struct.pack("?", val))
         elif vtype == GGUFValueType.STRING:
             encoded_val = val.encode("utf8")
-            self.buffered_writer.write(struct.pack("<I", len(encoded_val)))
-            self.buffered_writer.write(encoded_val)
+            self.fout.write(struct.pack("<I", len(encoded_val)))
+            self.fout.write(encoded_val)
         elif vtype == GGUFValueType.ARRAY:
-            self.buffered_writer.write(struct.pack("<I", len(val)))
+            self.fout.write(struct.pack("<I", len(val)))
             for item in val:
                 self.write_val(item)
         else:
             raise ValueError("Invalid GGUF metadata value type")
 
+    @staticmethod
+    def ggml_pad(x: int, n: int) -> int:
+        return ((x + n - 1) // n) * n
+
+    def write_tensor_info(self, name: str, tensor: np.ndarray):
+        self.write_val(name, GGUFValueType.STRING)
+        n_dims = len(tensor.shape)
+        self.write_val(n_dims, GGUFValueType.INT32)
+        for i in range(n_dims):
+            self.write_val(tensor.shape[n_dims - 1 - i], GGUFValueType.INT32)
+
+        assert tensor.dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"
+        dtype = GGMLQuantizationType.F32 if tensor.dtype == np.float32 else GGMLQuantizationType.F16
+        self.write_val(dtype, GGUFValueType.INT32)
+        self.fout.write(struct.pack("<Q", self.offset_tensor))
+        self.offset_tensor += GGUFWriter.ggml_pad(tensor.nbytes, constants.GGUF_DEFAULT_ALIGNMENT)
+
+        self.tensors.append(tensor)
+
+    def write_tensors(self):
+        offset_data = GGUFWriter.ggml_pad(self.fout.tell(), constants.GGUF_DEFAULT_ALIGNMENT)
+        pad = offset_data - self.fout.tell()
+        self.fout.write(bytes([0] * pad))
+
+        for tensor in self.tensors:
+            tensor.tofile(self.fout)
+            pad = GGUFWriter.ggml_pad(tensor.nbytes, constants.GGUF_DEFAULT_ALIGNMENT) - tensor.nbytes
+            self.fout.write(bytes([0] * pad))
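
Usage sketch for the new alignment-aware writer (not part of the patch). It assumes gguf.py and constants.py from this diff are importable as-is; the output path, tensor names, and shapes are invented for illustration, and the final write_tensors() call assumes that is the method which flushes the cached, alignment-padded tensor data.

import numpy as np

from gguf import GGUFWriter

# Dummy tensors standing in for real model weights (names and shapes are made up).
tensors = {
    "tok_embeddings.weight": np.ones((4, 8), dtype=np.float32),
    "output.weight": np.ones((8, 4), dtype=np.float16),
}

writer = GGUFWriter.open("example.gguf")

# Header: tensor count and metadata key-value count (no metadata in this sketch).
writer.write_header(len(tensors), 0)

# Tensor infos first; each records an offset rounded up to
# GGUF_DEFAULT_ALIGNMENT (32 bytes) via GGUFWriter.ggml_pad().
for name, tensor in tensors.items():
    writer.write_tensor_info(name, tensor)

# Then the tensor data itself, padded to the same 32-byte boundary
# (assumed method name for the data-flush step added by this patch).
writer.write_tensors()
writer.fout.close()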