"""TODOs 1. Implement writers for known architectures, LLaMA in particular. 2. Add docstrings from the format specs. 3. After development is done, Convert it to a proper pip-installable Python package, and possibly move it to its own repo under ggml-org. """ import struct import constants from enum import IntEnum from typing import Any, IO, List import numpy as np import sys class GGMLQuantizationType(IntEnum): F32 = 0 F16 = 1 Q4_0 = 2 Q4_1 = 3 # Q4_2 = 4 # support has been removed # Q4_3 = 5 # support has been removed Q5_0 = 6 Q5_1 = 7 Q8_0 = 8 Q8_1 = 9 Q2_K = 10 Q3_K = 11 Q4_K = 12 Q5_K = 13 Q6_K = 14 Q8_K = 15 class GGUFValueType(IntEnum): UINT8 = 0 INT8 = 1 UINT16 = 2 INT16 = 3 UINT32 = 4 INT32 = 5 FLOAT32 = 6 BOOL = 7 STRING = 8 ARRAY = 9 @staticmethod def get_type(val): if isinstance(val, str) or isinstance(val, bytes) or isinstance(val, bytearray): return GGUFValueType.STRING elif isinstance(val, list): return GGUFValueType.ARRAY elif isinstance(val, float): return GGUFValueType.FLOAT32 elif isinstance(val, bool): return GGUFValueType.BOOL elif isinstance(val, int): return GGUFValueType.INT32 else: print("Unknown type: "+str(type(val))) sys.exit() class GGUFWriter: def __init__(self, fout: IO): self.fout = fout self.offset_tensor = 0 self.data_alignment = constants.GGUF_DEFAULT_ALIGNMENT self.kv_data = b"" self.kv_data_count = 0 self.ti_data = b"" self.ti_data_count = 0 def write_header_to_file(self): self.fout.write(struct.pack(" "GGUFWriter": f = open(path, "wb") return cls(f) def add_key(self, key: str): self.add_val(key, GGUFValueType.STRING, add_vtype=False) def add_uint8(self, key: str, val: int): self.add_key(key) self.add_val(val, GGUFValueType.UINT8) def add_int8(self, key: str, val: int): self.add_key(key) self.add_val(val, GGUFValueType.INT8) def add_uint16(self, key: str, val: int): self.add_key(key) self.add_val(val, GGUFValueType.UINT16) def add_int16(self, key: str, val: int): self.add_key(key) self.add_val(val, GGUFValueType.INT16) def add_uint32(self, key: str, val: int): self.add_key(key) self.add_val(val, GGUFValueType.UINT32) def add_int32(self, key: str, val: int): self.add_key(key) self.add_val(val, GGUFValueType.INT32) def add_float32(self, key: str, val: float): self.add_key(key) self.add_val(val, GGUFValueType.FLOAT32) def add_bool(self, key: str, val: bool): self.add_key(key) self.add_val(val, GGUFValueType.BOOL) def add_string(self, key: str, val: str): self.add_key(key) self.add_val(val, GGUFValueType.STRING) def add_array(self, key: str, val: list): if not isinstance(val, list): raise ValueError("Value must be a list for array type") self.add_key(key) self.add_val(val, GGUFValueType.ARRAY) def add_val(self: str, val: Any, vtype: GGUFValueType = None, add_vtype: bool = True): if vtype is None: vtype = GGUFValueType.get_type(val) if add_vtype: self.kv_data += struct.pack(" int: return ((x + n - 1) // n) * n def add_tensor_info(self, name: str, tensor_shape: np.ndarray, tensor_dtype: np.dtype, tensor_nbytes: int): encoded_name = name.encode("utf8") self.ti_data += struct.pack("