"""TODOs 1. Implement writers for known architectures, LLaMA in particular. 2. Add docstrings from the format specs. 3. After development is done, Convert it to a proper pip-installable Python package, and possibly move it to its own repo under ggml-org. """ import struct import constants from enum import IntEnum from typing import Any, IO, List import numpy as np class GGMLQuantizationType(IntEnum): F32 = 0 F16 = 1 Q4_0 = 2 Q4_1 = 3 # Q4_2 = 4 # support has been removed # Q4_3 = 5 # support has been removed Q5_0 = 6 Q5_1 = 7 Q8_0 = 8 Q8_1 = 9 Q2_K = 10 Q3_K = 11 Q4_K = 12 Q5_K = 13 Q6_K = 14 Q8_K = 15 class GGUFValueType(IntEnum): UINT8 = 0 INT8 = 1 UINT16 = 2 INT16 = 3 UINT32 = 4 INT32 = 5 FLOAT32 = 6 BOOL = 7 STRING = 8 ARRAY = 9 @staticmethod def get_type(val): if isinstance(val, str): return GGUFValueType.STRING elif isinstance(val, list): return GGUFValueType.ARRAY elif isinstance(val, float): return GGUFValueType.FLOAT32 elif isinstance(val, bool): return GGUFValueType.BOOL else: return GGUFValueType.INT32 class GGUFWriter: def __init__(self, fout: IO): self.fout = fout self.offset_tensor = 0 self.tensors: List[np.ndarray] = [] def write_header(self, tensor_count: int, metadata_kv_count: int): self.fout.write(struct.pack(" "GGUFWriter": f = open(path, "wb") return cls(f) def write_key(self, key: str): encoded_key = key.encode("utf8") self.fout.write(struct.pack(" int: return ((x + n - 1) // n) * n def write_tensor_info(self, name: str, tensor: np.ndarray): self.write_key(name) n_dims = len(tensor.shape) self.fout.write(struct.pack("