gguf : track writer state, free unneeded tensors, cleanup (#3871)

Jared Van Bortel, 2023-11-07 12:43:04 -05:00, committed by GitHub
parent 413503d4b9
commit 0a7c980b6f
2 changed files with 54 additions and 30 deletions

gguf-py/gguf/gguf.py

@@ -646,18 +646,17 @@ class GGUFValueType(IntEnum):
     sys.exit()
 
 
+class WriterState(Enum):
+    EMPTY   = auto()
+    HEADER  = auto()
+    KV_DATA = auto()
+    TI_DATA = auto()
+
+
 class GGUFWriter:
     fout: BufferedWriter
-    arch: str
-    offset_tensor = 0
-    data_alignment = GGUF_DEFAULT_ALIGNMENT
-    kv_data = b""
-    kv_data_count = 0
-    ti_data = b""
-    ti_data_count = 0
-    use_temp_file: bool
-    temp_file: tempfile.SpooledTemporaryFile[bytes] | None = None
-    tensors: list[tuple[np.ndarray[Any, Any], int]]
+    temp_file: tempfile.SpooledTemporaryFile[bytes] | None
+    tensors: list[np.ndarray[Any, Any]]
 
     @property
     def pack_prefix(self):
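The new WriterState enum gives the writer an explicit record of which file section has been emitted so far. A minimal illustration of how such Enum/auto() members behave (the enum matches the diff; the surrounding usage is hypothetical):

```python
from enum import Enum, auto

class WriterState(Enum):
    EMPTY   = auto()
    HEADER  = auto()
    KV_DATA = auto()
    TI_DATA = auto()

state = WriterState.EMPTY
# Enum members are singletons, so identity checks like the ones the
# writer performs below ("state is not WriterState.EMPTY") are reliable.
print(state is WriterState.EMPTY)   # True
# Members also render readably, which is what makes the f-string error
# messages ("... got {self.state}") informative.
print(f"got {state}")               # got WriterState.EMPTY
```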
@@ -683,27 +682,47 @@ class GGUFWriter:
             GGUFValueType.FLOAT64: f"{self.pack_prefix}d",
             GGUFValueType.BOOL:    "?" ,
         }
-        self.add_architecture()
+        self.offset_tensor = 0
+        self.data_alignment = GGUF_DEFAULT_ALIGNMENT
+        self.kv_data = b""
+        self.kv_data_count = 0
+        self.ti_data = b""
+        self.ti_data_count = 0
         self.use_temp_file = use_temp_file
+        self.temp_file = None
         self.tensors = []
         endianess_str = "Big Endian" if self.endianess == GGUFEndian.BIG else "Little Endian"
         print(f"This gguf file is for {endianess_str} only")
+        self.state = WriterState.EMPTY
+
+        self.add_architecture()
 
     def write_header_to_file(self):
+        if self.state is not WriterState.EMPTY:
+            raise ValueError(f'Expected output file to be empty, got {self.state}')
+
         self.fout.write(struct.pack("<I", GGUF_MAGIC))
         self.fout.write(struct.pack(f"{self.pack_prefix}I", GGUF_VERSION))
         self.fout.write(struct.pack(f"{self.pack_prefix}Q", self.ti_data_count))
         self.fout.write(struct.pack(f"{self.pack_prefix}Q", self.kv_data_count))
         self.flush()
-        # print("tensors " + str(self.ti_data_count) + " kv " + str(self.kv_data_count))
+        self.state = WriterState.HEADER
 
     def write_kv_data_to_file(self):
+        if self.state is not WriterState.HEADER:
+            raise ValueError(f'Expected output file to contain the header, got {self.state}')
+
         self.fout.write(self.kv_data)
         self.flush()
+        self.state = WriterState.KV_DATA
 
     def write_ti_data_to_file(self):
+        if self.state is not WriterState.KV_DATA:
+            raise ValueError(f'Expected output file to contain KV data, got {self.state}')
+
         self.fout.write(self.ti_data)
         self.flush()
+        self.state = WriterState.TI_DATA
 
     def add_key(self, key: str):
         self.add_val(key, GGUFValueType.STRING, add_vtype=False)
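With these checks in place, the write methods must be called in a fixed order. A sketch of typical usage, assuming the conventional GGUFWriter entry points (the file name, architecture string, key, and tensor contents below are placeholders):

```python
import numpy as np
from gguf import GGUFWriter

writer = GGUFWriter("out.gguf", "llama")    # starts in WriterState.EMPTY
writer.add_uint32("example.block_count", 1)  # buffered into kv_data
writer.add_tensor("weight", np.zeros((4, 4), dtype=np.float32))

writer.write_header_to_file()    # EMPTY  -> HEADER
writer.write_kv_data_to_file()   # HEADER -> KV_DATA
writer.write_tensors_to_file()   # writes TI data (-> TI_DATA), then tensor data
writer.close()

# Calling these out of order now fails fast; e.g. invoking
# write_kv_data_to_file() first raises:
#   ValueError: Expected output file to contain the header, got WriterState.EMPTY
```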
@@ -796,6 +815,9 @@ class GGUFWriter:
         return ((x + n - 1) // n) * n
 
     def add_tensor_info(self, name: str, tensor_shape: Sequence[int], tensor_dtype: np.dtype[np.float16] | np.dtype[np.float32], tensor_nbytes: int, raw_dtype: GGMLQuantizationType | None = None):
+        if self.state is not WriterState.EMPTY:
+            raise ValueError(f'Expected output file to be empty, got {self.state}')
+
         assert raw_dtype is not None or tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"
 
         encoded_name = name.encode("utf8")
@@ -825,23 +847,22 @@ class GGUFWriter:
         shape: Sequence[int] = raw_shape if raw_shape is not None else tensor.shape
         self.add_tensor_info(name, shape, tensor.dtype, tensor.nbytes, raw_dtype = raw_dtype)
 
-        pad = GGUFWriter.ggml_pad(tensor.nbytes, self.data_alignment) - tensor.nbytes
-
         if self.temp_file is None:
-            self.tensors.append((tensor, pad))
+            self.tensors.append(tensor)
             return
 
         tensor.tofile(self.temp_file)
+        self.write_padding(self.temp_file, tensor.nbytes)
 
-        if pad != 0:
-            self.temp_file.write(bytes([0] * pad))
-
-    def write_padding(self, fp: BinaryIO, n: int, align: int | None = None):
+    def write_padding(self, fp: IO[bytes], n: int, align: int | None = None):
         pad = GGUFWriter.ggml_pad(n, align if align is not None else self.data_alignment) - n
         if pad != 0:
             fp.write(bytes([0] * pad))
 
     def write_tensor_data(self, tensor: np.ndarray[Any, Any]):
+        if self.state is not WriterState.TI_DATA:
+            raise ValueError(f'Expected output file to contain tensor info, got {self.state}')
+
         if self.endianess==GGUFEndian.BIG:
             tensor.byteswap(inplace=True)
         self.write_padding(self.fout, self.fout.tell())
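For reference, `ggml_pad` (used by `write_padding` above) rounds a size up to the next multiple of the alignment, and the padding is the difference. A small check of the arithmetic with the default 32-byte alignment (GGUF_DEFAULT_ALIGNMENT = 32):

```python
def ggml_pad(x: int, n: int) -> int:
    # Same formula as GGUFWriter.ggml_pad in the hunk above:
    # round x up to the next multiple of n.
    return ((x + n - 1) // n) * n

assert ggml_pad(100, 32) - 100 == 28   # a 100-byte tensor gets 28 zero bytes
assert ggml_pad(128, 32) - 128 == 0    # an already-aligned tensor gets none
```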
@@ -854,10 +875,13 @@ class GGUFWriter:
         self.write_padding(self.fout, self.fout.tell())
 
         if self.temp_file is None:
-            for (currtensor, currpad) in self.tensors:
-                currtensor.tofile(self.fout)
-                if currpad != 0:
-                    self.fout.write(bytes([0] * currpad))
+            while True:
+                try:
+                    tensor = self.tensors.pop(0)
+                except IndexError:
+                    break
+                tensor.tofile(self.fout)
+                self.write_padding(self.fout, tensor.nbytes)
             return
 
         self.temp_file.seek(0)
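The old loop kept every tensor reachable through self.tensors until the whole file was written; popping each array first drops the writer's reference, so the interpreter can reclaim the buffer as soon as it has been flushed. That is the "free unneeded tensors" part of this commit. A standalone sketch of the pattern (output path and shapes are illustrative):

```python
import numpy as np

tensors = [np.zeros((1024, 1024), dtype=np.float32) for _ in range(4)]

with open("/tmp/tensors.bin", "wb") as fout:
    while True:
        try:
            tensor = tensors.pop(0)   # drop the list's reference before writing
        except IndexError:
            break
        tensor.tofile(fout)
        # 'tensor' is rebound next iteration, so with no other references the
        # ~4 MiB buffer becomes collectable here rather than after the loop.
```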
@@ -1002,11 +1026,8 @@ class GGUFWriter:
 
 class SpecialVocab:
-    load_merges: bool = False
-    merges: list[str] = []
-    special_token_types: tuple[str, ...] = ('bos', 'eos', 'unk', 'sep', 'pad')
-    special_token_ids: dict[str, int] = {}
-    n_vocab: int | None = None
+    merges: list[str]
+    special_token_ids: dict[str, int]
 
     def __init__(
         self, path: str | os.PathLike[str], load_merges: bool = False,
@@ -1016,8 +1037,11 @@ class SpecialVocab:
         self.special_token_ids = {}
         self.n_vocab = n_vocab
         self.load_merges = load_merges
+        self.merges = []
         if special_token_types is not None:
             self.special_token_types = special_token_types
+        else:
+            self.special_token_types = ('bos', 'eos', 'unk', 'sep', 'pad')
         self._load(Path(path))
 
     def _load(self, path: Path) -> None:
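The SpecialVocab change moves mutable defaults out of the class body and into __init__. That matters because class-level lists and dicts are single objects shared by every instance. A minimal demonstration of the pitfall the old annotations invited (class names here are hypothetical):

```python
class Shared:
    merges: list[str] = []            # one list object, stored on the class

class PerInstance:
    def __init__(self) -> None:
        self.merges: list[str] = []   # a fresh list per instance

a, b = Shared(), Shared()
a.merges.append("x")
print(b.merges)      # ['x'] -- b observes a's mutation

c, d = PerInstance(), PerInstance()
c.merges.append("x")
print(d.merges)      # [] -- instances stay independent
```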

gguf-py/pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "gguf"
-version = "0.4.5"
+version = "0.4.6"
 description = "Write ML models in GGUF for GGML"
 authors = ["GGML <ggml@ggml.ai>"]
 packages = [