From 5f97a48fc176210ba35ee6570e29291d1b3c757f Mon Sep 17 00:00:00 2001
From: M. Yusuf Sarıgöz
Date: Thu, 17 Aug 2023 16:57:50 +0300
Subject: [PATCH] gguf : single pass for writing tensors + refactoring writer

---
 convert-llama-h5-to-gguf.py | 59 -------------------------------------
 1 file changed, 59 deletions(-)

diff --git a/convert-llama-h5-to-gguf.py b/convert-llama-h5-to-gguf.py
index a718ca4d1..d63893351 100644
--- a/convert-llama-h5-to-gguf.py
+++ b/convert-llama-h5-to-gguf.py
@@ -279,65 +279,6 @@ gguf_writer.write_kv_data_to_file()
 
 print("gguf: write tensors")
 gguf_writer.write_tensors_to_file()
 
-# tensor data
-print("gguf: convert and write tensor data")
-
-if num_parts == 0:
-    part_names = ("pytorch_model.bin",)
-else:
-    part_names = (
-        f"pytorch_model-{n:05}-of-{num_parts:05}.bin" for n in range(1, num_parts + 1)
-    )
-
-for part_name in part_names:
-    print("gguf: loading model part '" + part_name + "'")
-    model_part = torch.load(f"{dir_model}/{part_name}", map_location="cpu")
-
-    for name in model_part.keys():
-        data = model_part[name]
-
-        old_dtype = data.dtype
-
-        # we don't need these
-        if name.endswith(".rotary_emb.inv_freq"):
-            continue
-
-        # convert any unsupported data types to float32
-        if data.dtype != torch.float16 and data.dtype != torch.float32:
-            data = data.to(torch.float32)
-
-        data = data.squeeze().numpy()
-
-        # reverse permute these
-        if name.endswith(".q_proj.weight") or name.endswith(".k_proj.weight"):
-            data = reverse_hf_permute(data, head_count, head_count_kv)
-
-        # map tensor names
-        if name.endswith(".weight") and name[:-7] in tensor_map:
-            name = tensor_map[name[:-7]] + ".weight"
-        elif name.endswith(".bias") and name[:-5] in tensor_map:
-            name = tensor_map[name[:-5]] + ".bias"
-        else:
-            print("Can not map tensor '" + name + "'")
-            sys.exit()
-
-        n_dims = len(data.shape)
-        data_dtype = data.dtype
-
-        # if f32 desired, convert any float16 to float32
-        if ftype == 0 and data.dtype == np.float16:
-            data = data.astype(np.float32)
-
-        # TODO: Why cant we use these float16 as-is? There should be not reason to store float16 as float32
-        if ftype == 1 and data_dtype == np.float16 and n_dims == 1:
-            data = data.astype(np.float32)
-
-        # if f16 desired, convert any float32 2-dim weight tensors to float16
-        if ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
-            data = data.astype(np.float16)
-
-        gguf_writer.write_tensor_to_file(data)
-
 gguf_writer.close()
 
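
Note for reviewers: the hunk above only removes the old second pass that converted and wrote tensor data one tensor at a time. As a reference, here is a rough sketch of how that per-tensor conversion can instead feed the refactored single-pass writer. It assumes the writer now exposes an add_tensor(name, data) method that buffers each tensor until write_tensors_to_file() is called (the exact method name and signature should be checked against gguf.py), and it reuses reverse_hf_permute, tensor_map, ftype, head_count, head_count_kv, num_parts and dir_model as defined earlier in the script.

# Sketch only: gguf_writer.add_tensor(...) is assumed here; verify the
# actual writer API in gguf.py before relying on it.
import sys

import numpy as np
import torch

if num_parts == 0:
    part_names = ("pytorch_model.bin",)
else:
    part_names = (
        f"pytorch_model-{n:05}-of-{num_parts:05}.bin" for n in range(1, num_parts + 1)
    )

for part_name in part_names:
    print("gguf: loading model part '" + part_name + "'")
    model_part = torch.load(f"{dir_model}/{part_name}", map_location="cpu")

    for name, data in model_part.items():
        # rotary embedding frequencies are not stored in GGUF
        if name.endswith(".rotary_emb.inv_freq"):
            continue

        # convert any unsupported data types to float32
        if data.dtype != torch.float16 and data.dtype != torch.float32:
            data = data.to(torch.float32)

        data = data.squeeze().numpy()

        # undo the HF permutation of the attention projections
        if name.endswith(".q_proj.weight") or name.endswith(".k_proj.weight"):
            data = reverse_hf_permute(data, head_count, head_count_kv)

        # map the HF tensor name to its GGUF name
        if name.endswith(".weight") and name[:-7] in tensor_map:
            name = tensor_map[name[:-7]] + ".weight"
        elif name.endswith(".bias") and name[:-5] in tensor_map:
            name = tensor_map[name[:-5]] + ".bias"
        else:
            print("Cannot map tensor '" + name + "'")
            sys.exit(1)

        # same dtype rules as the removed pass: keep everything f32 when f32
        # output is requested, store 1-dim tensors as f32, and store 2-dim
        # weight tensors as f16 when f16 output is requested
        if ftype == 0 and data.dtype == np.float16:
            data = data.astype(np.float32)
        if ftype == 1 and data.dtype == np.float16 and data.ndim == 1:
            data = data.astype(np.float32)
        if ftype == 1 and data.dtype == np.float32 and name.endswith(".weight") and data.ndim == 2:
            data = data.astype(np.float16)

        # register the tensor with the writer instead of writing it here;
        # the existing write_kv_data_to_file() / write_tensors_to_file() /
        # close() calls at the end of the script then emit everything in one pass
        gguf_writer.add_tensor(name, data)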