mirror of https://github.com/ggerganov/llama.cpp.git
synced 2024-12-27 03:44:35 +00:00
gguf : single pass for writing tensors + refactoring writer
commit 5f97a48fc1
parent dce07c3121
@@ -279,65 +279,6 @@ gguf_writer.write_kv_data_to_file()
 
 print("gguf: write tensors")
 gguf_writer.write_tensors_to_file()
-
-# tensor data
-print("gguf: convert and write tensor data")
-
-if num_parts == 0:
-    part_names = ("pytorch_model.bin",)
-else:
-    part_names = (
-        f"pytorch_model-{n:05}-of-{num_parts:05}.bin" for n in range(1, num_parts + 1)
-    )
-
-for part_name in part_names:
-    print("gguf: loading model part '" + part_name + "'")
-    model_part = torch.load(f"{dir_model}/{part_name}", map_location="cpu")
-
-    for name in model_part.keys():
-        data = model_part[name]
-
-        old_dtype = data.dtype
-
-        # we don't need these
-        if name.endswith(".rotary_emb.inv_freq"):
-            continue
-
-        # convert any unsupported data types to float32
-        if data.dtype != torch.float16 and data.dtype != torch.float32:
-            data = data.to(torch.float32)
-
-        data = data.squeeze().numpy()
-
-        # reverse permute these
-        if name.endswith(".q_proj.weight") or name.endswith(".k_proj.weight"):
-            data = reverse_hf_permute(data, head_count, head_count_kv)
-
-        # map tensor names
-        if name.endswith(".weight") and name[:-7] in tensor_map:
-            name = tensor_map[name[:-7]] + ".weight"
-        elif name.endswith(".bias") and name[:-5] in tensor_map:
-            name = tensor_map[name[:-5]] + ".bias"
-        else:
-            print("Cannot map tensor '" + name + "'")
-            sys.exit()
-
-        n_dims = len(data.shape)
-        data_dtype = data.dtype
-
-        # if f32 desired, convert any float16 to float32
-        if ftype == 0 and data.dtype == np.float16:
-            data = data.astype(np.float32)
-
-        # TODO: Why can't we use these float16 as-is? There should be no reason to store float16 as float32
-        if ftype == 1 and data_dtype == np.float16 and n_dims == 1:
-            data = data.astype(np.float32)
-
-        # if f16 desired, convert any float32 2-dim weight tensors to float16
-        if ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
-            data = data.astype(np.float16)
-
-        gguf_writer.write_tensor_to_file(data)
-
 gguf_writer.close()
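The deleted block was a second full pass over the model parts: it re-loaded and re-converted every tensor just to write the data, duplicating work done when the tensor info was first registered. Per the commit title, the refactored writer presumably buffers tensor data at registration time, so a single write_tensors_to_file() call emits everything. A minimal sketch of the resulting flow, assuming the gguf-py GGUFWriter API; the constructor arguments, tensor names, and dummy data below are illustrative, not from this commit:

    # Sketch of the single-pass flow after this commit (illustrative,
    # not the exact convert script).
    import gguf
    import numpy as np

    # stand-in for the converted model tensors (hypothetical name/shape)
    tensors = {"token_embd.weight": np.zeros((32, 8), dtype=np.float32)}

    gguf_writer = gguf.GGUFWriter("model.gguf", "llama")  # path/arch are illustrative

    for name, data in tensors.items():
        gguf_writer.add_tensor(name, data)  # buffers tensor info and data together

    gguf_writer.write_header_to_file()
    gguf_writer.write_kv_data_to_file()
    gguf_writer.write_tensors_to_file()  # the single write pass
    gguf_writer.close()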
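For reference, the reverse_hf_permute call in the removed loop undoes the head-wise interleaving that Hugging Face LLaMA checkpoints apply to the q_proj/k_proj weights, restoring the layout llama.cpp expects for its rotary embeddings. A sketch consistent with the convert scripts of this era; treat it as illustrative rather than the exact code:

    import numpy as np

    def reverse_hf_permute(weights: np.ndarray, n_head: int, n_kv_head: int | None = None) -> np.ndarray:
        # with grouped-query attention, K uses fewer heads than Q
        if n_kv_head is not None and n_head != n_kv_head:
            n_head //= n_kv_head
        # split each head into its two rotary halves, swap the interleaving,
        # and flatten back to the original 2-D shape
        return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
                       .swapaxes(1, 2)
                       .reshape(weights.shape))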
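The dtype checks in the removed loop encode the converter's output-type policy: ftype 0 forces float32 everywhere, while ftype 1 stores 2-D *.weight matrices as float16 and keeps everything else (notably 1-D tensors such as norms) as float32. Restated as a hypothetical standalone helper for clarity; this function does not exist in the repo:

    import numpy as np

    def convert_output_dtype(name: str, data: np.ndarray, ftype: int) -> np.ndarray:
        # hypothetical helper restating the policy from the removed loop above
        if ftype == 0 and data.dtype == np.float16:
            return data.astype(np.float32)   # f32 output: upcast everything
        if ftype == 1 and data.dtype == np.float16 and data.ndim == 1:
            return data.astype(np.float32)   # 1-D tensors stay f32 even for f16 output
        if ftype == 1 and data.dtype == np.float32 and name.endswith(".weight") and data.ndim == 2:
            return data.astype(np.float16)   # 2-D weight matrices become f16
        return data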