# llama.cpp/convert-llama-h5-to-gguf.py

# Quick and dirty HF llama --> gguf conversion; GQA/70b models won't work

import gguf
import gguf_tensor_map as tmap
import os
import sys
import struct
import json
import numpy as np
from typing import Any, List
from pathlib import Path
from transformers import AutoModelForCausalLM
from sentencepiece import SentencePieceProcessor


#NDArray = np.ndarray[Any, Any]
# compatible with python < 3.9
NDArray: 'TypeAlias' = 'np.ndarray[Any, Any]'

def permute(weights: NDArray, n_head: int) -> NDArray:
    return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
                   .swapaxes(1, 2)
                   .reshape(weights.shape))


if len(sys.argv) < 3:
    print("Usage: convert-h5-to-ggml.py dir-model ftype\n")
    print("  ftype == 0 -> float32")
    print("  ftype == 1 -> float16")
    sys.exit(1)


# output in the same directory as the model
dir_model = sys.argv[1]
last_dir = os.path.basename(os.path.normpath(dir_model))


# possible tensor data types
#   ftype == 0 -> float32
#   ftype == 1 -> float16
#
# map from ftype to string
ftype_str = ["f32", "f16"]

ftype = 1
if len(sys.argv) > 2:
    ftype = int(sys.argv[2])
    if ftype < 0 or ftype > 1:
        print("Invalid ftype: " + str(ftype))
        sys.exit(1)

fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".gguf"

print("gguf: loading model "+last_dir)
    
with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
    hparams = json.load(f)

if hparams["architectures"][0] != "LlamaForCausalLM":
    print("Model architecture not supported: " + hparams["architectures"][0] )
    sys.exit()

model = AutoModelForCausalLM.from_pretrained(dir_model, low_cpu_mem_usage=True, trust_remote_code=True)
list_vars = model.state_dict()

gguf_writer = gguf.GGUFWriter.open(fname_out)


print("gguf: get model metadata")

llm_arch    = "llama"
head_count  = hparams["num_attention_heads"]
block_count = hparams["num_hidden_layers"]

gguf_writer.add_name(last_dir)
gguf_writer.add_architecture(llm_arch)
gguf_writer.add_context_length(llm_arch, hparams["max_position_embeddings"])
gguf_writer.add_embedding_length(llm_arch, hparams["hidden_size"])
gguf_writer.add_layer_count(llm_arch, block_count)
gguf_writer.add_feed_forward_length(llm_arch, hparams["intermediate_size"])
gguf_writer.add_rope_dimension_count(llm_arch, hparams["hidden_size"] // hparams["num_attention_heads"])
gguf_writer.add_head_count(llm_arch, head_count)
gguf_writer.add_layer_norm_rms_eps(llm_arch, hparams["rms_norm_eps"])


# TOKENIZATION

print("gguf: get tokenizer metadata")

tokens: List[str] = []
scores: List[float] = []

if Path(dir_model + "/tokenizer.model").is_file():
    # vocab type sentencepiece
    print("gguf: get sentencepiece tokenizer vocab and scores")

    tokenizer = SentencePieceProcessor(dir_model + "/tokenizer.model")

    for i in range(tokenizer.vocab_size()):
        text: bytes
        if tokenizer.is_unknown(i):
            text = " \u2047 ".encode("utf-8")
        elif tokenizer.is_control(i):
            text = b""
        if tokenizer.is_byte(i):
            piece = tokenizer.id_to_piece(i)
            if len(piece) != 6:
                raise Exception(f"Invalid token: {piece}")
            byte_value = int(piece[3:-1], 16)
            text = struct.pack("B", byte_value)
        else:
            text = tokenizer.id_to_piece(i).replace("\u2581", " ").encode("utf-8")
        score: float = tokenizer.get_score(i)

        tokens.append(text)
        scores.append(score)

    gguf_writer.add_tokenizer_model("llama")
    gguf_writer.add_token_list(tokens)
    gguf_writer.add_token_scores(scores)

if Path(dir_model + "/tokenizer.json").is_file():
    with open(dir_model + "/tokenizer.json", "r", encoding="utf-8") as f:
        tokenizer = json.load(f)

    if "added_tokens" in tokenizer and Path(dir_model + "/tokenizer_config.json").is_file():
        print("gguf: get special token ids")

        with open(dir_model + "/tokenizer_config.json", "r", encoding="utf-8") as f:
            tokenizer_config = json.load(f)

        # find special token ids

        if "bos_token" in tokenizer_config and tokenizer_config["bos_token"] != None:
            for key in tokenizer["added_tokens"]:
                if key["content"] == tokenizer_config["bos_token"]["content"]:
                    gguf_writer.add_bos_token_id(key["id"])

        if "eos_token" in tokenizer_config and tokenizer_config["eos_token"] != None:
            for key in tokenizer["added_tokens"]:
                if key["content"] == tokenizer_config["eos_token"]["content"]:
                    gguf_writer.add_eos_token_id(key["id"])

        if "unk_token" in tokenizer_config and tokenizer_config["unk_token"] != None:
            for key in tokenizer["added_tokens"]:
                if key["content"] == tokenizer_config["unk_token"]["content"]:
                    gguf_writer.add_unk_token_id(key["id"])

        if "sep_token" in tokenizer_config and tokenizer_config["sep_token"] != None:
            for key in tokenizer["added_tokens"]:
                if key["content"] == tokenizer_config["sep_token"]["content"]:
                    gguf_writer.add_sep_token_id(key["id"])

        if "pad_token" in tokenizer_config and tokenizer_config["pad_token"] != None:
            for key in tokenizer["added_tokens"]:
                if key["content"] == tokenizer_config["pad_token"]["content"]:
                    gguf_writer.add_pad_token_id(key["id"])


# TENSORS

tensor_map = tmap.get_tensor_map(block_count)

# tensor info
print("gguf: get tensor metadata")

for name in list_vars.keys():
    data = list_vars[name].squeeze().numpy()

    # we don't need these
    if name.endswith(".rotary_emb.inv_freq"):
        continue

    # permute these
    if name.endswith(".q_proj.weight") or name.endswith(".k_proj.weight"):
        data = permute(data,head_count)

    # map tensor names
    if name.endswith(".weight") and name[:-7] in tensor_map:
        name = tensor_map[name[:-7]] + ".weight"
    elif name.endswith(".bias") and name[:-5] in tensor_map:
        name = tensor_map[name[:-5]] + ".bias"
    else:
        print( "Can not map tensor '" + name + "'" )
        sys.exit()

    n_dims = len(data.shape)
    data_dtype = data.dtype 

#    print( name + " dims " + str(n_dims) + " dtype " + str(data.dtype) )

    if data.dtype != np.float16 and data.dtype != np.float32:
        # convert any unsupported data types to float32
        data_dtype = np.float32
    elif ftype == 1 and data.dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
        # if f16 desired, convert any float32 2-dim weight tensors to float16
        data_dtype = np.float16

    data_nbytes = data.size * 2 if data_dtype == np.float16 else data.size * 4

    gguf_writer.add_tensor_info(name, data.shape, data_dtype, data_nbytes)


print("gguf: write header")
gguf_writer.write_header_to_file()
print("gguf: write metadata")
gguf_writer.write_kv_data_to_file()
print("gguf: write tensor metadata")
gguf_writer.write_ti_data_to_file()

# tensor data
print("gguf: convert and write tensor data")

for name in list_vars.keys():
    data = list_vars[name].squeeze().numpy()

    # we don't need these
    if name.endswith(".rotary_emb.inv_freq"):
        continue

    # permute these
    if name.endswith(".q_proj.weight") or name.endswith(".k_proj.weight"):
        data = permute(data, head_count)

    n_dims = len(data.shape)
    data_dtype = data.dtype 

    if data_dtype != np.float16 and data_dtype != np.float32:
        # convert any unsupported data types to float32
        data = data.astype(np.float32)
    elif ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
        # if f16 desired, convert any float32 2-dim weight tensors to float16
        data = data.astype(np.float16)

    gguf_writer.write_tensor_to_file(data)

gguf_writer.close()


print("gguf: model successfully exported to '" + fname_out + "'" )
print("")
quick and dirty conversion example 2023-07-29 09:20:05 +00:00			`# Quick and dirty HF llama --> gguf conversion, GQA/70b wont work`

			`import gguf`
convert-llama-h5-to-gguf.py : map tensor names 2023-08-08 22:52:16 +00:00			`import gguf_tensor_map as tmap`
			`import os`
quick and dirty conversion example 2023-07-29 09:20:05 +00:00			`import sys`
			`import struct`
			`import json`
			`import numpy as np`
gguf : update convert-llama-h5-to-gguf.py 2023-07-29 22:09:22 +00:00			`from typing import Any, List`
quick and dirty conversion example 2023-07-29 09:20:05 +00:00			`from pathlib import Path`
			`from transformers import AutoModelForCausalLM`
			`from sentencepiece import SentencePieceProcessor`


convert-llama-h5-to-gguf.py : accumulate kv / ti + special tokens 2023-08-02 09:15:33 +00:00			`#NDArray = np.ndarray[Any, Any]`
			`# compatible with python < 3.9`
			`NDArray: 'TypeAlias' = 'np.ndarray[Any, Any]'`
gguf : upd gguf conversion script 2023-07-29 10:31:07 +00:00
quick and dirty conversion example 2023-07-29 09:20:05 +00:00			`def permute(weights: NDArray, n_head: int) -> NDArray:`
			`return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])`
			`.swapaxes(1, 2)`
			`.reshape(weights.shape))`

gguf : upd gguf conversion script 2023-07-29 10:31:07 +00:00
quick and dirty conversion example 2023-07-29 09:20:05 +00:00			`if len(sys.argv) < 3:`
			`print("Usage: convert-h5-to-ggml.py dir-model ftype\n")`
			`print(" ftype == 0 -> float32")`
			`print(" ftype == 1 -> float16")`
			`sys.exit(1)`


			`# output in the same directory as the model`
			`dir_model = sys.argv[1]`
convert-llama-h5-to-gguf.py : map tensor names 2023-08-08 22:52:16 +00:00			`last_dir = os.path.basename(os.path.normpath(dir_model))`
quick and dirty conversion example 2023-07-29 09:20:05 +00:00

			`# possible tensor data types`
			`# ftype == 0 -> float32`
			`# ftype == 1 -> float16`
			`#`
			`# map from ftype to string`
			`ftype_str = ["f32", "f16"]`

			`ftype = 1`
			`if len(sys.argv) > 2:`
			`ftype = int(sys.argv[2])`
			`if ftype < 0 or ftype > 1:`
			`print("Invalid ftype: " + str(ftype))`
			`sys.exit(1)`
convert-llama-h5-to-gguf.py : no need to convert tensors twice 2023-08-12 19:50:24 +00:00
			`fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".gguf"`
convert-llama-h5-to-gguf.py : map tensor names 2023-08-08 22:52:16 +00:00
			`print("gguf: loading model "+last_dir)`
fix : update convert-llama-h5-to-gguf.py 2023-07-31 20:58:29 +00:00
			`with open(dir_model + "/config.json", "r", encoding="utf-8") as f:`
			`hparams = json.load(f)`
quick and dirty conversion example 2023-07-29 09:20:05 +00:00
Update convert-llama-h5-to-gguf.py 2023-07-31 01:02:00 +00:00			`if hparams["architectures"][0] != "LlamaForCausalLM":`
			`print("Model architecture not supported: " + hparams["architectures"][0] )`
			`sys.exit()`
quick and dirty conversion example 2023-07-29 09:20:05 +00:00
gguf : upd gguf conversion script 2023-07-29 10:31:07 +00:00			`model = AutoModelForCausalLM.from_pretrained(dir_model, low_cpu_mem_usage=True, trust_remote_code=True)`
quick and dirty conversion example 2023-07-29 09:20:05 +00:00			`list_vars = model.state_dict()`

			`gguf_writer = gguf.GGUFWriter.open(fname_out)`


convert-llama-h5-to-gguf.py : map tensor names 2023-08-08 22:52:16 +00:00			`print("gguf: get model metadata")`
quick and dirty conversion example 2023-07-29 09:20:05 +00:00
convert-llama-h5-to-gguf.py : map tensor names 2023-08-08 22:52:16 +00:00			`llm_arch = "llama"`
			`head_count = hparams["num_attention_heads"]`
			`block_count = hparams["num_hidden_layers"]`
quick and dirty conversion example 2023-07-29 09:20:05 +00:00
convert-llama-h5-to-gguf.py : no need to convert tensors twice 2023-08-12 19:50:24 +00:00			`gguf_writer.add_name(last_dir)`
convert-llama-h5-to-gguf.py : accumulate kv / ti + special tokens 2023-08-02 09:15:33 +00:00			`gguf_writer.add_architecture(llm_arch)`
			`gguf_writer.add_context_length(llm_arch, hparams["max_position_embeddings"])`
			`gguf_writer.add_embedding_length(llm_arch, hparams["hidden_size"])`
convert-llama-h5-to-gguf.py : map tensor names 2023-08-08 22:52:16 +00:00			`gguf_writer.add_layer_count(llm_arch, block_count)`
convert-llama-h5-to-gguf.py : accumulate kv / ti + special tokens 2023-08-02 09:15:33 +00:00			`gguf_writer.add_feed_forward_length(llm_arch, hparams["intermediate_size"])`
			`gguf_writer.add_rope_dimension_count(llm_arch, hparams["hidden_size"] // hparams["num_attention_heads"])`
convert-llama-h5-to-gguf.py : map tensor names 2023-08-08 22:52:16 +00:00			`gguf_writer.add_head_count(llm_arch, head_count)`
convert-llama-h5-to-gguf.py : accumulate kv / ti + special tokens 2023-08-02 09:15:33 +00:00			`gguf_writer.add_layer_norm_rms_eps(llm_arch, hparams["rms_norm_eps"])`
quick and dirty conversion example 2023-07-29 09:20:05 +00:00

			`# TOKENIZATION`

convert-llama-h5-to-gguf.py : map tensor names 2023-08-08 22:52:16 +00:00			`print("gguf: get tokenizer metadata")`
Update convert-llama-h5-to-gguf.py 2023-07-29 19:38:01 +00:00
quick and dirty conversion example 2023-07-29 09:20:05 +00:00			`tokens: List[str] = []`
			`scores: List[float] = []`

gguf : upd gguf conversion script 2023-07-29 10:31:07 +00:00			`if Path(dir_model + "/tokenizer.model").is_file():`
Update convert-llama-h5-to-gguf.py 2023-07-29 19:38:01 +00:00			`# vocab type sentencepiece`
convert-llama-h5-to-gguf.py : map tensor names 2023-08-08 22:52:16 +00:00			`print("gguf: get sentencepiece tokenizer vocab and scores")`
convert-llama-h5-to-gguf.py : accumulate kv / ti + special tokens 2023-08-02 09:15:33 +00:00
gguf : upd gguf conversion script 2023-07-29 10:31:07 +00:00			`tokenizer = SentencePieceProcessor(dir_model + "/tokenizer.model")`
quick and dirty conversion example 2023-07-29 09:20:05 +00:00
			`for i in range(tokenizer.vocab_size()):`
			`text: bytes`
			`if tokenizer.is_unknown(i):`
			`text = " \u2047 ".encode("utf-8")`
			`elif tokenizer.is_control(i):`
			`text = b""`
			`if tokenizer.is_byte(i):`
			`piece = tokenizer.id_to_piece(i)`
			`if len(piece) != 6:`
			`raise Exception(f"Invalid token: {piece}")`
			`byte_value = int(piece[3:-1], 16)`
			`text = struct.pack("B", byte_value)`
			`else:`
			`text = tokenizer.id_to_piece(i).replace("\u2581", " ").encode("utf-8")`
			`score: float = tokenizer.get_score(i)`

Update convert-llama-h5-to-gguf.py 2023-07-29 14:47:00 +00:00			`tokens.append(text)`
gguf : upd gguf conversion script 2023-07-29 10:31:07 +00:00			`scores.append(score)`
quick and dirty conversion example 2023-07-29 09:20:05 +00:00
convert-llama-h5-to-gguf.py : accumulate kv / ti + special tokens 2023-08-02 09:15:33 +00:00			`gguf_writer.add_tokenizer_model("llama")`
			`gguf_writer.add_token_list(tokens)`
			`gguf_writer.add_token_scores(scores)`

			`if Path(dir_model + "/tokenizer.json").is_file():`
			`with open(dir_model + "/tokenizer.json", "r", encoding="utf-8") as f:`
			`tokenizer = json.load(f)`

			`if "added_tokens" in tokenizer and Path(dir_model + "/tokenizer_config.json").is_file():`
convert-llama-h5-to-gguf.py : map tensor names 2023-08-08 22:52:16 +00:00			`print("gguf: get special token ids")`
convert-llama-h5-to-gguf.py : accumulate kv / ti + special tokens 2023-08-02 09:15:33 +00:00
			`with open(dir_model + "/tokenizer_config.json", "r", encoding="utf-8") as f:`
			`tokenizer_config = json.load(f)`

			`# find special token ids`

			`if "bos_token" in tokenizer_config and tokenizer_config["bos_token"] != None:`
			`for key in tokenizer["added_tokens"]:`
convert-llama-h5-to-gguf.py : special tokens 2023-08-02 09:26:07 +00:00			`if key["content"] == tokenizer_config["bos_token"]["content"]:`
convert-llama-h5-to-gguf.py : accumulate kv / ti + special tokens 2023-08-02 09:15:33 +00:00			`gguf_writer.add_bos_token_id(key["id"])`

			`if "eos_token" in tokenizer_config and tokenizer_config["eos_token"] != None:`
			`for key in tokenizer["added_tokens"]:`
convert-llama-h5-to-gguf.py : special tokens 2023-08-02 09:26:07 +00:00			`if key["content"] == tokenizer_config["eos_token"]["content"]:`
convert-llama-h5-to-gguf.py : accumulate kv / ti + special tokens 2023-08-02 09:15:33 +00:00			`gguf_writer.add_eos_token_id(key["id"])`

			`if "unk_token" in tokenizer_config and tokenizer_config["unk_token"] != None:`
			`for key in tokenizer["added_tokens"]:`
convert-llama-h5-to-gguf.py : special tokens 2023-08-02 09:26:07 +00:00			`if key["content"] == tokenizer_config["unk_token"]["content"]:`
convert-llama-h5-to-gguf.py : accumulate kv / ti + special tokens 2023-08-02 09:15:33 +00:00			`gguf_writer.add_unk_token_id(key["id"])`

			`if "sep_token" in tokenizer_config and tokenizer_config["sep_token"] != None:`
			`for key in tokenizer["added_tokens"]:`
convert-llama-h5-to-gguf.py : special tokens 2023-08-02 09:26:07 +00:00			`if key["content"] == tokenizer_config["sep_token"]["content"]:`
convert-llama-h5-to-gguf.py : accumulate kv / ti + special tokens 2023-08-02 09:15:33 +00:00			`gguf_writer.add_sep_token_id(key["id"])`

			`if "pad_token" in tokenizer_config and tokenizer_config["pad_token"] != None:`
			`for key in tokenizer["added_tokens"]:`
convert-llama-h5-to-gguf.py : special tokens 2023-08-02 09:26:07 +00:00			`if key["content"] == tokenizer_config["pad_token"]["content"]:`
convert-llama-h5-to-gguf.py : accumulate kv / ti + special tokens 2023-08-02 09:15:33 +00:00			`gguf_writer.add_pad_token_id(key["id"])`

quick and dirty conversion example 2023-07-29 09:20:05 +00:00
			`# TENSORS`

convert-llama-h5-to-gguf.py : map tensor names 2023-08-08 22:52:16 +00:00			`tensor_map = tmap.get_tensor_map(block_count)`

quick and dirty conversion example 2023-07-29 09:20:05 +00:00			`# tensor info`
convert-llama-h5-to-gguf.py : map tensor names 2023-08-08 22:52:16 +00:00			`print("gguf: get tensor metadata")`
quick and dirty conversion example 2023-07-29 09:20:05 +00:00
			`for name in list_vars.keys():`
			`data = list_vars[name].squeeze().numpy()`

			`# we don't need these`
			`if name.endswith(".rotary_emb.inv_freq"):`
			`continue`

			`# permute these`
			`if name.endswith(".q_proj.weight") or name.endswith(".k_proj.weight"):`
convert-llama-h5-to-gguf.py : map tensor names 2023-08-08 22:52:16 +00:00			`data = permute(data,head_count)`
quick and dirty conversion example 2023-07-29 09:20:05 +00:00
convert-llama-h5-to-gguf.py : map tensor names 2023-08-08 22:52:16 +00:00			`# map tensor names`
			`if name.endswith(".weight") and name[:-7] in tensor_map:`
			`name = tensor_map[name[:-7]] + ".weight"`
			`elif name.endswith(".bias") and name[:-5] in tensor_map:`
			`name = tensor_map[name[:-5]] + ".bias"`
quick and dirty conversion example 2023-07-29 09:20:05 +00:00			`else:`
convert-llama-h5-to-gguf.py : map tensor names 2023-08-08 22:52:16 +00:00			`print( "Can not map tensor '" + name + "'" )`
			`sys.exit()`
quick and dirty conversion example 2023-07-29 09:20:05 +00:00
Update convert-llama-h5-to-gguf.py 2023-07-29 14:47:00 +00:00			`n_dims = len(data.shape)`
convert-llama-h5-to-gguf.py : no need to convert tensors twice 2023-08-12 19:50:24 +00:00			`data_dtype = data.dtype`
Update convert-llama-h5-to-gguf.py 2023-07-29 14:47:00 +00:00
convert-llama-h5-to-gguf.py : no need to convert tensors twice 2023-08-12 19:50:24 +00:00			`# print( name + " dims " + str(n_dims) + " dtype " + str(data.dtype) )`

			`if data.dtype != np.float16 and data.dtype != np.float32:`
			`# convert any unsupported data types to float32`
			`data_dtype = np.float32`
			`elif ftype == 1 and data.dtype == np.float32 and name.endswith(".weight") and n_dims == 2:`
			`# if f16 desired, convert any float32 2-dim weight tensors to float16`
			`data_dtype = np.float16`

convert-llama-h5-to-gguf.py : simplify nbytes 2023-08-12 20:30:35 +00:00			`data_nbytes = data.size * 2 if data_dtype == np.float16 else data.size * 4`
convert-llama-h5-to-gguf.py : no need to convert tensors twice 2023-08-12 19:50:24 +00:00
			`gguf_writer.add_tensor_info(name, data.shape, data_dtype, data_nbytes)`
convert-llama-h5-to-gguf.py : accumulate kv / ti + special tokens 2023-08-02 09:15:33 +00:00
quick and dirty conversion example 2023-07-29 09:20:05 +00:00
convert-llama-h5-to-gguf.py : accumulate kv / ti + special tokens 2023-08-02 09:15:33 +00:00			`print("gguf: write header")`
			`gguf_writer.write_header_to_file()`
convert-llama-h5-to-gguf.py : map tensor names 2023-08-08 22:52:16 +00:00			`print("gguf: write metadata")`
convert-llama-h5-to-gguf.py : accumulate kv / ti + special tokens 2023-08-02 09:15:33 +00:00			`gguf_writer.write_kv_data_to_file()`
convert-llama-h5-to-gguf.py : map tensor names 2023-08-08 22:52:16 +00:00			`print("gguf: write tensor metadata")`
convert-llama-h5-to-gguf.py : accumulate kv / ti + special tokens 2023-08-02 09:15:33 +00:00			`gguf_writer.write_ti_data_to_file()`
quick and dirty conversion example 2023-07-29 09:20:05 +00:00
			`# tensor data`
convert-llama-h5-to-gguf.py : no need to convert tensors twice 2023-08-12 19:50:24 +00:00			`print("gguf: convert and write tensor data")`
quick and dirty conversion example 2023-07-29 09:20:05 +00:00
			`for name in list_vars.keys():`
			`data = list_vars[name].squeeze().numpy()`

			`# we don't need these`
			`if name.endswith(".rotary_emb.inv_freq"):`
			`continue`

gguf : upd gguf conversion script 2023-07-29 10:31:07 +00:00			`# permute these`
quick and dirty conversion example 2023-07-29 09:20:05 +00:00			`if name.endswith(".q_proj.weight") or name.endswith(".k_proj.weight"):`
convert-llama-h5-to-gguf.py : map tensor names 2023-08-08 22:52:16 +00:00			`data = permute(data, head_count)`
quick and dirty conversion example 2023-07-29 09:20:05 +00:00
			`n_dims = len(data.shape)`
convert-llama-h5-to-gguf.py : no need to convert tensors twice 2023-08-12 19:50:24 +00:00			`data_dtype = data.dtype`

			`if data_dtype != np.float16 and data_dtype != np.float32:`
			`# convert any unsupported data types to float32`
			`data = data.astype(np.float32)`
			`elif ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:`
			`# if f16 desired, convert any float32 2-dim weight tensors to float16`
			`data = data.astype(np.float16)`
quick and dirty conversion example 2023-07-29 09:20:05 +00:00
convert-llama-h5-to-gguf.py : accumulate kv / ti + special tokens 2023-08-02 09:15:33 +00:00			`gguf_writer.write_tensor_to_file(data)`
quick and dirty conversion example 2023-07-29 09:20:05 +00:00
			`gguf_writer.close()`


convert-llama-h5-to-gguf.py : map tensor names 2023-08-08 22:52:16 +00:00			`print("gguf: model successfully exported to '" + fname_out + "'" )`
quick and dirty conversion example 2023-07-29 09:20:05 +00:00			`print("")`