fix convert-lora-to-ggml.py (#2738)

Mirror of https://github.com/ggerganov/llama.cpp.git
Commit: 335acd2ffd
Parent: 5290c38e6e
@@ -6,23 +6,22 @@ import struct
 import sys
 from typing import Any, Dict, Sequence, TextIO
 
+import numpy as np
 import torch
 
-from convert import DATA_TYPE_TO_FTYPE, NUMPY_TYPE_TO_DATA_TYPE, DataType
+NUMPY_TYPE_TO_FTYPE: Dict[str, int] = {"float32": 0, "float16": 1}
+
 
 HF_SUBLAYER_TO_GGML = {
-    "self_attn.q_proj": "attention.wq",
-    "self_attn.k_proj": "attention.wk",
-    "self_attn.v_proj": "attention.wv",
-    "self_attn.o_proj": "attention.wo",
-    "mlp.gate_proj": "feed_forward.w1",
-    "mlp.down_proj": "feed_forward.w2",
-    "mlp.up_proj": "feed_forward.w3",
-    "input_layernorm": "attention_norm",
+    "self_attn.q_proj": "attn_q",
+    "self_attn.k_proj": "attn_k",
+    "self_attn.v_proj": "attn_v",
+    "self_attn.o_proj": "attn_output",
+    "mlp.gate_proj": "ffn_gate",
+    "mlp.down_proj": "ffn_down",
+    "mlp.up_proj": "ffn_up",
+    "input_layernorm": "attn_norm",
     "post_attention_layernorm": "ffn_norm",
-    # "norm": "norm",
-    # "embed_tokens": "tok_embeddings",
-    # "lm_head": "output",
 }
 
 
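This hunk drops the DATA_TYPE_TO_FTYPE / NUMPY_TYPE_TO_DATA_TYPE imports from convert.py in favour of a small local table keyed by the numpy dtype name. A minimal sketch of how that table resolves an ftype, assuming an illustrative float16 LoRA matrix:

    import numpy as np

    # Local table from the hunk above: ftype 0 = float32, 1 = float16.
    NUMPY_TYPE_TO_FTYPE = {"float32": 0, "float16": 1}

    # Illustrative LoRA "A" matrix; the shape and dtype are placeholders.
    lora_a = np.zeros((8, 4096), dtype=np.float16)
    ftype = NUMPY_TYPE_TO_FTYPE[lora_a.dtype.name]  # -> 1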
@@ -39,7 +38,7 @@ def translate_tensor_name(t: str) -> str:
             sys.exit(1)
 
         output_string = (
-            f"layers.{nn}.{HF_SUBLAYER_TO_GGML[sub_layer]}.weight.lora{lora_type}"
+            f"blk.{nn}.{HF_SUBLAYER_TO_GGML[sub_layer]}.weight.lora{lora_type}"
         )
         return output_string
     else:
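For reference, a rough before/after of the tensor names this renaming produces, assuming layer 3 and a q_proj LoRA "A" matrix (the concrete values are only illustrative):

    nn, lora_type = "3", "A"

    # Old GGML-style name: "attention.wq" under "layers.{nn}"
    old_name = f"layers.{nn}.attention.wq.weight.lora{lora_type}"
    # New GGUF-style name: "attn_q" under "blk.{nn}"
    new_name = f"blk.{nn}.attn_q.weight.lora{lora_type}"

    print(old_name)  # layers.3.attention.wq.weight.loraA
    print(new_name)  # blk.3.attn_q.weight.loraA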
@@ -54,12 +53,14 @@ def write_file_header(fout: TextIO, params: Dict[str, Any]) -> None:
     # https://opendelta.readthedocs.io/en/latest/modules/deltas.html says that `lora_alpha` is an int
     # but some models ship a float value instead
     # let's convert to int, but fail if lossless conversion is not possible
-    assert int(params["lora_alpha"]) == params["lora_alpha"], "cannot convert float to int losslessly"
+    assert (
+        int(params["lora_alpha"]) == params["lora_alpha"]
+    ), "cannot convert float to int losslessly"
     fout.write(struct.pack("i", int(params["lora_alpha"])))
 
 
 def write_tensor_header(
-    self, name: str, shape: Sequence[int], data_type: DataType
+    self, name: str, shape: Sequence[int], data_type: np.dtype
 ) -> None:
     sname = name.encode("utf-8")
     fout.write(
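The reformatted assert in write_file_header keeps the existing lossless-conversion check for lora_alpha. A small worked example of what it accepts and rejects (the parameter value is hypothetical):

    import struct

    params = {"lora_alpha": 32.0}  # hypothetical value from an adapter config

    # 32.0 converts to 32 without loss, so the assert passes and the int is packed.
    assert (
        int(params["lora_alpha"]) == params["lora_alpha"]
    ), "cannot convert float to int losslessly"
    packed = struct.pack("i", int(params["lora_alpha"]))

    # A value such as 32.5 would fail the assertion instead of being silently truncated.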
@@ -67,7 +68,7 @@ def write_tensor_header
             "iii",
             len(shape),
             len(sname),
-            DATA_TYPE_TO_FTYPE[NUMPY_TYPE_TO_DATA_TYPE[data_type]],
+            NUMPY_TYPE_TO_FTYPE[data_type.name],
         )
     )
     fout.write(struct.pack("i" * len(shape), *shape[::-1]))
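Putting the header pieces together, a self-contained sketch of the tensor header written here, using the new dtype-name lookup (the tensor name and shape are placeholders, not values from the commit):

    import struct
    import numpy as np

    NUMPY_TYPE_TO_FTYPE = {"float32": 0, "float16": 1}

    name = "blk.3.attn_q.weight.loraA"            # placeholder tensor name
    data = np.zeros((8, 4096), dtype=np.float16)  # placeholder LoRA matrix

    sname = name.encode("utf-8")
    header = struct.pack(
        "iii",
        len(data.shape),                        # number of dimensions
        len(sname),                             # length of the encoded name
        NUMPY_TYPE_TO_FTYPE[data.dtype.name],   # ftype derived from the numpy dtype
    )
    dims = struct.pack("i" * len(data.shape), *data.shape[::-1])  # dims, reversed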