mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-14 04:24:30 +00:00
convert.py : fix HF tensor permuting / unpacking
ggml-ci
This commit is contained in:
parent
78e1e57862
commit
acaa98234a
20
convert.py
20
convert.py
@ -812,6 +812,23 @@ def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyM
|
|||||||
def convert_model_names(model: LazyModel, params: Params) -> LazyModel:
|
def convert_model_names(model: LazyModel, params: Params) -> LazyModel:
|
||||||
tmap = gguf.get_tensor_name_map(ARCH, params.n_layer)
|
tmap = gguf.get_tensor_name_map(ARCH, params.n_layer)
|
||||||
|
|
||||||
|
tmp = model
|
||||||
|
|
||||||
|
# HF models permute or pack some of the tensors, so we need to undo that
|
||||||
|
for i in itertools.count():
|
||||||
|
if f"model.layers.{i}.self_attn.q_proj.weight" in model:
|
||||||
|
print(f"Permuting layer {i}")
|
||||||
|
tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head, params.n_head_kv)
|
||||||
|
tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.k_proj.weight"], params.n_head, params.n_head_kv)
|
||||||
|
#tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"]
|
||||||
|
elif f"model.layers.{i}.self_attn.W_pack.weight" in model:
|
||||||
|
print(f"Unpacking and permuting layer {i}")
|
||||||
|
tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 0, params.n_head, params.n_head_kv)
|
||||||
|
tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 1, params.n_head, params.n_head_kv)
|
||||||
|
tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = part_lazy (model[f"model.layers.{i}.self_attn.W_pack.weight"], 2)
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
|
||||||
out: LazyModel = {}
|
out: LazyModel = {}
|
||||||
for name, lazy_tensor in model.items():
|
for name, lazy_tensor in model.items():
|
||||||
name_new = name
|
name_new = name
|
||||||
@ -825,8 +842,9 @@ def convert_model_names(model: LazyModel, params: Params) -> LazyModel:
|
|||||||
else:
|
else:
|
||||||
raise Exception(f"Unexpected tensor name: {name}")
|
raise Exception(f"Unexpected tensor name: {name}")
|
||||||
|
|
||||||
if gguf.should_skip_tensor(ARCH, params.n_layer, name_new):
|
if gguf.should_skip_tensor_TMP(ARCH, params.n_layer, name_new):
|
||||||
print(f"skipping tensor {name_new}")
|
print(f"skipping tensor {name_new}")
|
||||||
|
continue
|
||||||
else:
|
else:
|
||||||
print(f"{name:48s} -> {name_new:40s} | {lazy_tensor.data_type} | {lazy_tensor.shape}")
|
print(f"{name:48s} -> {name_new:40s} | {lazy_tensor.data_type} | {lazy_tensor.shape}")
|
||||||
out[name_new] = lazy_tensor
|
out[name_new] = lazy_tensor
|
||||||
|
6
gguf.py
6
gguf.py
@ -148,7 +148,11 @@ MODEL_TENSOR_SKIP = {
|
|||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
def should_skip_tensor(arch : MODEL_ARCH, n_blocks : int, name : str) -> bool:
|
# TODO: the following helper functions should be removed
|
||||||
|
# instead, get_tensor_name_map should return tuples of (name, MODEL_TENSOR)
|
||||||
|
# however, my Python is very bad, and I couldn't figure out how to do this, hence these functions
|
||||||
|
# REMOVE
|
||||||
|
def should_skip_tensor_TMP(arch : MODEL_ARCH, n_blocks : int, name : str) -> bool:
|
||||||
for skip in MODEL_TENSOR_SKIP.get(arch, []):
|
for skip in MODEL_TENSOR_SKIP.get(arch, []):
|
||||||
for i in range(n_blocks):
|
for i in range(n_blocks):
|
||||||
if name == MODEL_TENSOR_NAMES[arch][skip].format(bid=i):
|
if name == MODEL_TENSOR_NAMES[arch][skip].format(bid=i):
|
||||||
|
Loading…
Reference in New Issue
Block a user