gguf-py : fix some metadata name extraction edge cases
* convert_lora : use the lora dir for the model card path
commit 2164c9deb3 (parent 87e397d00b)
convert_hf_to_gguf.py

```diff
@@ -62,6 +62,7 @@ class Model:
     gguf_writer: gguf.GGUFWriter
     model_name: str | None
     metadata_override: Path | None
+    dir_model_card: Path
 
     # subclasses should define this!
     model_arch: gguf.MODEL_ARCH
```
```diff
@@ -90,6 +91,7 @@ class Model:
         self.tensor_names = None
         self.metadata_override = metadata_override
         self.model_name = model_name
+        self.dir_model_card = dir_model  # overridden in convert_lora_to_gguf.py
 
         # Apply heuristics to figure out typical tensor encoding based on first layer tensor encoding type
         if self.ftype == gguf.LlamaFileType.GUESSED:
```
```diff
@@ -345,7 +347,7 @@ class Model:
 
         total_params, shared_params, expert_params, expert_count = self.gguf_writer.get_total_parameter_count()
 
-        self.metadata = gguf.Metadata.load(self.metadata_override, self.dir_model, self.model_name, total_params)
+        self.metadata = gguf.Metadata.load(self.metadata_override, self.dir_model, self.model_name, self.dir_model_card, total_params)
 
         # Fallback to model directory name if metadata name is still missing
         if self.metadata.name is None:
```
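Note that for a plain convert_hf_to_gguf.py run nothing changes: `dir_model_card` defaults to `dir_model`, so the model card is still read next to the weights. A minimal sketch of the relationship, with hypothetical paths:

```python
from pathlib import Path

# Hypothetical paths, for illustration only.
dir_model = Path("models/Meta-Llama-3-8B")   # config.json and tensors

# Plain HF conversion: the model card (README.md) sits next to the weights.
dir_model_card = dir_model

# LoRA conversion (see convert_lora_to_gguf.py below): the card of interest
# is the adapter's, not the base model's.
dir_model_card = Path("loras/my-adapter")
```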
convert_lora_to_gguf.py

```diff
@@ -304,12 +304,6 @@ if __name__ == '__main__':
     # load base model
     logger.info(f"Loading base model: {dir_base_model.name}")
     hparams = Model.load_hparams(dir_base_model)
-
-    with open(lora_config, "r") as f:
-        lparams: dict[str, Any] = json.load(f)
-
-    alpha: float = lparams["lora_alpha"]
-
     with torch.inference_mode():
         try:
             model_class = Model.from_model_architecture(hparams["architectures"][0])
```
```diff
@@ -320,12 +314,21 @@ if __name__ == '__main__':
         class LoraModel(model_class):
             model_arch = model_class.model_arch
 
+            lora_alpha: float
+
+            def __init__(self, *args, dir_lora_model: Path, lora_alpha: float, **kwargs):
+
+                super().__init__(*args, **kwargs)
+
+                self.dir_model_card = dir_lora_model
+                self.lora_alpha = float(lora_alpha)
+
             def set_type(self):
                 self.gguf_writer.add_type(gguf.GGUFType.ADAPTER)
                 self.gguf_writer.add_string(gguf.Keys.Adapter.TYPE, "lora")
 
             def set_gguf_parameters(self):
-                self.gguf_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, float(alpha))
+                self.gguf_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, self.lora_alpha)
                 super().set_gguf_parameters()
 
             def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
```
```diff
@@ -368,6 +371,11 @@ if __name__ == '__main__':
                     yield (dest_name + ".lora_a", lora_a)
                     yield (dest_name + ".lora_b", lora_b)
 
+        with open(lora_config, "r") as f:
+            lparams: dict[str, Any] = json.load(f)
+
+        alpha: float = lparams["lora_alpha"]
+
         model_instance = LoraModel(
             dir_base_model,
             ftype,
```
```diff
@@ -376,6 +384,8 @@ if __name__ == '__main__':
             use_temp_file=False,
             eager=args.no_lazy,
             dry_run=args.dry_run,
+            dir_lora_model=dir_lora,
+            lora_alpha=alpha,
         )
 
         logger.info("Exporting model...")
```
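The old `float(alpha)` inside `set_gguf_parameters` leaned on Python's late binding: `alpha` was a module-level name defined after the class body and looked up only when the method ran. A toy sketch of that behaviour (not from the patch):

```python
# Class bodies and methods do not capture outer names at definition time;
# free variables are looked up when the method is actually called.
class C:
    def value(self):
        return alpha  # resolved at call time, not at class-definition time

alpha = 32.0          # defined *after* the class, yet value() still works
print(C().value())    # 32.0
```

Passing `lora_alpha` through `__init__` makes that dependency explicit instead of relying on definition order.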
gguf-py/gguf/metadata.py

```diff
@@ -44,7 +44,7 @@ class Metadata:
     datasets: Optional[list[str]] = None
 
     @staticmethod
-    def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Path] = None, model_name: Optional[str] = None, total_params: int = 0) -> Metadata:
+    def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Path] = None, model_name: Optional[str] = None, model_card_path: Optional[Path] = None, total_params: int = 0) -> Metadata:
         # This grabs as many contextual authorship metadata as possible from the model repository
         # making any conversion as required to match the gguf kv store metadata format
         # as well as giving users the ability to override any authorship metadata that may be incorrect
```
```diff
@@ -52,11 +52,14 @@ class Metadata:
         # Create a new Metadata instance
         metadata = Metadata()
 
-        model_card = Metadata.load_model_card(model_path)
+        if model_card_path is None:
+            model_card_path = model_path
+
+        model_card = Metadata.load_model_card(model_card_path)
         hf_params = Metadata.load_hf_parameters(model_path)
 
         # heuristics
-        metadata = Metadata.apply_metadata_heuristic(metadata, model_card, hf_params, model_path, total_params)
+        metadata = Metadata.apply_metadata_heuristic(metadata, model_card, hf_params, model_card_path, total_params)
 
         # Metadata Override File Provided
         # This is based on LLM_KV_NAMES mapping in llama.cpp
```
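Hypothetical call sites sketching the fallback introduced above; the directory names are made up:

```python
from pathlib import Path
import gguf

# Plain conversion: model_card_path is omitted, so it falls back to model_path.
meta = gguf.Metadata.load(None, Path("models/Meta-Llama-3-8B"), None)

# LoRA conversion: tensors and config come from the base model directory,
# while the model card is read from the adapter directory.
meta = gguf.Metadata.load(None, Path("models/Meta-Llama-3-8B"), None,
                          model_card_path=Path("loras/my-adapter"))
```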
```diff
@@ -177,6 +180,12 @@ class Metadata:
             org_component = None
 
         name_parts: list[str] = model_full_name_component.split('-')
+
+        # Remove empty parts
+        for i in reversed(range(len(name_parts))):
+            if len(name_parts[i]) == 0:
+                del name_parts[i]
+
         name_types: list[
             set[Literal["basename", "size_label", "finetune", "version", "type"]]
         ] = [set() for _ in name_parts]
```
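This is what the new `-Mistral--Nemo-Base-2407-` test case below exercises: leading, trailing, and doubled dashes make `split()` produce empty strings, and deleting in reverse keeps the remaining indices valid. A quick illustration:

```python
name_parts = "-Mistral--Nemo-Base-2407-".split('-')
# ['', 'Mistral', '', 'Nemo', 'Base', '2407', '']

# Iterating the indices in reverse means deletions never shift
# the positions we have yet to visit.
for i in reversed(range(len(name_parts))):
    if len(name_parts[i]) == 0:
        del name_parts[i]

print(name_parts)  # ['Mistral', 'Nemo', 'Base', '2407']
```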
```diff
@@ -227,6 +236,13 @@ class Metadata:
                 if part.lower() == "lora":
                     name_parts[i] = "LoRA"
 
+        # Ignore word-based size labels when there is at least a number-based one present
+        if any(c.isdecimal() for n, t in zip(name_parts, name_types) if "size_label" in t for c in n):
+            for n, t in zip(name_parts, name_types):
+                if "size_label" in t:
+                    if all(c.isalpha() for c in n):
+                        t.remove("size_label")
+
         at_start = True
         # Find the basename through the annotated name
         for part, t in zip(name_parts, name_types):
```
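On the GreenNode test case added below, both "mini" and "7B" are initially tagged as size labels; the two predicates then keep only the number-based one. Roughly:

```python
# "7B" contains a decimal digit, so a number-based size label is present:
assert any(c.isdecimal() for c in "7B")

# "mini" is purely alphabetic, so its "size_label" tag gets dropped:
assert all(c.isalpha() for c in "mini")
```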
```diff
@@ -247,7 +263,8 @@ class Metadata:
                 break
 
         basename = "-".join(n for n, t in zip(name_parts, name_types) if "basename" in t) or None
-        size_label = "-".join(s for s, t in zip(name_parts, name_types) if "size_label" in t) or None
+        # Deduplicate size labels using order-preserving 'dict' ('set' seems to sort the keys)
+        size_label = "-".join(dict.fromkeys(s for s, t in zip(name_parts, name_types) if "size_label" in t).keys()) or None
         finetune = "-".join(f for f, t in zip(name_parts, name_types) if "finetune" in t) or None
         # TODO: should the basename version always be excluded?
         # TODO: should multiple versions be joined together?
```
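`dict` preserves insertion order (a language guarantee since Python 3.7), which is why it is used here rather than `set`; a set would also deduplicate, but its iteration order is arbitrary rather than insertion-ordered. For example:

```python
labels = ["7B", "7B"]  # duplicated size label, as in the test case below

# Order-preserving deduplication:
assert "-".join(dict.fromkeys(labels).keys()) == "7B"

# set(labels) also deduplicates, but its iteration order is unspecified,
# which matters once more than one distinct label survives.
```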
gguf-py/tests/test_metadata.py

```diff
@@ -54,7 +54,7 @@ class TestMetadataMethod(unittest.TestCase):
         self.assertEqual(gguf.Metadata.get_model_id_components("NousResearch/Meta-Llama-3-8B"),
                          ('Meta-Llama-3-8B', "NousResearch", 'Meta-Llama-3', None, None, '8B'))
 
-        # Can't detect all non standard form in a heuristically safe way... best to err in caution and output nothing...
+        # Non standard naming
         self.assertEqual(gguf.Metadata.get_model_id_components("Qwen1.5-MoE-A2.7B-Chat"),
                          ('Qwen1.5-MoE-A2.7B-Chat', None, 'Qwen1.5-MoE', 'Chat', None, 'A2.7B'))
 
```
```diff
@@ -71,7 +71,7 @@ class TestMetadataMethod(unittest.TestCase):
         self.assertEqual(gguf.Metadata.get_model_id_components("delphi-suite/stories-llama2-50k", 50 * 10**3),
                          ('stories-llama2-50k', 'delphi-suite', 'stories-llama2', None, None, '50K'))
 
-        # None standard and not easy to disambiguate
+        # Non standard and not easy to disambiguate
         self.assertEqual(gguf.Metadata.get_model_id_components("DeepSeek-Coder-V2-Lite-Instruct"),
                          ('DeepSeek-Coder-V2-Lite-Instruct', None, 'DeepSeek-Coder-V2-Lite', 'Instruct', None, None))
 
```
```diff
@@ -123,6 +123,20 @@ class TestMetadataMethod(unittest.TestCase):
         self.assertEqual(gguf.Metadata.get_model_id_components("bigscience/bloom-7b1-petals"),
                          ('bloom-7b1-petals', 'bigscience', 'bloom', 'petals', None, '7.1B'))
 
+        # Ignore full-text size labels when there are number-based ones, and deduplicate size labels
+        self.assertEqual(gguf.Metadata.get_model_id_components("MaziyarPanahi/GreenNode-mini-7B-multilingual-v1olet-Mistral-7B-Instruct-v0.1"),
+                         ('GreenNode-mini-7B-multilingual-v1olet-Mistral-7B-Instruct-v0.1', 'MaziyarPanahi', 'GreenNode-mini', 'multilingual-v1olet-Mistral-Instruct', 'v0.1', '7B'))
+
+        # Version at the end with a long basename
+        self.assertEqual(gguf.Metadata.get_model_id_components("mistralai/Mistral-Nemo-Base-2407"),
+                         ('Mistral-Nemo-Base-2407', 'mistralai', 'Mistral-Nemo-Base', None, '2407', None))
+
+        ## Invalid cases ##
+
+        # Start with a dash and has dashes in rows
+        self.assertEqual(gguf.Metadata.get_model_id_components("mistralai/-Mistral--Nemo-Base-2407-"),
+                         ('-Mistral--Nemo-Base-2407-', 'mistralai', 'Mistral-Nemo-Base', None, '2407', None))
+
     def test_apply_metadata_heuristic_from_model_card(self):
         model_card = {
             'tags': ['Llama-3', 'instruct', 'finetune', 'chatml', 'DPO', 'RLHF', 'gpt4', 'synthetic data', 'distillation', 'function calling', 'json mode', 'axolotl'],
```
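One plausible way to run the updated suite from the repository root; the path and pattern are assumptions based on the usual gguf-py layout:

```python
import unittest

# Discover and run just the metadata tests.
suite = unittest.defaultTestLoader.discover("gguf-py/tests", pattern="test_metadata.py")
unittest.TextTestRunner(verbosity=2).run(suite)
```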