mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-11 19:21:46 +00:00
convert.py: Outfile default name change and additional metadata support (#4858)
* convert.py: Outfile default name change and additional metadata support * convert.py: don't stringify Metadata load method output * convert.py: typo fix * convert.py: fix metadata format to sync with LLM_KV_NAMES in llama.cpp
This commit is contained in:
parent
e586ee4259
commit
b1f8af1886
178
convert.py
178
convert.py
@ -24,7 +24,7 @@ from abc import ABC, abstractmethod
|
|||||||
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
|
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import TYPE_CHECKING, Any, Callable, ClassVar, IO, Iterable, Literal, Protocol, TypeVar, runtime_checkable
|
from typing import TYPE_CHECKING, Any, Callable, ClassVar, IO, Iterable, Literal, Protocol, TypeVar, runtime_checkable, Optional
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from sentencepiece import SentencePieceProcessor
|
from sentencepiece import SentencePieceProcessor
|
||||||
@ -344,10 +344,47 @@ class Params:
|
|||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class Metadata:
|
||||||
|
name: Optional[str] = None
|
||||||
|
author: Optional[str] = None
|
||||||
|
version: Optional[str] = None
|
||||||
|
url: Optional[str] = None
|
||||||
|
description: Optional[str] = None
|
||||||
|
licence: Optional[str] = None
|
||||||
|
source_url: Optional[str] = None
|
||||||
|
source_hf_repo: Optional[str] = None
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def load(metadata_path: Path) -> Metadata:
|
||||||
|
if metadata_path is None or not metadata_path.exists():
|
||||||
|
return Metadata()
|
||||||
|
|
||||||
|
with open(metadata_path, 'r') as file:
|
||||||
|
data = json.load(file)
|
||||||
|
|
||||||
|
# Create a new Metadata instance
|
||||||
|
metadata = Metadata()
|
||||||
|
|
||||||
|
# Assigning values to Metadata attributes if they exist in the JSON file
|
||||||
|
# This is based on LLM_KV_NAMES mapping in llama.cpp
|
||||||
|
metadata.name = data.get("general.name")
|
||||||
|
metadata.author = data.get("general.author")
|
||||||
|
metadata.version = data.get("general.version")
|
||||||
|
metadata.url = data.get("general.url")
|
||||||
|
metadata.description = data.get("general.description")
|
||||||
|
metadata.license = data.get("general.license")
|
||||||
|
metadata.source_url = data.get("general.source.url")
|
||||||
|
metadata.source_hf_repo = data.get("general.source.huggingface.repository")
|
||||||
|
|
||||||
|
return metadata
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# vocab
|
# vocab
|
||||||
#
|
#
|
||||||
|
|
||||||
|
|
||||||
@runtime_checkable
|
@runtime_checkable
|
||||||
class BaseVocab(Protocol):
|
class BaseVocab(Protocol):
|
||||||
tokenizer_model: ClassVar[str]
|
tokenizer_model: ClassVar[str]
|
||||||
@ -1066,21 +1103,42 @@ class OutputFile:
|
|||||||
def __init__(self, fname_out: Path, endianess:gguf.GGUFEndian = gguf.GGUFEndian.LITTLE):
|
def __init__(self, fname_out: Path, endianess:gguf.GGUFEndian = gguf.GGUFEndian.LITTLE):
|
||||||
self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianess=endianess)
|
self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianess=endianess)
|
||||||
|
|
||||||
def add_meta_arch(self, params: Params) -> None:
|
def add_meta_model(self, params: Params, metadata: Metadata) -> None:
|
||||||
|
# Metadata About The Model And Its Provenence
|
||||||
name = "LLaMA"
|
name = "LLaMA"
|
||||||
|
if metadata is not None and metadata.name is not None:
|
||||||
# TODO: better logic to determine model name
|
name = metadata.name
|
||||||
if params.n_ctx == 4096:
|
|
||||||
name = "LLaMA v2"
|
|
||||||
elif params.path_model is not None:
|
elif params.path_model is not None:
|
||||||
name = str(params.path_model.parent).split('/')[-1]
|
name = str(params.path_model.parent).split("/")[-1]
|
||||||
|
elif params.n_ctx == 4096:
|
||||||
|
# Heuristic detection of LLaMA v2 model
|
||||||
|
name = "LLaMA v2"
|
||||||
|
|
||||||
self.gguf.add_name (name)
|
self.gguf.add_name(name)
|
||||||
self.gguf.add_vocab_size (params.n_vocab)
|
|
||||||
self.gguf.add_context_length (params.n_ctx)
|
if metadata is not None:
|
||||||
self.gguf.add_embedding_length (params.n_embd)
|
if metadata.author is not None:
|
||||||
self.gguf.add_block_count (params.n_layer)
|
self.gguf.add_author(metadata.author)
|
||||||
self.gguf.add_feed_forward_length (params.n_ff)
|
if metadata.version is not None:
|
||||||
|
self.gguf.add_version(metadata.version)
|
||||||
|
if metadata.url is not None:
|
||||||
|
self.gguf.add_url(metadata.url)
|
||||||
|
if metadata.description is not None:
|
||||||
|
self.gguf.add_description(metadata.description)
|
||||||
|
if metadata.licence is not None:
|
||||||
|
self.gguf.add_licence(metadata.licence)
|
||||||
|
if metadata.source_url is not None:
|
||||||
|
self.gguf.add_source_url(metadata.source_url)
|
||||||
|
if metadata.source_hf_repo is not None:
|
||||||
|
self.gguf.add_source_hf_repo(metadata.source_hf_repo)
|
||||||
|
|
||||||
|
def add_meta_arch(self, params: Params) -> None:
|
||||||
|
# Metadata About The Neural Architecture Itself
|
||||||
|
self.gguf.add_vocab_size(params.n_vocab)
|
||||||
|
self.gguf.add_context_length(params.n_ctx)
|
||||||
|
self.gguf.add_embedding_length(params.n_embd)
|
||||||
|
self.gguf.add_block_count(params.n_layer)
|
||||||
|
self.gguf.add_feed_forward_length(params.n_ff)
|
||||||
self.gguf.add_rope_dimension_count(params.n_embd // params.n_head)
|
self.gguf.add_rope_dimension_count(params.n_embd // params.n_head)
|
||||||
self.gguf.add_head_count (params.n_head)
|
self.gguf.add_head_count (params.n_head)
|
||||||
self.gguf.add_head_count_kv (params.n_head_kv)
|
self.gguf.add_head_count_kv (params.n_head_kv)
|
||||||
@ -1183,13 +1241,14 @@ class OutputFile:
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def write_vocab_only(
|
def write_vocab_only(
|
||||||
fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab,
|
fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab,
|
||||||
endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE, pad_vocab: bool = False,
|
endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE, pad_vocab: bool = False, metadata: Metadata = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
check_vocab_size(params, vocab, pad_vocab=pad_vocab)
|
check_vocab_size(params, vocab, pad_vocab=pad_vocab)
|
||||||
|
|
||||||
of = OutputFile(fname_out, endianess=endianess)
|
of = OutputFile(fname_out, endianess=endianess)
|
||||||
|
|
||||||
# meta data
|
# meta data
|
||||||
|
of.add_meta_model(params, metadata)
|
||||||
of.add_meta_arch(params)
|
of.add_meta_arch(params)
|
||||||
of.add_meta_vocab(vocab)
|
of.add_meta_vocab(vocab)
|
||||||
of.add_meta_special_vocab(svocab)
|
of.add_meta_special_vocab(svocab)
|
||||||
@ -1216,12 +1275,14 @@ class OutputFile:
|
|||||||
fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: BaseVocab, svocab: gguf.SpecialVocab,
|
fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: BaseVocab, svocab: gguf.SpecialVocab,
|
||||||
concurrency: int = DEFAULT_CONCURRENCY, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE,
|
concurrency: int = DEFAULT_CONCURRENCY, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE,
|
||||||
pad_vocab: bool = False,
|
pad_vocab: bool = False,
|
||||||
|
metadata: Metadata = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
check_vocab_size(params, vocab, pad_vocab=pad_vocab)
|
check_vocab_size(params, vocab, pad_vocab=pad_vocab)
|
||||||
|
|
||||||
of = OutputFile(fname_out, endianess=endianess)
|
of = OutputFile(fname_out, endianess=endianess)
|
||||||
|
|
||||||
# meta data
|
# meta data
|
||||||
|
of.add_meta_model(params, metadata)
|
||||||
of.add_meta_arch(params)
|
of.add_meta_arch(params)
|
||||||
if isinstance(vocab, Vocab):
|
if isinstance(vocab, Vocab):
|
||||||
of.add_meta_vocab(vocab)
|
of.add_meta_vocab(vocab)
|
||||||
@ -1257,6 +1318,37 @@ def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileT
|
|||||||
raise ValueError(f"Unexpected combination of types: {name_to_type}")
|
raise ValueError(f"Unexpected combination of types: {name_to_type}")
|
||||||
|
|
||||||
|
|
||||||
|
def model_parameter_count(model: LazyModel) -> int:
|
||||||
|
total_model_parameters = 0
|
||||||
|
for i, (name, lazy_tensor) in enumerate(model.items()):
|
||||||
|
sum_weights_in_tensor = 1
|
||||||
|
for dim in lazy_tensor.shape:
|
||||||
|
sum_weights_in_tensor *= dim
|
||||||
|
total_model_parameters += sum_weights_in_tensor
|
||||||
|
return total_model_parameters
|
||||||
|
|
||||||
|
|
||||||
|
def model_parameter_count_rounded_notation(model_params_count: int) -> str:
|
||||||
|
if model_params_count > 1e12 :
|
||||||
|
# Trillions Of Parameters
|
||||||
|
scaled_model_params = model_params_count * 1e-12
|
||||||
|
scale_suffix = "T"
|
||||||
|
elif model_params_count > 1e9 :
|
||||||
|
# Billions Of Parameters
|
||||||
|
scaled_model_params = model_params_count * 1e-9
|
||||||
|
scale_suffix = "B"
|
||||||
|
elif model_params_count > 1e6 :
|
||||||
|
# Millions Of Parameters
|
||||||
|
scaled_model_params = model_params_count * 1e-6
|
||||||
|
scale_suffix = "M"
|
||||||
|
else:
|
||||||
|
# Thousands Of Parameters
|
||||||
|
scaled_model_params = model_params_count * 1e-3
|
||||||
|
scale_suffix = "K"
|
||||||
|
|
||||||
|
return f"{round(scaled_model_params)}{scale_suffix}"
|
||||||
|
|
||||||
|
|
||||||
def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyModel:
|
def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyModel:
|
||||||
return {name: tensor.astype(output_type.type_for_tensor(name, tensor))
|
return {name: tensor.astype(output_type.type_for_tensor(name, tensor))
|
||||||
for (name, tensor) in model.items()}
|
for (name, tensor) in model.items()}
|
||||||
@ -1436,13 +1528,35 @@ class VocabFactory:
|
|||||||
return vocab, special_vocab
|
return vocab, special_vocab
|
||||||
|
|
||||||
|
|
||||||
def default_outfile(model_paths: list[Path], file_type: GGMLFileType) -> Path:
|
def default_convention_outfile(file_type: GGMLFileType, params: Params, model_params_count: int, metadata: Metadata) -> str:
|
||||||
namestr = {
|
quantization = {
|
||||||
GGMLFileType.AllF32: "f32",
|
GGMLFileType.AllF32: "F32",
|
||||||
GGMLFileType.MostlyF16: "f16",
|
GGMLFileType.MostlyF16: "F16",
|
||||||
GGMLFileType.MostlyQ8_0:"q8_0",
|
GGMLFileType.MostlyQ8_0: "Q8_0",
|
||||||
}[file_type]
|
}[file_type]
|
||||||
ret = model_paths[0].parent / f"ggml-model-{namestr}.gguf"
|
|
||||||
|
parameters = model_parameter_count_rounded_notation(model_params_count)
|
||||||
|
|
||||||
|
expert_count = ""
|
||||||
|
if params.n_experts is not None:
|
||||||
|
expert_count = f"{params.n_experts}x"
|
||||||
|
|
||||||
|
version = ""
|
||||||
|
if metadata is not None and metadata.version is not None:
|
||||||
|
version = f"-{metadata.version}"
|
||||||
|
|
||||||
|
name = "ggml-model"
|
||||||
|
if metadata is not None and metadata.name is not None:
|
||||||
|
name = metadata.name
|
||||||
|
elif params.path_model is not None:
|
||||||
|
name = params.path_model.name
|
||||||
|
|
||||||
|
return f"{name}{version}-{expert_count}{parameters}-{quantization}"
|
||||||
|
|
||||||
|
|
||||||
|
def default_outfile(model_paths: list[Path], file_type: GGMLFileType, params: Params, model_params_count: int, metadata: Metadata) -> Path:
|
||||||
|
default_filename = default_convention_outfile(file_type, params, model_params_count, metadata)
|
||||||
|
ret = model_paths[0].parent / f"{default_filename}.gguf"
|
||||||
if ret in model_paths:
|
if ret in model_paths:
|
||||||
logger.error(
|
logger.error(
|
||||||
f"Error: Default output path ({ret}) would overwrite the input. "
|
f"Error: Default output path ({ret}) would overwrite the input. "
|
||||||
@ -1480,17 +1594,30 @@ def main(args_in: list[str] | None = None) -> None:
|
|||||||
parser.add_argument("--pad-vocab", action="store_true", help="add pad tokens when model vocab expects more than tokenizer metadata provides")
|
parser.add_argument("--pad-vocab", action="store_true", help="add pad tokens when model vocab expects more than tokenizer metadata provides")
|
||||||
parser.add_argument("--skip-unknown", action="store_true", help="skip unknown tensor names instead of failing")
|
parser.add_argument("--skip-unknown", action="store_true", help="skip unknown tensor names instead of failing")
|
||||||
parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
|
parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
|
||||||
|
parser.add_argument("--metadata", type=Path, help="Specify the path for a metadata file")
|
||||||
|
parser.add_argument("--get-outfile", action="store_true", help="get calculated default outfile name")
|
||||||
|
|
||||||
args = parser.parse_args(args_in)
|
args = parser.parse_args(args_in)
|
||||||
|
|
||||||
if args.verbose:
|
if args.verbose:
|
||||||
logging.basicConfig(level=logging.DEBUG)
|
logging.basicConfig(level=logging.DEBUG)
|
||||||
elif args.dump_single or args.dump:
|
elif args.dump_single or args.dump or args.get_outfile:
|
||||||
# Avoid printing anything besides the dump output
|
# Avoid printing anything besides the dump output
|
||||||
logging.basicConfig(level=logging.WARNING)
|
logging.basicConfig(level=logging.WARNING)
|
||||||
else:
|
else:
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
|
||||||
|
metadata = Metadata.load(args.metadata)
|
||||||
|
|
||||||
|
if args.get_outfile:
|
||||||
|
model_plus = load_some_model(args.model)
|
||||||
|
params = Params.load(model_plus)
|
||||||
|
model = convert_model_names(model_plus.model, params, args.skip_unknown)
|
||||||
|
model_params_count = model_parameter_count(model_plus.model)
|
||||||
|
ftype = pick_output_type(model, args.outtype)
|
||||||
|
print(f"{default_convention_outfile(ftype, params, model_params_count, metadata)}") # noqa: NP100
|
||||||
|
return
|
||||||
|
|
||||||
if args.no_vocab and args.vocab_only:
|
if args.no_vocab and args.vocab_only:
|
||||||
raise ValueError("--vocab-only does not make sense with --no-vocab")
|
raise ValueError("--vocab-only does not make sense with --no-vocab")
|
||||||
|
|
||||||
@ -1504,6 +1631,9 @@ def main(args_in: list[str] | None = None) -> None:
|
|||||||
else:
|
else:
|
||||||
model_plus = ModelPlus(model = {}, paths = [args.model / 'dummy'], format = 'none', vocab = None)
|
model_plus = ModelPlus(model = {}, paths = [args.model / 'dummy'], format = 'none', vocab = None)
|
||||||
|
|
||||||
|
model_params_count = model_parameter_count(model_plus.model)
|
||||||
|
logger.info(f"model parameters count : {model_params_count} ({model_parameter_count_rounded_notation(model_params_count)})")
|
||||||
|
|
||||||
if args.dump:
|
if args.dump:
|
||||||
do_dump_model(model_plus)
|
do_dump_model(model_plus)
|
||||||
return
|
return
|
||||||
@ -1557,7 +1687,7 @@ def main(args_in: list[str] | None = None) -> None:
|
|||||||
f_norm_eps = 1e-5,
|
f_norm_eps = 1e-5,
|
||||||
)
|
)
|
||||||
OutputFile.write_vocab_only(outfile, params, vocab, special_vocab,
|
OutputFile.write_vocab_only(outfile, params, vocab, special_vocab,
|
||||||
endianess=endianess, pad_vocab=args.pad_vocab)
|
endianess=endianess, pad_vocab=args.pad_vocab, metadata=metadata)
|
||||||
logger.info(f"Wrote {outfile}")
|
logger.info(f"Wrote {outfile}")
|
||||||
return
|
return
|
||||||
|
|
||||||
@ -1570,13 +1700,13 @@ def main(args_in: list[str] | None = None) -> None:
|
|||||||
model = convert_model_names(model, params, args.skip_unknown)
|
model = convert_model_names(model, params, args.skip_unknown)
|
||||||
ftype = pick_output_type(model, args.outtype)
|
ftype = pick_output_type(model, args.outtype)
|
||||||
model = convert_to_output_type(model, ftype)
|
model = convert_to_output_type(model, ftype)
|
||||||
outfile = args.outfile or default_outfile(model_plus.paths, ftype)
|
outfile = args.outfile or default_outfile(model_plus.paths, ftype, params, model_params_count, metadata)
|
||||||
|
|
||||||
params.ftype = ftype
|
params.ftype = ftype
|
||||||
logger.info(f"Writing {outfile}, format {ftype}")
|
logger.info(f"Writing {outfile}, format {ftype}")
|
||||||
|
|
||||||
OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab,
|
OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab,
|
||||||
concurrency=args.concurrency, endianess=endianess, pad_vocab=args.pad_vocab)
|
concurrency=args.concurrency, endianess=endianess, pad_vocab=args.pad_vocab, metadata=metadata)
|
||||||
logger.info(f"Wrote {outfile}")
|
logger.info(f"Wrote {outfile}")
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user