mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-24 10:24:35 +00:00
ci : add flake8 to github actions (python linting) (#4129)
Disabled rules: * E203 Whitespace before ':' - disabled because we often use 'C' Style where values are aligned * E211 Whitespace before '(' (E211) - disabled because we often use 'C' Style where values are aligned * E221 Multiple spaces before operator - disabled because we often use 'C' Style where values are aligned * E225 Missing whitespace around operator - disabled because it's broken so often it seems like a standard * E231 Missing whitespace after ',', ';', or ':' - disabled because we often use 'C' Style where values are aligned * E241 Multiple spaces after ',' - disabled because we often use 'C' Style where values are aligned * E251 Unexpected spaces around keyword / parameter equals - disabled because it's broken so often it seems like a standard * E261 At least two spaces before inline comment - disabled because it's broken so often it seems like a standard * E266 Too many leading '#' for block comment - sometimes used as "section" separator * E501 Line too long - disabled because it's broken so often it seems like a standard * E701 Multiple statements on one line (colon) - broken only in convert.py when defining abstract methods (we can use# noqa instead) * E704 Multiple statements on one line - broken only in convert.py when defining abstract methods (we can use# noqa instead)
This commit is contained in:
parent
40a34fe8d0
commit
f23c0359a3
20
.github/workflows/python-lint.yml
vendored
Normal file
20
.github/workflows/python-lint.yml
vendored
Normal file
@ -0,0 +1,20 @@
|
||||
name: flake8 Lint
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
jobs:
|
||||
flake8-lint:
|
||||
runs-on: ubuntu-latest
|
||||
name: Lint
|
||||
steps:
|
||||
- name: Check out source repository
|
||||
uses: actions/checkout@v3
|
||||
- name: Set up Python environment
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.11"
|
||||
- name: flake8 Lint
|
||||
uses: py-actions/flake8@v2
|
||||
with:
|
||||
ignore: "E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704"
|
||||
exclude: "examples/*,examples/*/**,*/**/__init__.py"
|
@ -834,6 +834,7 @@ class StableLMModel(Model):
|
||||
|
||||
###### CONVERSION LOGIC ######
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(description="Convert a huggingface model to a GGML compatible file")
|
||||
parser.add_argument(
|
||||
|
@ -14,11 +14,13 @@ if 'NO_LOCAL_GGUF' not in os.environ:
|
||||
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
|
||||
import gguf
|
||||
|
||||
|
||||
class GGMLFormat(IntEnum):
|
||||
GGML = 0
|
||||
GGMF = 1
|
||||
GGJT = 2
|
||||
|
||||
|
||||
class GGMLFType(IntEnum):
|
||||
ALL_F32 = 0
|
||||
MOSTLY_F16 = 1
|
||||
@ -38,6 +40,7 @@ class GGMLFType(IntEnum):
|
||||
MOSTLY_Q5_K_M = 17
|
||||
MOSTLY_Q6_K = 18
|
||||
|
||||
|
||||
class Hyperparameters:
|
||||
def __init__(self):
|
||||
self.n_vocab = self.n_embd = self.n_mult = self.n_head = 0
|
||||
@ -69,6 +72,7 @@ class Hyperparameters:
|
||||
def __str__(self):
|
||||
return f'<Hyperparameters: n_vocab={self.n_vocab}, n_embd={self.n_embd}, n_mult={self.n_mult}, n_head={self.n_head}, n_layer={self.n_layer}, n_rot={self.n_rot}, n_ff={self.n_ff}, ftype={self.ftype.name}>'
|
||||
|
||||
|
||||
class Vocab:
|
||||
def __init__(self, load_scores = True):
|
||||
self.items = []
|
||||
@ -90,6 +94,7 @@ class Vocab:
|
||||
self.items.append((item_text, item_score))
|
||||
return offset - orig_offset
|
||||
|
||||
|
||||
class Tensor:
|
||||
def __init__(self, use_padding = True):
|
||||
self.name = None
|
||||
@ -123,6 +128,7 @@ class Tensor:
|
||||
# print(n_dims, name_len, dtype, self.dims, self.name, pad)
|
||||
return offset - orig_offset
|
||||
|
||||
|
||||
class GGMLModel:
|
||||
def __init__(self):
|
||||
self.hyperparameters = None
|
||||
@ -187,6 +193,7 @@ class GGMLModel:
|
||||
hp.set_n_ff(self)
|
||||
return offset
|
||||
|
||||
|
||||
class GGMLToGGUF:
|
||||
def __init__(self, ggml_model, data, cfg, params_override = None, vocab_override = None, special_vocab = None):
|
||||
hp = ggml_model.hyperparameters
|
||||
@ -343,6 +350,7 @@ class GGMLToGGUF:
|
||||
raw_shape = tempdims,
|
||||
raw_dtype = tensor.dtype)
|
||||
|
||||
|
||||
def handle_metadata(cfg, hp):
|
||||
import convert
|
||||
assert cfg.model_metadata_dir.is_dir(), 'Metadata dir is not a directory'
|
||||
@ -373,6 +381,7 @@ def handle_metadata(cfg, hp):
|
||||
convert.check_vocab_size(params, vocab)
|
||||
return (params, vocab, svocab)
|
||||
|
||||
|
||||
def handle_args():
|
||||
parser = argparse.ArgumentParser(description = 'Convert GGML models to GGUF')
|
||||
parser.add_argument('--input', '-i', type = Path, required = True,
|
||||
@ -397,6 +406,7 @@ def handle_args():
|
||||
help="vocab format - only meaningful with --model-metadata-dir and/or --vocab-dir (default: spm)")
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def main():
|
||||
cfg = handle_args()
|
||||
print(f'* Using config: {cfg}')
|
||||
@ -406,7 +416,7 @@ def main():
|
||||
data = np.memmap(cfg.input, mode = 'r')
|
||||
model = GGMLModel()
|
||||
print('* Scanning GGML input file')
|
||||
offset = model.load(data, 0)
|
||||
offset = model.load(data, 0) # noqa
|
||||
print(f'* GGML model hyperparameters: {model.hyperparameters}')
|
||||
vocab_override = None
|
||||
params_override = None
|
||||
@ -421,12 +431,15 @@ def main():
|
||||
print('\n=== WARNING === Special tokens may not be converted correctly. Use --model-metadata-dir if possible === WARNING ===\n')
|
||||
if model.file_format == GGMLFormat.GGML:
|
||||
print('! This is a very old GGML file that does not contain vocab scores. Strongly recommend using model metadata!')
|
||||
converter = GGMLToGGUF(model, data, cfg,
|
||||
converter = GGMLToGGUF(
|
||||
model, data, cfg,
|
||||
params_override = params_override,
|
||||
vocab_override = vocab_override,
|
||||
special_vocab = special_vocab )
|
||||
special_vocab = special_vocab
|
||||
)
|
||||
converter.save()
|
||||
print(f'* Successful completion. Output saved to: {cfg.output}')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
@ -9,6 +9,7 @@ if 'NO_LOCAL_GGUF' not in os.environ:
|
||||
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
|
||||
import gguf
|
||||
|
||||
|
||||
def _flatten_dict(dct, tensors, prefix=None):
|
||||
assert isinstance(dct, dict)
|
||||
for key in dct.keys():
|
||||
@ -21,6 +22,7 @@ def _flatten_dict(dct, tensors, prefix=None):
|
||||
raise ValueError(type(dct[key]))
|
||||
return None
|
||||
|
||||
|
||||
def _get_sentencepiece_tokenizer_info(dir_model: Path):
|
||||
tokenizer_path = dir_model / 'adept_vocab.model'
|
||||
print('gguf: getting sentencepiece tokenizer from', tokenizer_path)
|
||||
@ -54,6 +56,7 @@ def _get_sentencepiece_tokenizer_info(dir_model: Path):
|
||||
pass
|
||||
return tokens, scores, toktypes
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Convert a Persimmon model from Adept (e.g. Persimmon 8b chat) to a GGML compatible file")
|
||||
parser.add_argument("--outfile", type=Path, help="path to write to; default: based on input")
|
||||
@ -125,6 +128,5 @@ def main():
|
||||
print("")
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
22
convert.py
Executable file → Normal file
22
convert.py
Executable file → Normal file
@ -46,6 +46,7 @@ DEFAULT_CONCURRENCY = 8
|
||||
# data types
|
||||
#
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class DataType:
|
||||
name: str
|
||||
@ -55,15 +56,18 @@ class DataType:
|
||||
def elements_to_bytes(self, n_elements: int) -> int:
|
||||
return n_elements * self.dtype.itemsize
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class UnquantizedDataType(DataType):
|
||||
pass
|
||||
|
||||
|
||||
DT_F16 = UnquantizedDataType('F16', dtype = np.dtype(np.float16), valid_conversions = ['F32', 'Q8_0'])
|
||||
DT_F32 = UnquantizedDataType('F32', dtype = np.dtype(np.float32), valid_conversions = ['F16', 'Q8_0'])
|
||||
DT_I32 = UnquantizedDataType('I32', dtype = np.dtype(np.int16), valid_conversions = [])
|
||||
DT_BF16 = UnquantizedDataType('BF16', dtype = np.dtype(np.uint16), valid_conversions = ['F32', 'F16', 'Q8_0'])
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class QuantizedDataType(DataType):
|
||||
block_size: int
|
||||
@ -77,6 +81,7 @@ class QuantizedDataType(DataType):
|
||||
assert n_elements % self.block_size == 0, f'Invalid number of elements {n_elements} for {self.name} with block size {self.block_size}'
|
||||
return self.quantized_dtype.itemsize * (n_elements // self.block_size)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class Q8_0QuantizedDataType(QuantizedDataType):
|
||||
# Mini Q8_0 quantization in Python!
|
||||
@ -86,6 +91,7 @@ class Q8_0QuantizedDataType(QuantizedDataType):
|
||||
n_blocks = arr.size // self.block_size
|
||||
blocks = arr.reshape((n_blocks, self.block_size))
|
||||
# Much faster implementation of block quantization contributed by @Cebtenzzre
|
||||
|
||||
def quantize_blocks_q8_0(blocks: NDArray) -> Iterable[tuple[Any, Any]]:
|
||||
d = abs(blocks).max(axis = 1) / np.float32(127)
|
||||
with np.errstate(divide = 'ignore'):
|
||||
@ -94,6 +100,7 @@ class Q8_0QuantizedDataType(QuantizedDataType):
|
||||
yield from zip(d, qs)
|
||||
return np.fromiter(quantize_blocks_q8_0(blocks), count = n_blocks, dtype = self.quantized_dtype)
|
||||
|
||||
|
||||
DT_Q8_0 = Q8_0QuantizedDataType('Q8_0',
|
||||
dtype = np.dtype(np.float32), valid_conversions = [],
|
||||
ggml_type = gguf.GGMLQuantizationType.Q8_0, block_size = 32,
|
||||
@ -116,6 +123,8 @@ SAFETENSORS_DATA_TYPES: dict[str, DataType] = {
|
||||
# TODO: match this with `llama_ftype`
|
||||
# TODO: rename to LLAMAFileType
|
||||
# TODO: move to `gguf.py`
|
||||
|
||||
|
||||
class GGMLFileType(enum.IntEnum):
|
||||
AllF32 = 0
|
||||
MostlyF16 = 1 # except 1d tensors
|
||||
@ -128,6 +137,7 @@ class GGMLFileType(enum.IntEnum):
|
||||
# 1D tensors are always F32.
|
||||
return dt if len(tensor.shape) > 1 else DT_F32
|
||||
|
||||
|
||||
GGML_FILE_TYPE_TO_DATA_TYPE: dict[GGMLFileType, DataType] = {
|
||||
GGMLFileType.AllF32 : DT_F32,
|
||||
GGMLFileType.MostlyF16 : DT_F16,
|
||||
@ -138,6 +148,7 @@ GGML_FILE_TYPE_TO_DATA_TYPE: dict[GGMLFileType, DataType] = {
|
||||
# hparams loading
|
||||
#
|
||||
|
||||
|
||||
@dataclass
|
||||
class Params:
|
||||
n_vocab: int
|
||||
@ -326,7 +337,6 @@ class BpeVocab:
|
||||
|
||||
def bpe_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
|
||||
tokenizer = self.bpe_tokenizer
|
||||
from transformers.models.gpt2 import tokenization_gpt2
|
||||
reverse_vocab = {id: encoded_tok for encoded_tok, id in tokenizer.items()}
|
||||
|
||||
for i, _ in enumerate(tokenizer):
|
||||
@ -406,6 +416,7 @@ class SentencePieceVocab:
|
||||
def __repr__(self) -> str:
|
||||
return f"<SentencePieceVocab with {self.vocab_size_base} base tokens and {len(self.added_tokens_list)} added tokens>"
|
||||
|
||||
|
||||
Vocab: TypeAlias = 'BpeVocab | SentencePieceVocab'
|
||||
|
||||
#
|
||||
@ -413,6 +424,7 @@ Vocab: TypeAlias = 'BpeVocab | SentencePieceVocab'
|
||||
# TODO: reuse (probably move to gguf.py?)
|
||||
#
|
||||
|
||||
|
||||
def permute(weights: NDArray, n_head: int, n_head_kv: int) -> NDArray:
|
||||
# print( "permute debug " + str(weights.shape[0]) + " x " + str(weights.shape[1]) + " nhead " + str(n_head) + " nheadkv " + str(n_kv_head) )
|
||||
if n_head_kv is not None and n_head != n_head_kv:
|
||||
@ -588,6 +600,7 @@ def permute_lazy(lazy_tensor: LazyTensor, n_head: int, n_head_kv: int) -> LazyTe
|
||||
return lazy_tensor.load().permute(n_head, n_head_kv)
|
||||
return LazyTensor(load, lazy_tensor.shape, lazy_tensor.data_type, f'permute({n_head}, {n_head_kv}) ' + lazy_tensor.description)
|
||||
|
||||
|
||||
def permute_part_lazy(lazy_tensor: LazyTensor, n_part: int, n_head: int, n_head_kv: int) -> LazyTensor:
|
||||
def load() -> Tensor:
|
||||
return lazy_tensor.load().permute_part(n_part, n_head, n_head_kv)
|
||||
@ -595,6 +608,7 @@ def permute_part_lazy(lazy_tensor: LazyTensor, n_part: int, n_head: int, n_head_
|
||||
s[0] = s[0] // 3
|
||||
return LazyTensor(load, s, lazy_tensor.data_type, f'permute({n_head}, {n_head_kv}) ' + lazy_tensor.description)
|
||||
|
||||
|
||||
def part_lazy(lazy_tensor: LazyTensor, n_part: int) -> LazyTensor:
|
||||
def load() -> Tensor:
|
||||
return lazy_tensor.load().part(n_part)
|
||||
@ -744,6 +758,7 @@ def lazy_load_file(path: Path) -> ModelPlus:
|
||||
In = TypeVar('In')
|
||||
Out = TypeVar('Out')
|
||||
|
||||
|
||||
def bounded_parallel_map(func: Callable[[In], Out], iterable: Iterable[In], concurrency: int, max_workers: int | None = None, use_processpool_executor: bool = False) -> Iterable[Out]:
|
||||
'''Parallel map, but with backpressure. If the caller doesn't call `next`
|
||||
fast enough, this will stop calling `func` at some point rather than
|
||||
@ -778,6 +793,7 @@ def bounded_parallel_map(func: Callable[[In], Out], iterable: Iterable[In], conc
|
||||
break
|
||||
yield result
|
||||
|
||||
|
||||
def check_vocab_size(params: Params, vocab: Vocab) -> None:
|
||||
if params.n_vocab != vocab.vocab_size:
|
||||
assert isinstance(vocab, BpeVocab) or isinstance(vocab, SentencePieceVocab)
|
||||
@ -938,6 +954,7 @@ class OutputFile:
|
||||
|
||||
of.close()
|
||||
|
||||
|
||||
def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileType:
|
||||
wq_type = model[gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ATTN_Q].format(bid=0) +".weight"].data_type
|
||||
|
||||
@ -952,10 +969,12 @@ def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileT
|
||||
|
||||
raise Exception(f"Unexpected combination of types: {name_to_type}")
|
||||
|
||||
|
||||
def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyModel:
|
||||
return {name: tensor.astype(output_type.type_for_tensor(name, tensor))
|
||||
for (name, tensor) in model.items()}
|
||||
|
||||
|
||||
def convert_model_names(model: LazyModel, params: Params) -> LazyModel:
|
||||
tmap = gguf.TensorNameMap(ARCH, params.n_layer)
|
||||
should_skip: set[gguf.MODEL_TENSOR] = set(gguf.MODEL_TENSOR_SKIP.get(ARCH, []))
|
||||
@ -993,6 +1012,7 @@ def convert_model_names(model: LazyModel, params: Params) -> LazyModel:
|
||||
|
||||
return out
|
||||
|
||||
|
||||
def nth_multifile_path(path: Path, n: int) -> Path | None:
|
||||
'''Given any path belonging to a multi-file model (e.g. foo.bin.1), return
|
||||
the nth path in the model.
|
||||
|
Loading…
Reference in New Issue
Block a user