From 06c3e4a1a7eec3db5ef69c9f31dfa55b22c4e778 Mon Sep 17 00:00:00 2001
From: klosax <131523366+klosax@users.noreply.github.com>
Date: Sat, 29 Jul 2023 21:38:01 +0200
Subject: [PATCH] Update convert-llama-h5-to-gguf.py

---
 convert-llama-h5-to-gguf.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/convert-llama-h5-to-gguf.py b/convert-llama-h5-to-gguf.py
index d36e6da9a..ba9e9f677 100644
--- a/convert-llama-h5-to-gguf.py
+++ b/convert-llama-h5-to-gguf.py
@@ -91,11 +91,13 @@ gguf_writer.write_float32(llm_arch + ".attention.layer_norm_rms_epsilon", hparam
 
 # TOKENIZATION
 
+print("write gguf tokenizer")
+
 tokens: List[str] = []
 scores: List[float] = []
 
 if Path(dir_model + "/tokenizer.model").is_file():
-    # vocab type SPIECE
+    # vocab type sentencepiece
     print("Adding sentencepiece tokenizer vocab.")
 
     tokenizer = SentencePieceProcessor(dir_model + "/tokenizer.model")
@@ -123,15 +125,12 @@ if Path(dir_model + "/tokenizer.model").is_file():
         tokens.append(text)
         scores.append(score)
 
-print("write gguf tokens")
-
 gguf_writer.write_tokenizer_model("llama")
 gguf_writer.write_token_list(tokens)
 gguf_writer.write_token_scores(scores)
 
 
 # TENSORS
 
-
 # tensor info
 print("write gguf tensor info")
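
For readers skimming the hunks above, here is a minimal sketch of how the tokenizer-vocab section reads once this patch is applied. It is not part of the patch: the write_tokenizer_vocab wrapper is hypothetical, the piece/score loop is a simplified stand-in for the lines between the two hunks that the diff does not show, and the gguf_writer methods are only the ones already visible in the context lines.

    # Sketch only -- not part of the patch. write_tokenizer_vocab is a
    # hypothetical wrapper around the section edited above; the real script
    # runs this at module level and has extra handling in the vocab loop.
    from pathlib import Path
    from typing import List

    from sentencepiece import SentencePieceProcessor


    def write_tokenizer_vocab(dir_model: str, gguf_writer) -> None:
        print("write gguf tokenizer")

        tokens: List[str] = []
        scores: List[float] = []

        if Path(dir_model + "/tokenizer.model").is_file():
            # vocab type sentencepiece
            print("Adding sentencepiece tokenizer vocab.")
            tokenizer = SentencePieceProcessor(dir_model + "/tokenizer.model")

            # Simplified: collect every piece and its score from the model.
            for i in range(tokenizer.vocab_size()):
                tokens.append(tokenizer.id_to_piece(i))
                scores.append(tokenizer.get_score(i))

        # These writer calls are the ones shown in the diff context above.
        gguf_writer.write_tokenizer_model("llama")
        gguf_writer.write_token_list(tokens)
        gguf_writer.write_token_scores(scores)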