Update convert-llama-h5-to-gguf.py

This commit is contained in:
klosax 2023-07-29 21:38:01 +02:00 committed by GitHub
parent 9577821487
commit 06c3e4a1a7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -91,11 +91,13 @@ gguf_writer.write_float32(llm_arch + ".attention.layer_norm_rms_epsilon", hparam
# TOKENIZATION
print("write gguf tokenizer")
tokens: List[str] = []
scores: List[float] = []
if Path(dir_model + "/tokenizer.model").is_file():
# vocab type SPIECE
# vocab type sentencepiece
print("Adding sentencepiece tokenizer vocab.")
tokenizer = SentencePieceProcessor(dir_model + "/tokenizer.model")
@@ -123,15 +125,12 @@ if Path(dir_model + "/tokenizer.model").is_file():
tokens.append(text)
scores.append(score)
print("write gguf tokens")
gguf_writer.write_tokenizer_model("llama")
gguf_writer.write_token_list(tokens)
gguf_writer.write_token_scores(scores)
# TENSORS
# tensor info
print("write gguf tensor info")