Update convert-llama-h5-to-gguf.py

This commit is contained in:
klosax 2023-07-29 21:38:01 +02:00 committed by GitHub
parent 9577821487
commit 06c3e4a1a7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -91,11 +91,13 @@ gguf_writer.write_float32(llm_arch + ".attention.layer_norm_rms_epsilon", hparam
# TOKENIZATION # TOKENIZATION
print("write gguf tokenizer")
tokens: List[str] = [] tokens: List[str] = []
scores: List[float] = [] scores: List[float] = []
if Path(dir_model + "/tokenizer.model").is_file(): if Path(dir_model + "/tokenizer.model").is_file():
# vocab type SPIECE # vocab type sentencepiece
print("Adding sentencepiece tokenizer vocab.") print("Adding sentencepiece tokenizer vocab.")
tokenizer = SentencePieceProcessor(dir_model + "/tokenizer.model") tokenizer = SentencePieceProcessor(dir_model + "/tokenizer.model")
@@ -123,15 +125,12 @@ if Path(dir_model + "/tokenizer.model").is_file():
tokens.append(text) tokens.append(text)
scores.append(score) scores.append(score)
print("write gguf tokens")
gguf_writer.write_tokenizer_model("llama") gguf_writer.write_tokenizer_model("llama")
gguf_writer.write_token_list(tokens) gguf_writer.write_token_list(tokens)
gguf_writer.write_token_scores(scores) gguf_writer.write_token_scores(scores)
# TENSORS # TENSORS
# tensor info # tensor info
print("write gguf tensor info") print("write gguf tensor info")