Mirror of https://github.com/ggerganov/llama.cpp.git (synced 2024-12-27 20:04:35 +00:00)
Update convert-llama-h5-to-gguf.py
commit 06c3e4a1a7
parent 9577821487
@@ -91,11 +91,13 @@ gguf_writer.write_float32(llm_arch + ".attention.layer_norm_rms_epsilon", hparam
 
 # TOKENIZATION
 
+print("write gguf tokenizer")
+
 tokens: List[str] = []
 scores: List[float] = []
 
 if Path(dir_model + "/tokenizer.model").is_file():
-    # vocab type SPIECE
+    # vocab type sentencepiece
     print("Adding sentencepiece tokenizer vocab.")
     tokenizer = SentencePieceProcessor(dir_model + "/tokenizer.model")
 
@@ -123,15 +125,12 @@ if Path(dir_model + "/tokenizer.model").is_file():
         tokens.append(text)
         scores.append(score)
 
-print("write gguf tokens")
-
 gguf_writer.write_tokenizer_model("llama")
 gguf_writer.write_token_list(tokens)
 gguf_writer.write_token_scores(scores)
 
 # TENSORS
 
-
 # tensor info
 print("write gguf tensor info")
 
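For context, the hunks above only adjust prints and comments around the vocab-collection step. The sketch below shows roughly what that step does, assuming the sentencepiece Python package and a gguf_writer object exposing the write_tokenizer_model / write_token_list / write_token_scores methods seen in the diff; it is an illustration, not the script's actual loop body, which the hunks elide.

# Rough sketch only: the real convert-llama-h5-to-gguf.py loop also handles
# byte, unknown and control pieces specially; that part is not visible here.
from pathlib import Path
from typing import List

from sentencepiece import SentencePieceProcessor

def collect_sentencepiece_vocab(dir_model: str):
    tokens: List[str] = []
    scores: List[float] = []

    if Path(dir_model + "/tokenizer.model").is_file():
        tokenizer = SentencePieceProcessor(dir_model + "/tokenizer.model")
        for i in range(tokenizer.vocab_size()):
            # piece text and its log-probability score as stored in the model
            tokens.append(tokenizer.id_to_piece(i))
            scores.append(tokenizer.get_score(i))

    return tokens, scores

# Usage, mirroring the calls shown in the diff:
# tokens, scores = collect_sentencepiece_vocab(dir_model)
# gguf_writer.write_tokenizer_model("llama")
# gguf_writer.write_token_list(tokens)
# gguf_writer.write_token_scores(scores)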