Mirror of https://github.com/ggerganov/llama.cpp.git (synced 2024-12-28 12:24:35 +00:00)
py : fix missing added_tokens_dict for SPM vocab

commit 9b464b4e81 (parent a0b3ac8c48)
@@ -466,6 +466,7 @@ class SentencePieceVocab: # LlaMa
             )
 
         # Token pieces that were added to the base vocabulary.
+        self.added_tokens_dict = added_tokens
         self.added_tokens_list = [new_tokens[id] for id in actual_new_ids]
         self.vocab_size_base = vocab_size
         self.vocab_size = self.vocab_size_base + len(self.added_tokens_list)
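The added line keeps the raw piece-to-ID mapping alongside the derived list, so callers can look up an added token by its text as well as enumerate the added pieces in ID order. Below is a minimal sketch of that relationship, not the actual convert.py class: the class name, example tokens, and simplified validation are illustrative assumptions.

# Sketch only: mirrors the attribute names from the diff, not the real SentencePieceVocab.
class VocabSketch:
    def __init__(self, vocab_size, added_tokens):
        # Added-token IDs are expected to follow the base vocabulary contiguously.
        new_tokens = {id: piece for piece, id in added_tokens.items()}
        expected_new_ids = list(range(vocab_size, vocab_size + len(new_tokens)))
        actual_new_ids = sorted(new_tokens.keys())
        if expected_new_ids != actual_new_ids:
            raise ValueError(f"Expected new token IDs {expected_new_ids}; got {actual_new_ids}")

        # Keep both views: piece -> id (dict) and pieces ordered by id (list).
        self.added_tokens_dict = added_tokens
        self.added_tokens_list = [new_tokens[id] for id in actual_new_ids]
        self.vocab_size_base = vocab_size
        self.vocab_size = self.vocab_size_base + len(self.added_tokens_list)


vocab = VocabSketch(vocab_size=32000, added_tokens={"<pad>": 32000, "<sep>": 32001})
assert vocab.added_tokens_dict["<sep>"] == 32001
assert vocab.added_tokens_list == ["<pad>", "<sep>"]
assert vocab.vocab_size == 32002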