mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-30 21:34:36 +00:00
py : fix missing added_tokens_dict for SPM vocab
This commit is contained in:
parent
a0b3ac8c48
commit
9b464b4e81
@@ -466,6 +466,7 @@ class SentencePieceVocab: # LlaMa
 )
 
 # Token pieces that were added to the base vocabulary.
+self.added_tokens_dict = added_tokens
 self.added_tokens_list = [new_tokens[id] for id in actual_new_ids]
 self.vocab_size_base = vocab_size
 self.vocab_size = self.vocab_size_base + len(self.added_tokens_list)
Loading…
Reference in New Issue
Block a user