From 9b464b4e81dce67371de0b33537c7af3b9960c45 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Tue, 16 Jan 2024 13:38:54 +0200 Subject: [PATCH] py : fix missing added_tokens_dict for SPM vocab --- convert.py | 1 + 1 file changed, 1 insertion(+) diff --git a/convert.py b/convert.py index 3b613eefc..316028592 100755 --- a/convert.py +++ b/convert.py @@ -466,6 +466,7 @@ class SentencePieceVocab: # LlaMa ) # Token pieces that were added to the base vocabulary. + self.added_tokens_dict = added_tokens self.added_tokens_list = [new_tokens[id] for id in actual_new_ids] self.vocab_size_base = vocab_size self.vocab_size = self.vocab_size_base + len(self.added_tokens_list)