From 50d1a035f0f8b35eeb850f5dc50fe68a8b39c55d Mon Sep 17 00:00:00 2001
From: Francis Couture-Harpin
Date: Fri, 19 Jul 2024 22:46:35 -0400
Subject: [PATCH] convert_hf : fix Gemma v1 not setting BOS and EOS tokens

---
 convert_hf_to_gguf.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index ba47fdbf1..688010915 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -1310,6 +1310,7 @@ class RefactModel(Model):
         special_vocab._set_special_token("prefix", 1)
         special_vocab._set_special_token("suffix", 3)
         special_vocab._set_special_token("middle", 2)
+        special_vocab.chat_template = None  # do not add it twice
         special_vocab.add_to_gguf(self.gguf_writer)
 
     def set_gguf_parameters(self):
@@ -2466,13 +2467,7 @@ class GemmaModel(Model):
     model_arch = gguf.MODEL_ARCH.GEMMA
 
     def set_vocab(self):
-        tokens, scores, toktypes = self._create_vocab_sentencepiece()
-
-        self.gguf_writer.add_tokenizer_model("llama")
-        self.gguf_writer.add_tokenizer_pre("default")
-        self.gguf_writer.add_token_list(tokens)
-        self.gguf_writer.add_token_scores(scores)
-        self.gguf_writer.add_token_types(toktypes)
+        self._set_vocab_sentencepiece()
 
         # TODO: these special tokens should be exported only for the CodeGemma family
         special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False,
@@ -2482,6 +2477,7 @@ class GemmaModel(Model):
         special_vocab._set_special_token("middle", 68)
         special_vocab._set_special_token("fsep", 70)
         special_vocab._set_special_token("eot", 107)
+        special_vocab.chat_template = None  # do not add it twice
         special_vocab.add_to_gguf(self.gguf_writer)
 
         self.gguf_writer.add_add_space_prefix(False)