convert : remove fsep token from GPTRefactForCausalLM (#8237)

The <filename> token used by Refact doesn't serve
the same purpose as the <file_separator> from CodeGemma.

Signed-off-by: Jiri Podivin <jpodivin@redhat.com>
This commit is contained in:
Jiří Podivín 2024-07-12 10:06:33 +02:00 committed by GitHub
parent 71c1121d11
commit 5aefbce27a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1203,11 +1203,10 @@ class RefactModel(Model):
# TODO: how to determine special FIM tokens automatically? # TODO: how to determine special FIM tokens automatically?
special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False, special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False,
special_token_types = ['prefix', 'suffix', 'middle', 'fsep', 'eot']) special_token_types = ['prefix', 'suffix', 'middle', 'eot'])
special_vocab._set_special_token("prefix", 1) special_vocab._set_special_token("prefix", 1)
special_vocab._set_special_token("suffix", 3) special_vocab._set_special_token("suffix", 3)
special_vocab._set_special_token("middle", 2) special_vocab._set_special_token("middle", 2)
special_vocab._set_special_token("fsep", 4) # is this correct?
special_vocab.add_to_gguf(self.gguf_writer) special_vocab.add_to_gguf(self.gguf_writer)
def set_gguf_parameters(self): def set_gguf_parameters(self):