From c837981bba7cf6839b69d32b25552ce685936b14 Mon Sep 17 00:00:00 2001
From: daminho <37615795+daminho@users.noreply.github.com>
Date: Thu, 12 Sep 2024 20:28:20 +0900
Subject: [PATCH] py : add Phi-1.5/Phi-2 tokenizer (#9361)

* add phi2 tokenizer
* add phi name to convert_hf_to_gguf_update.py
* make tokenizer_pre consistent; llama.cpp work
---
 convert_hf_to_gguf.py        | 3 +++
 convert_hf_to_gguf_update.py | 1 +
 2 files changed, 4 insertions(+)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index f02c65026..01a8a50a2 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -626,6 +626,9 @@ class Model:
         if chkhsh == "4e2b24cc4770243d65a2c9ec19770a72f08cffc161adbb73fcbb6b7dd45a0aae":
             # ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct
             res = "exaone"
+        if chkhsh == "fcace8b9cac38ce847670c970cd5892031a753a1ef381abd1d9af00f713da085":
+            # ref: https://huggingface.co/microsoft/phi-2
+            res = "phi-2"
 
         if res is None:
             logger.warning("\n")
diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py
index 59a0b81a1..021f65abd 100755
--- a/convert_hf_to_gguf_update.py
+++ b/convert_hf_to_gguf_update.py
@@ -98,6 +98,7 @@ models = [
     {"name": "bloom",         "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigscience/bloom", },
     {"name": "gpt3-finnish",  "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/TurkuNLP/gpt3-finnish-small", },
     {"name": "exaone",        "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct", },
+    {"name": "phi-2",         "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/microsoft/phi-2", },
 ]
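
Note (not part of the applied patch): the chkhsh value registered above can, in principle, be reproduced locally. This is a minimal sketch assuming the hash is computed the way get_vocab_base_pre() in convert_hf_to_gguf.py does it, as the SHA-256 of the stringified token IDs produced by encoding the shared check text; the chktxt value below is a placeholder and must be replaced with the exact string defined in that script.

# sketch.py -- hedged sketch, not part of the patch
from hashlib import sha256
from transformers import AutoTokenizer

# placeholder: copy the real chktxt string from convert_hf_to_gguf.py
chktxt = "..."

# download/load the tokenizer for the model being registered
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")

# hash the stringified token IDs of the check text
chktok = tokenizer.encode(chktxt)
chkhsh = sha256(str(chktok).encode()).hexdigest()

print(f"chkhsh: {chkhsh}")
# with the real chktxt, this is expected to print
# fcace8b9cac38ce847670c970cd5892031a753a1ef381abd1d9af00f713da085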