diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py index f47052ef1..a3eaabb80 100755 --- a/convert-hf-to-gguf.py +++ b/convert-hf-to-gguf.py @@ -98,7 +98,7 @@ class Model: if (f_rms_eps := self.hparams.get("rms_norm_eps")) is not None: self.gguf_writer.add_layer_norm_rms_eps(f_rms_eps) - if (f_norm_eps := self.find_hparam(["layer_norm_eps", "layer_norm_epsilon"], optional=True)) is not None: + if (f_norm_eps := self.find_hparam(["layer_norm_eps", "layer_norm_epsilon", "norm_epsilon"], optional=True)) is not None: self.gguf_writer.add_layer_norm_eps(f_norm_eps) if (n_experts := self.hparams.get("num_local_experts")) is not None: self.gguf_writer.add_expert_count(n_experts) @@ -220,6 +220,8 @@ class Model: return NomicBertModel if model_architecture == "GemmaForCausalLM": return GemmaModel + if model_architecture == "Starcoder2ForCausalLM": + return StarCoderModel2 return Model def _is_model_safetensors(self) -> bool: @@ -926,6 +928,10 @@ class StarCoderModel(Model): self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"]) self.gguf_writer.add_file_type(self.ftype) +class StarCoderModel2(Model): + def set_vocab(self): + self._set_vocab_gpt2() + class RefactModel(Model): def set_gguf_parameters(self): diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 16f7ed986..5db760cb1 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -275,21 +275,6 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = { MODEL_TENSOR.FFN_DOWN, MODEL_TENSOR.FFN_UP, ], - MODEL_ARCH.STARCODER2: [ - MODEL_TENSOR.TOKEN_EMBD, - MODEL_TENSOR.OUTPUT_NORM, - MODEL_TENSOR.OUTPUT, - MODEL_TENSOR.ROPE_FREQS, - MODEL_TENSOR.ATTN_NORM, - MODEL_TENSOR.ATTN_Q, - MODEL_TENSOR.ATTN_K, - MODEL_TENSOR.ATTN_V, - MODEL_TENSOR.ATTN_OUT, - MODEL_TENSOR.ATTN_ROT_EMBD, - MODEL_TENSOR.FFN_NORM, - MODEL_TENSOR.FFN_DOWN, - MODEL_TENSOR.FFN_UP, - ], MODEL_ARCH.BERT: [ MODEL_TENSOR.TOKEN_EMBD, MODEL_TENSOR.TOKEN_EMBD_NORM, @@ -543,6 +528,21 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = { MODEL_TENSOR.FFN_UP, MODEL_TENSOR.FFN_NORM, ], + MODEL_ARCH.STARCODER2: [ + MODEL_TENSOR.TOKEN_EMBD, + MODEL_TENSOR.OUTPUT_NORM, + MODEL_TENSOR.OUTPUT, + MODEL_TENSOR.ROPE_FREQS, + MODEL_TENSOR.ATTN_NORM, + MODEL_TENSOR.ATTN_Q, + MODEL_TENSOR.ATTN_K, + MODEL_TENSOR.ATTN_V, + MODEL_TENSOR.ATTN_OUT, + MODEL_TENSOR.ATTN_ROT_EMBD, + MODEL_TENSOR.FFN_NORM, + MODEL_TENSOR.FFN_DOWN, + MODEL_TENSOR.FFN_UP, + ], # TODO } diff --git a/llama.cpp b/llama.cpp index 5d6303585..d840d955e 100644 --- a/llama.cpp +++ b/llama.cpp @@ -211,8 +211,8 @@ enum llm_arch { LLM_ARCH_INTERNLM2, LLM_ARCH_MINICPM, LLM_ARCH_GEMMA, + LLM_ARCH_STARCODER2, LLM_ARCH_UNKNOWN, - LLM_ARCH_STARCODER2 }; static std::map LLM_ARCH_NAMES = { @@ -7735,7 +7735,7 @@ struct llm_build_context { cur = llm_build_norm(ctx0, cur, hparams, model.output_norm, model.output_norm_b, - LLM_NORM_RMS, cb, -1); + LLM_NORM, cb, -1); cb(cur, "result_norm", -1); // lm_head