From 677058f470507e053f1a1679ebab40ad1c872f77 Mon Sep 17 00:00:00 2001
From: liyuhang
Date: Fri, 8 Nov 2024 03:33:43 +0000
Subject: [PATCH] add glm edge chat model

---
 convert_hf_to_gguf.py        | 11 +++++++++--
 src/llama.cpp                | 32 +++++++++++++++++++++++++++-----
 tests/test-chat-template.cpp |  4 ++++
 3 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 76ee6cef5..30e1982ad 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -625,7 +625,7 @@ class Model:
         if chkhsh == "7967bfa498ade6b757b064f31e964dddbb80f8f9a4d68d4ba7998fcf281c531a":
             # ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-code
             res = "jina-v2-code"
-        if chkhsh == "b6e8e1518dc4305be2fe39c313ed643381c4da5db34a98f6a04c093f8afbe99b":
+        if chkhsh == "b6e8e1518dc4305be2fe39c313ed643381c4da5db34a98f6a04c093f8afbe99b" or chkhsh == "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516":
             # ref: https://huggingface.co/THUDM/glm-4-9b-chat
             res = "chatglm-bpe"
         if chkhsh == "7fc505bd3104ca1083b150b17d088b59534ede9bde81f0dd2090967d7fe52cee":
@@ -3989,7 +3989,14 @@ class ChatGLMModel(Model):
         self.gguf_writer.add_head_count_kv(n_head_kv)
         self.gguf_writer.add_layer_norm_rms_eps(self.hparams["layernorm_epsilon"])
         self.gguf_writer.add_file_type(self.ftype)
-        self.gguf_writer.add_rope_dimension_count(64)
+        if "attention_dim" in self.hparams:
+            rope_dim = self.hparams["attention_dim"]
+        else:
+            rope_dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
+        if "THUDM/glm4-nano" in self.hparams.get("_name_or_path", "") or "THUDM/glm4-mini" in self.hparams.get("_name_or_path", ""):
+            self.gguf_writer.add_rope_dimension_count(rope_dim)
+        else:
+            self.gguf_writer.add_rope_dimension_count(rope_dim // 2)
         self.gguf_writer.add_add_bos_token(False)
         rope_freq = 10000
         if "rope_ratio" in self.hparams:
diff --git a/src/llama.cpp b/src/llama.cpp
index 034441e1f..fbe486360 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -5912,8 +5912,20 @@ static void llm_load_hparams(
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                 switch (hparams.n_layer) {
-                    case 28: model.type = e_model::MODEL_6B; break;
-                    case 40: model.type = e_model::MODEL_9B; break;
+                    case 28: {
+                        if (hparams.n_head(0) == 16) {
+                            model.type = e_model::MODEL_1_6B;
+                        } else {
+                            model.type = e_model::MODEL_6B;
+                        }
+                    } break;
+                    case 40: {
+                        if (hparams.n_head(0) == 24) {
+                            model.type = e_model::MODEL_4B;
+                        } else {
+                            model.type = e_model::MODEL_9B;
+                        }
+                    } break;
                     default: model.type = e_model::MODEL_UNKNOWN;
                 }
             } break;
@@ -8859,7 +8871,7 @@ static bool llm_load_tensors(
 
                         layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0);
 
                         layer.wqkv = create_tensor(tn(LLM_TENSOR_ATTN_QKV, "weight", i), {n_embd, n_embd + 2*n_embd_gqa}, 0);
-                        layer.bqkv = create_tensor(tn(LLM_TENSOR_ATTN_QKV, "bias", i), {n_embd + 2*n_embd_gqa}, 0);
+                        layer.bqkv = create_tensor(tn(LLM_TENSOR_ATTN_QKV, "bias", i), {n_embd + 2*n_embd_gqa}, llama_model_loader::TENSOR_NOT_REQUIRED);
 
                         layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}, 0);
@@ -15722,8 +15734,10 @@ struct llm_build_context {
                 cur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wqkv, cur);
                 cb(cur, "wqkv", il);
 
-                cur = ggml_add(ctx0, cur, model.layers[il].bqkv);
-                cb(cur, "bqkv", il);
+                if (model.layers[il].bqkv) {
+                    cur = ggml_add(ctx0, cur, model.layers[il].bqkv);
+                    cb(cur, "bqkv", il);
+                }
 
                 Qcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd,     n_tokens, cur->nb[1], 0*sizeof(float)*(n_embd)));
                 Kcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], 1*sizeof(float)*(n_embd)));
@@ -21742,6 +21756,14 @@ static int32_t llama_chat_apply_template_internal(
         if (add_ass) {
             ss << "<|assistant|>";
         }
+    } else if (tmpl == "glm-edge" || tmpl_contains("<|assistant|>")) {
+        for (auto message : chat) {
+            std::string role(message->role);
+            ss << "<|" << role << "|>" << "\n" << message->content;
+        }
+        if (add_ass) {
+            ss << "<|assistant|>";
+        }
     } else if (tmpl == "minicpm" || tmpl_contains(LU8("<用户>"))) {
         // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
         for (auto message : chat) {
diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp
index 03e897e66..128bcf9cd 100644
--- a/tests/test-chat-template.cpp
+++ b/tests/test-chat-template.cpp
@@ -61,6 +61,8 @@ int main(void) {
         "{% for message in messages %}{% if loop.first %}[gMASK]sop<|{{ message['role'] }}|>\n {{ message['content'] }}{% else %}<|{{ message['role'] }}|>\n {{ message['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}",
         // ChatGLM4
         u8"[gMASK]<sop>{% for item in messages %}{% if item['tools'] is defined %}<|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具{% set tools = item['tools'] %}{% for tool in tools %}{% if tool['type'] == 'function' %}\n\n## {{ tool['function']['name'] }}\n\n{{ tool['function'] | tojson(indent=4) }}\n......{% endif %}{% endfor %}{% endif %}{% if item['content'] %}<|{{ item['role'] }}|>{{ item['metadata'] }}\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}",
+        // GLM-edge
+        "{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] +'\n' }}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{- '<|assistant|>\n' -}}{% endif %}",
         // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
         u8"{% for message in messages %}{% if message['role'] == 'user' %}{{'<用户>' + message['content'].strip() + '<AI>'}}{% else %}{{message['content'].strip()}}{% endif %}{% endfor %}",
         // DeepSeek-V2
@@ -107,6 +109,8 @@ int main(void) {
         "[gMASK]sop<|system|>\n You are a helpful assistant<|user|>\n Hello<|assistant|>\n Hi there<|user|>\n Who are you<|assistant|>\n I am an assistant <|user|>\n Another question<|assistant|>",
         // ChatGLM4
         "[gMASK]<sop><|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>",
+        // GLM-Edge
+        "<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>",
         // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
         u8"You are a helpful assistant<用户>Hello<AI>Hi there<用户>Who are you<AI>I am an assistant<用户>Another question<AI>",
         // DeepSeek-V2
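
Reviewer note, not part of the patch: the sketch below shows how the new "glm-edge" branch
of llama_chat_apply_template_internal() is expected to render a conversation when driven
through the public API. It assumes the llama_chat_apply_template() signature of this
revision, where the model pointer may be null if an explicit template name is passed;
the buffer size and message fixture are illustrative only.

// Minimal sketch: select the "glm-edge" template by name and print the result.
#include <cstdio>
#include <vector>

#include "llama.h"

int main() {
    // Messages mirror the fixture used in tests/test-chat-template.cpp.
    std::vector<llama_chat_message> chat = {
        { "system", "You are a helpful assistant" },
        { "user",   "Hello"                       },
    };

    std::vector<char> buf(1024);
    // Passing tmpl = "glm-edge" selects the new branch; the model pointer
    // may be null when the template is given explicitly.
    const int32_t n = llama_chat_apply_template(nullptr, "glm-edge",
            chat.data(), chat.size(), /*add_ass=*/true, buf.data(), buf.size());
    if (n > 0 && n <= (int32_t) buf.size()) {
        // Expected: <|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>
        printf("%.*s\n", n, buf.data());
    }
    return 0;
}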