Compare commits

...

3 Commits

Author SHA1 Message Date
Kante Yin
db65503fa4
Merge 7323304092 into 9a483999a6 2025-01-12 20:54:31 +08:00
Xuan Son Nguyen
9a483999a6
llama : fix chat template gguf key (#11201) 2025-01-12 13:45:14 +01:00
kerthcet
7323304092 Add llmaz as another platform to run llama.cpp on Kubernetes
Signed-off-by: kerthcet <kerthcet@gmail.com>
2024-08-20 10:43:41 +08:00
3 changed files with 4 additions and 10 deletions

View File

@@ -200,6 +200,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
<details>
<summary>Infrastructure</summary>
- [llmaz](https://github.com/InftyAI/llmaz) - ☸️ Effortlessly serve state-of-the-art LLMs on Kubernetes; see the [llama.cpp example](https://github.com/InftyAI/llmaz/tree/main/docs/examples/llamacpp).
- [Paddler](https://github.com/distantmagic/paddler) - Stateful load balancer custom-tailored for llama.cpp
- [GPUStack](https://github.com/gpustack/gpustack) - Manage GPU clusters for running LLMs
- [llama_cpp_canister](https://github.com/onicai/llama_cpp_canister) - llama.cpp as a smart contract on the Internet Computer, using WebAssembly

View File

@@ -1636,15 +1636,8 @@ std::string common_detokenize(const struct llama_vocab * vocab, const std::vecto
//
// Looks up the chat template associated with `model` and returns it as a std::string;
// returns "" when the lookup yields nothing (non-positive size / null pointer).
// NOTE(review): this listing looks like a diff hunk whose +/- markers were stripped (the hunk
// header reads "-1636,15 +1636,8"), so two implementations appear back to back — confirm
// against the commit before treating it as a single function body.
std::string common_get_builtin_chat_template(const struct llama_model * model) {
// Presumably the removed implementation: fetches the "tokenizer.chat_template" metadata value
// by hand with a size query followed by a copy into a temporary buffer.
static const char * template_key = "tokenizer.chat_template";
// call with NULL buffer to get the total size of the string
int32_t res = llama_model_meta_val_str(model, template_key, NULL, 0);
if (res > 0) {
// Buffer of res + 1 zero-initialized bytes (presumably the extra byte is for a terminator).
std::vector<char> model_template(res + 1, 0);
llama_model_meta_val_str(model, template_key, model_template.data(), model_template.size());
// Build the result from the buffer, leaving out the one extra trailing byte.
return std::string(model_template.data(), model_template.size() - 1);
}
return "";
// Presumably the replacement implementation. As literally written here it is unreachable,
// because the `return ""` above is unconditional.
const char * ptr_tmpl = llama_model_chat_template(model);
// A null pointer is mapped to the empty string.
return ptr_tmpl == nullptr ? "" : ptr_tmpl;
}
bool common_chat_verify_template(const std::string & tmpl) {

View File

@@ -178,7 +178,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
{ LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, "tokenizer.ggml.precompiled_charsmap" },
{ LLM_KV_TOKENIZER_HF_JSON, "tokenizer.huggingface.json" },
{ LLM_KV_TOKENIZER_RWKV, "tokenizer.rwkv.world" },
{ LLM_KV_TOKENIZER_CHAT_TEMPLATE, "tokenizer.chat.template" },
{ LLM_KV_TOKENIZER_CHAT_TEMPLATE, "tokenizer.chat_template" },
{ LLM_KV_TOKENIZER_FIM_PRE_ID, "tokenizer.ggml.fim_pre_token_id" },
{ LLM_KV_TOKENIZER_FIM_SUF_ID, "tokenizer.ggml.fim_suf_token_id" },
{ LLM_KV_TOKENIZER_FIM_MID_ID, "tokenizer.ggml.fim_mid_token_id" },