mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 19:50:17 +00:00
Use model->gguf_kv for loading the template instead of using the C API. (#10868)
* Bump model_template to 16384 bytes to support larger chat templates.
* Use `model->gguf_kv` for efficiency.
This commit is contained in:
parent
081b29bd2a
commit
d62b532c52
@ -22651,15 +22651,15 @@ int32_t llama_chat_apply_template(
|
|||||||
std::string curr_tmpl(tmpl == nullptr ? "" : tmpl);
|
std::string curr_tmpl(tmpl == nullptr ? "" : tmpl);
|
||||||
if (tmpl == nullptr) {
|
if (tmpl == nullptr) {
|
||||||
GGML_ASSERT(model != nullptr);
|
GGML_ASSERT(model != nullptr);
|
||||||
// load template from model
|
|
||||||
std::vector<char> model_template(2048, 0); // longest known template is about 1200 bytes
|
// load template from model, if available
|
||||||
std::string template_key = "tokenizer.chat_template";
|
const auto & it = model->gguf_kv.find("tokenizer.chat_template");
|
||||||
int32_t res = llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
|
if (it != model->gguf_kv.end() && it->second.size() > 0) {
|
||||||
if (res < 0) {
|
curr_tmpl = it->second;
|
||||||
|
}
|
||||||
|
else {
|
||||||
// worst case: there is no information about template, we will use chatml by default
|
// worst case: there is no information about template, we will use chatml by default
|
||||||
curr_tmpl = "chatml"; // see llama_chat_apply_template_internal
|
curr_tmpl = "chatml"; // see llama_chat_apply_template_internal
|
||||||
} else {
|
|
||||||
curr_tmpl = std::string(model_template.data(), model_template.size());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user