mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-25 02:44:36 +00:00
server : fix system prompt cli (#5516)
This commit is contained in:
parent
f486f6e1e5
commit
5f5808ca7b
@ -436,10 +436,6 @@ struct llama_server_context
|
|||||||
default_generation_settings_for_props["seed"] = -1;
|
default_generation_settings_for_props["seed"] = -1;
|
||||||
|
|
||||||
batch = llama_batch_init(n_ctx, 0, params.n_parallel);
|
batch = llama_batch_init(n_ctx, 0, params.n_parallel);
|
||||||
|
|
||||||
// empty system prompt
|
|
||||||
system_prompt = "";
|
|
||||||
system_tokens.clear();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<llama_token> tokenize(const json & json_prompt, bool add_bos) const
|
std::vector<llama_token> tokenize(const json & json_prompt, bool add_bos) const
|
||||||
@ -765,13 +761,15 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
|
|
||||||
void update_system_prompt() {
|
void update_system_prompt() {
|
||||||
|
kv_cache_clear();
|
||||||
|
system_tokens.clear();
|
||||||
|
|
||||||
|
if (!system_prompt.empty()) {
|
||||||
system_tokens = ::llama_tokenize(ctx, system_prompt, add_bos_token);
|
system_tokens = ::llama_tokenize(ctx, system_prompt, add_bos_token);
|
||||||
|
|
||||||
llama_batch_clear(batch);
|
llama_batch_clear(batch);
|
||||||
|
|
||||||
kv_cache_clear();
|
for (int i = 0; i < (int)system_tokens.size(); ++i)
|
||||||
|
|
||||||
for (int i = 0; i < (int) system_tokens.size(); ++i)
|
|
||||||
{
|
{
|
||||||
llama_batch_add(batch, system_tokens[i], i, { 0 }, false);
|
llama_batch_add(batch, system_tokens[i], i, { 0 }, false);
|
||||||
}
|
}
|
||||||
@ -787,6 +785,7 @@ struct llama_server_context
|
|||||||
{
|
{
|
||||||
llama_kv_cache_seq_cp(ctx, 0, i, 0, system_tokens.size());
|
llama_kv_cache_seq_cp(ctx, 0, i, 0, system_tokens.size());
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
LOG_TEE("system prompt updated\n");
|
LOG_TEE("system prompt updated\n");
|
||||||
system_need_update = false;
|
system_need_update = false;
|
||||||
@ -807,11 +806,9 @@ struct llama_server_context
|
|||||||
name_user = sys_props.value("anti_prompt", "");
|
name_user = sys_props.value("anti_prompt", "");
|
||||||
name_assistant = sys_props.value("assistant_name", "");
|
name_assistant = sys_props.value("assistant_name", "");
|
||||||
|
|
||||||
if (slots.size() > 0)
|
|
||||||
{
|
|
||||||
notify_system_prompt_changed();
|
notify_system_prompt_changed();
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
static size_t find_stopping_strings(const std::string &text, const size_t last_token_size,
|
static size_t find_stopping_strings(const std::string &text, const size_t last_token_size,
|
||||||
const stop_type type, llama_client_slot &slot)
|
const stop_type type, llama_client_slot &slot)
|
||||||
|
Loading…
Reference in New Issue
Block a user