server : fix system prompt cli (#5516)

2024-12-25 02:44:36 +00:00 · 2024-02-16 11:00:56 +01:00 · 2024-02-16 11:00:56 +01:00 · 5f5808ca7b
commit 5f5808ca7b
parent f486f6e1e5
1 changed file with 22 additions and 25 deletions
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -436,10 +436,6 @@ struct llama_server_context
        default_generation_settings_for_props["seed"] = -1;
        batch = llama_batch_init(n_ctx, 0, params.n_parallel);
        // empty system prompt
        system_prompt = "";
        system_tokens.clear();
    }
    std::vector<llama_token> tokenize(const json & json_prompt, bool add_bos) const
@@ -765,13 +761,15 @@ struct llama_server_context
    }
    void update_system_prompt() {
        kv_cache_clear();
        system_tokens.clear();
        if (!system_prompt.empty()) {
            system_tokens = ::llama_tokenize(ctx, system_prompt, add_bos_token);
            llama_batch_clear(batch);
-        kv_cache_clear();
+            for (int i = 0; i < (int)system_tokens.size(); ++i)
        for (int i = 0; i < (int) system_tokens.size(); ++i)
            {
                llama_batch_add(batch, system_tokens[i], i, { 0 }, false);
            }
@@ -787,6 +785,7 @@ struct llama_server_context
            {
                llama_kv_cache_seq_cp(ctx, 0, i, 0, system_tokens.size());
            }
        }
        LOG_TEE("system prompt updated\n");
        system_need_update = false;
@@ -807,11 +806,9 @@ struct llama_server_context
        name_user      = sys_props.value("anti_prompt", "");
        name_assistant = sys_props.value("assistant_name", "");
-        if (slots.size() > 0)
+
        {
        notify_system_prompt_changed();
    }
    }
    static size_t find_stopping_strings(const std::string &text, const size_t last_token_size,
                                        const stop_type type, llama_client_slot &slot)