Mirror of https://github.com/ggerganov/llama.cpp.git (synced 2024-12-25 02:44:36 +00:00)
server : fix crash when prompt exceeds context size (#3996)
parent 34b0a08207
commit d96ca7ded7
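Before this change, a request with `cache_prompt` disabled took the early branch that resets the sampling context, sets `slot.n_past = 0`, and marks the whole prompt for processing, skipping the prompt-truncation logic and its `GGML_ASSERT(slot.num_prompt_tokens < slot.n_ctx)` check, which lived in the `else` branch. A prompt longer than the slot's context size therefore went through untruncated and crashed the server. The diff below moves the `cache_prompt` check after the truncation block, so every prompt is first cut down to fit `slot.n_ctx`.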
@@ -1557,15 +1557,6 @@ struct llama_server_context
                 slot.num_prompt_tokens = prompt_tokens.size();
 
-                if (!slot.params.cache_prompt)
-                {
-                    llama_sampling_reset(slot.ctx_sampling);
-
-                    slot.n_past = 0;
-                    slot.num_prompt_tokens_processed = slot.num_prompt_tokens;
-                }
-                else
-                {
                 if (slot.params.n_keep < 0)
                 {
                     slot.params.n_keep = slot.num_prompt_tokens;
@@ -1595,6 +1586,15 @@ struct llama_server_context
                     GGML_ASSERT(slot.num_prompt_tokens < slot.n_ctx);
                 }
 
+                if (!slot.params.cache_prompt)
+                {
+                    llama_sampling_reset(slot.ctx_sampling);
+
+                    slot.n_past = 0;
+                    slot.num_prompt_tokens_processed = slot.num_prompt_tokens;
+                }
+                else
+                {
                     // push the prompt into the sampling context (do not apply grammar)
                     for (auto &token : prompt_tokens)
                     {
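As a rough, self-contained sketch of the corrected ordering (not the server's actual code: `slot_state`, `prepare_prompt`, and the tail-keeping truncation arithmetic below are simplified stand-ins for the real `llama_server_context` logic), the key point is that truncation and the fits-in-context assertion now run before the `cache_prompt` branch:

#include <algorithm>
#include <cassert>
#include <cstdio>
#include <vector>

// Hypothetical stand-ins for the server's slot bookkeeping.
using token = int;

struct slot_state {
    int  n_ctx                       = 8;   // slot context size
    int  n_keep                      = -1;  // tokens kept from the prompt start (-1 = all)
    int  n_past                      = 0;
    int  num_prompt_tokens           = 0;
    int  num_prompt_tokens_processed = 0;
    bool cache_prompt                = false;
};

void prepare_prompt(slot_state & slot, std::vector<token> & prompt) {
    slot.num_prompt_tokens = (int) prompt.size();

    if (slot.n_keep < 0) {
        slot.n_keep = slot.num_prompt_tokens;
    }
    slot.n_keep = std::min(slot.n_ctx - 4, slot.n_keep);

    // 1. Truncate FIRST, unconditionally: keep the first n_keep tokens plus
    //    as much of the tail as still fits in the context window.
    if (slot.num_prompt_tokens >= slot.n_ctx) {
        const int n_tail = slot.n_ctx - slot.n_keep - 1;
        std::vector<token> truncated(prompt.begin(), prompt.begin() + slot.n_keep);
        truncated.insert(truncated.end(), prompt.end() - n_tail, prompt.end());
        prompt = std::move(truncated);
        slot.num_prompt_tokens = (int) prompt.size();
    }
    assert(slot.num_prompt_tokens < slot.n_ctx); // now guaranteed to hold

    // 2. Only AFTER truncation decide whether the cached prefix is reused.
    if (!slot.cache_prompt) {
        slot.n_past = 0; // start from scratch
        slot.num_prompt_tokens_processed = slot.num_prompt_tokens;
    } else {
        // ... reuse the common prefix already in the KV cache ...
    }
}

int main() {
    slot_state slot;
    std::vector<token> prompt(12);
    for (int i = 0; i < (int) prompt.size(); ++i) {
        prompt[i] = i;
    }

    prepare_prompt(slot, prompt);
    std::printf("kept %zu of 12 tokens (n_ctx = %d)\n", prompt.size(), slot.n_ctx);
    return 0;
}

Run with a 12-token prompt and an 8-token context, this prints "kept 7 of 12 tokens (n_ctx = 8)" instead of asserting; with the pre-fix ordering (branch first, truncate only in the else), the same input would have gone through untruncated whenever cache_prompt was off.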