Update server.cpp

commit f547c4f54a (parent b228aba91a)
```diff
@@ -2029,7 +2029,24 @@ struct server_context {
             // reuse any previously computed tokens that are common with the new prompt
             slot.n_past = common_part(slot.cache_tokens, prompt_tokens);
 
+            if (slot.ga_n != 1)
+            {
+                int ga_i = 0;
+                int32_t ga_n = slot.ga_n;
+                int32_t ga_w = slot.ga_w;
+                int32_t slot_npast = 0;
+                for (int k = 0; k < slot.n_past; ++k)
+                {
+                    while (slot_npast >= ga_i + ga_w) {
+                        const int bd = (ga_w/ga_n)*(ga_n - 1);
+                        slot_npast -= bd;
+                        ga_i += ga_w/ga_n;
+                    }
+                    slot_npast++;
+                }
+                slot.n_past_se = slot_npast;
+                slot.ga_i = ga_i;
+            }
+
             // push the prompt into the sampling context (do not apply grammar)
             for (int i = 0; i < slot.n_past; ++i) {
                 llama_sampling_accept(slot.ctx_sampling, ctx, slot.cache_tokens[i], false);
```
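For context, the inserted block re-derives the slot's effective KV-cache position when group attention (self-extend) is enabled, i.e. when `slot.ga_n != 1`. Below is a minimal standalone sketch of the same bookkeeping, pulled out of the diff for readability; the function name `self_extend_npast`, the `main()` driver, and the example values (`n_past = 4096`, `ga_n = 4`, `ga_w = 2048`) are illustrative and not part of the commit:

```cpp
// Standalone sketch (not part of the commit) of the position bookkeeping
// performed by the added block. Variable names mirror the diff.
#include <cstdint>
#include <cstdio>

// Recompute the effective self-extend position after n_past cached tokens,
// given group factor ga_n and window size ga_w. Also reports the final
// window start ga_i via ga_i_out.
static int32_t self_extend_npast(int32_t n_past, int32_t ga_n, int32_t ga_w,
                                 int32_t & ga_i_out) {
    int32_t ga_i       = 0;
    int32_t slot_npast = 0;
    for (int32_t k = 0; k < n_past; ++k) {
        // whenever the running position crosses the end of the current
        // window (ga_i + ga_w), pull it back by bd and advance the window
        // start by ga_w/ga_n -- the same arithmetic as the diff
        while (slot_npast >= ga_i + ga_w) {
            const int32_t bd = (ga_w / ga_n) * (ga_n - 1);
            slot_npast -= bd;
            ga_i += ga_w / ga_n;
        }
        slot_npast++;
    }
    ga_i_out = ga_i;
    return slot_npast;
}

int main() {
    int32_t ga_i = 0;
    // hypothetical values: 4096 cached tokens, group factor 4, window 2048
    const int32_t npast_se = self_extend_npast(4096, 4, 2048, ga_i);
    printf("n_past_se = %d, ga_i = %d\n", (int) npast_se, (int) ga_i);
    return 0;
}
```

Each time the position reaches `ga_i + ga_w` it is shifted back by `bd = (ga_w/ga_n)*(ga_n - 1)`, so past the first window the effective position grows roughly `ga_n` times slower than the raw token count. The results land in `slot.n_past_se` and `slot.ga_i`, which the decode path presumably consults when group attention is active.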