server : fix infill when prompt is empty (#4833)

Georgi Gerganov 2024-01-11 23:23:49 +02:00 committed by GitHub
parent 7edefbd79c
commit 1d118386fe

@@ -1406,7 +1406,7 @@ struct llama_server_context
         task.multitask_id = multitask_id;
 
         // when a completion task's prompt array is not a singleton, we split it into multiple requests
-        if (task.data.at("prompt").size() > 1)
+        if (task.data.count("prompt") && task.data.at("prompt").size() > 1)
         {
             lock.unlock(); // entering new func scope
             return split_multiprompt_task(task);
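
Note on the first hunk: task.data is a nlohmann::json object, and json::at() throws json::out_of_range when the requested key is absent, so the added count("prompt") guard keeps a request that carries no "prompt" field at all (such as an infill request) from aborting inside the task-split check. A minimal standalone sketch of that behavior follows; the file and build line are illustrative, not part of the repository.

// sketch.cpp -- build with: g++ -std=c++17 sketch.cpp (needs the nlohmann/json header)
// Illustrative only: shows why the count() guard is needed before at().
#include <nlohmann/json.hpp>
#include <iostream>

using json = nlohmann::json;

int main() {
    // A request without a "prompt" field at all (as an infill request may send).
    json task_data = json::object();

    // Unguarded access would throw: task_data.at("prompt") raises
    // nlohmann::json::out_of_range when the key is missing.
    if (task_data.count("prompt") && task_data.at("prompt").size() > 1) {
        std::cout << "prompt is an array of several prompts -> split the task\n";
    } else {
        std::cout << "missing or single prompt -> handle as one request\n";
    }
    return 0;
}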
@@ -1731,7 +1731,8 @@ struct llama_server_context
             const bool has_prompt = slot.prompt.is_array() || (slot.prompt.is_string() && !slot.prompt.get<std::string>().empty()) || !slot.images.empty();
 
             // empty prompt passed -> release the slot and send empty response
-            if (slot.state == IDLE && slot.command == LOAD_PROMPT && !has_prompt)
+            // note: infill mode allows empty prompt
+            if (slot.state == IDLE && slot.command == LOAD_PROMPT && !has_prompt && !slot.infill)
            {
                slot.release();
                slot.print_timings();
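
Note on the second hunk: the empty-prompt short-circuit still releases the slot with an empty response for regular completions, but infill slots are now exempt, since an infill request builds its context from the prefix/suffix inputs rather than a "prompt" field. Below is a small self-contained restatement of the updated condition; the stub struct and enum definitions are stand-ins for the server's slot types, not the real ones.

// Illustrative restatement of the updated release condition (stand-in types).
#include <iostream>

enum slot_state   { IDLE, PROCESSING };
enum slot_command { NONE, LOAD_PROMPT, RELEASE };

struct slot_stub {
    slot_state   state;
    slot_command command;
    bool         has_prompt;
    bool         infill;
};

// Release with an empty response only when there is nothing to generate from
// AND the slot is not in infill mode; an empty prompt is still valid for infill.
static bool should_release_empty(const slot_stub & s) {
    return s.state == IDLE && s.command == LOAD_PROMPT && !s.has_prompt && !s.infill;
}

int main() {
    slot_stub completion_slot{IDLE, LOAD_PROMPT, /*has_prompt=*/false, /*infill=*/false};
    slot_stub infill_slot    {IDLE, LOAD_PROMPT, /*has_prompt=*/false, /*infill=*/true};

    std::cout << "completion, empty prompt -> release: " << should_release_empty(completion_slot) << "\n"; // prints 1
    std::cout << "infill, empty prompt     -> release: " << should_release_empty(infill_slot)     << "\n"; // prints 0
    return 0;
}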