server : fix infill when prompt is empty (#4833)

Georgi Gerganov 2024-01-11 23:23:49 +02:00 committed by GitHub
parent 7edefbd79c
commit 1d118386fe

@@ -1406,7 +1406,7 @@ struct llama_server_context
         task.multitask_id = multitask_id;
 
         // when a completion task's prompt array is not a singleton, we split it into multiple requests
-        if (task.data.at("prompt").size() > 1)
+        if (task.data.count("prompt") && task.data.at("prompt").size() > 1)
         {
             lock.unlock(); // entering new func scope
             return split_multiprompt_task(task);
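
Note on the first hunk: task.data is a nlohmann::json object, and json::at() throws json::out_of_range when the requested key is absent, so the added count("prompt") guard keeps a request that carries no "prompt" field at all (such as an infill request) from aborting inside the task-split check. A minimal standalone sketch of that behavior follows; the file and build line are illustrative, not part of the repository.

// sketch.cpp -- build with: g++ -std=c++17 sketch.cpp (needs the nlohmann/json header)
// Illustrative only: shows why the count() guard is needed before at().
#include <nlohmann/json.hpp>
#include <iostream>

using json = nlohmann::json;

int main() {
    // A request without a "prompt" field at all (as an infill request may send).
    json task_data = json::object();

    // Unguarded access would throw: task_data.at("prompt") raises
    // nlohmann::json::out_of_range when the key is missing.
    if (task_data.count("prompt") && task_data.at("prompt").size() > 1) {
        std::cout << "prompt is an array of several prompts -> split the task\n";
    } else {
        std::cout << "missing or single prompt -> handle as one request\n";
    }
    return 0;
}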
@@ -1731,7 +1731,8 @@ struct llama_server_context
             const bool has_prompt = slot.prompt.is_array() || (slot.prompt.is_string() && !slot.prompt.get<std::string>().empty()) || !slot.images.empty();
 
             // empty prompt passed -> release the slot and send empty response
-            if (slot.state == IDLE && slot.command == LOAD_PROMPT && !has_prompt)
+            // note: infill mode allows empty prompt
+            if (slot.state == IDLE && slot.command == LOAD_PROMPT && !has_prompt && !slot.infill)
            {
                slot.release();
                slot.print_timings();
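
Note on the second hunk: the empty-prompt short-circuit still releases the slot with an empty response for regular completions, but infill slots are now exempt, since an infill request builds its context from the prefix/suffix inputs rather than a "prompt" field. Below is a small self-contained restatement of the updated condition; the stub struct and enum definitions are stand-ins for the server's slot types, not the real ones.

// Illustrative restatement of the updated release condition (stand-in types).
#include <iostream>

enum slot_state   { IDLE, PROCESSING };
enum slot_command { NONE, LOAD_PROMPT, RELEASE };

struct slot_stub {
    slot_state   state;
    slot_command command;
    bool         has_prompt;
    bool         infill;
};

// Release with an empty response only when there is nothing to generate from
// AND the slot is not in infill mode; an empty prompt is still valid for infill.
static bool should_release_empty(const slot_stub & s) {
    return s.state == IDLE && s.command == LOAD_PROMPT && !s.has_prompt && !s.infill;
}

int main() {
    slot_stub completion_slot{IDLE, LOAD_PROMPT, /*has_prompt=*/false, /*infill=*/false};
    slot_stub infill_slot    {IDLE, LOAD_PROMPT, /*has_prompt=*/false, /*infill=*/true};

    std::cout << "completion, empty prompt -> release: " << should_release_empty(completion_slot) << "\n"; // prints 1
    std::cout << "infill, empty prompt     -> release: " << should_release_empty(infill_slot)     << "\n"; // prints 0
    return 0;
}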