server : fix deadlock that occurs in multi-prompt scenarios (#4905)

* fix deadlock

* don't ruin all whitespace
Author: Ziad Ben Hadj-Alouane
Date: 2024-01-13 09:20:46 -05:00
Committed by: GitHub
Parent: ee8243adaa
Commit: 356327feb3

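Summary of the fix, as the hunks below show: the code that sends a slot's final result used to push it onto queue_results and, while still holding the results lock, call update_multi_task(), which takes the tasks mutex; the multitask-processing loop acquires the same two locks in the opposite order, holding the tasks mutex and then taking mutex_results to publish aggregate results. With a multi-prompt request the two threads could each end up waiting on the lock the other holds. The patch reorders both paths so that neither ever holds the two mutexes at the same time.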

@@ -1350,14 +1350,17 @@ struct llama_server_context
             res.result_json["model"] = slot.oaicompat_model;
         }
 
+        queue_results.push_back(res);
+        condition_results.notify_all();
+
+        // done with results, unlock
+        lock.unlock();
+
         // parent multitask, if any, needs to be updated
         if (slot.multitask_id != -1)
         {
            update_multi_task(slot.multitask_id, slot.task_id, res);
         }
-
-        queue_results.push_back(res);
-        condition_results.notify_all();
     }
 
     void send_embedding(llama_client_slot &slot)
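For illustration, here is a minimal sketch of the lock-order inversion this hunk removes. The names mutex_results, condition_results, queue_results, mutex_tasks and update_multi_task follow the diff; the surrounding structures are simplified stand-ins, not the server's actual types. The key point is that the result is published and mutex_results is released before update_multi_task() takes the tasks mutex, so the sending path never holds both locks.

// Sketch of the ordering introduced by the patch (simplified, assumed types).
#include <condition_variable>
#include <deque>
#include <mutex>

struct task_result { int id; bool stop; };

std::mutex              mutex_results;
std::condition_variable condition_results;
std::deque<task_result> queue_results;

std::mutex mutex_tasks;   // guards multitask bookkeeping

void update_multi_task(int multitask_id, int task_id, const task_result &res)
{
    std::lock_guard<std::mutex> lock(mutex_tasks);   // second mutex
    // ... record the subtask result on its parent multitask ...
    (void)multitask_id; (void)task_id; (void)res;
}

// After the fix: publish the result and drop mutex_results *before*
// taking mutex_tasks, so the two locks are never held together.
void send_final_response_fixed(int multitask_id, int task_id, task_result res)
{
    std::unique_lock<std::mutex> lock(mutex_results);
    queue_results.push_back(res);
    condition_results.notify_all();

    // done with results, unlock (the ordering the patch introduces)
    lock.unlock();

    if (multitask_id != -1)
    {
        update_multi_task(multitask_id, task_id, res);
    }
}

int main()
{
    send_final_response_fixed(/*multitask_id=*/1, /*task_id=*/2, task_result{2, true});
    return (int) queue_results.size();  // 1
}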
@@ -1603,6 +1606,7 @@ struct llama_server_context
         }
 
         // remove finished multitasks from the queue of multitasks, and add the corresponding result to the result queue
+        std::vector<task_result> agg_results;
         auto queue_iterator = queue_multitasks.begin();
         while (queue_iterator != queue_multitasks.end())
         {
@@ -1623,8 +1627,9 @@ struct llama_server_context
                 }
                 aggregate_result.result_json = json{ "results", result_jsons };
 
-                std::lock_guard<std::mutex> lock(mutex_results);
-                queue_results.push_back(aggregate_result);
+                agg_results.push_back(aggregate_result);
+
                 condition_results.notify_all();
 
                 queue_iterator = queue_multitasks.erase(queue_iterator);
@@ -1634,6 +1639,13 @@ struct llama_server_context
                 ++queue_iterator;
             }
         }
+
+        // done with tasks, unlock
+        lock.unlock();
+
+        // copy aggregate results of complete multi-tasks to the results queue
+        std::lock_guard<std::mutex> lock_results(mutex_results);
+        queue_results.insert(queue_results.end(), agg_results.begin(), agg_results.end());
     }
 
     bool update_slots() {
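The last three hunks apply the same idea from the other side: instead of taking mutex_results while the tasks lock is held, finished aggregates are buffered in a local agg_results vector and published only after the tasks lock has been released. Below is a minimal sketch of that collect-then-publish pattern; the loop and the multitask fields are simplified assumptions, not the server's exact structures, and the patch itself keeps condition_results.notify_all() inside the loop, whereas the sketch signals after publishing for brevity.

// Sketch of the collect-then-publish pattern used for multitask results
// (simplified, assumed types and field names).
#include <condition_variable>
#include <deque>
#include <mutex>
#include <vector>

struct task_result { int id; };
struct task_multi  { int id; std::vector<task_result> results; std::vector<int> subtasks_remaining; };

std::mutex              mutex_tasks;
std::deque<task_multi>  queue_multitasks;

std::mutex              mutex_results;
std::condition_variable condition_results;
std::deque<task_result> queue_results;

void process_finished_multitasks()
{
    std::unique_lock<std::mutex> lock(mutex_tasks);

    // 1) collect aggregate results locally while holding only mutex_tasks
    std::vector<task_result> agg_results;
    for (auto it = queue_multitasks.begin(); it != queue_multitasks.end(); )
    {
        if (it->subtasks_remaining.empty())
        {
            task_result aggregate_result;
            aggregate_result.id = it->id;
            // ... merge it->results into aggregate_result ...
            agg_results.push_back(aggregate_result);
            it = queue_multitasks.erase(it);
        }
        else
        {
            ++it;
        }
    }

    // 2) done with tasks, unlock before touching the results queue
    lock.unlock();

    // 3) publish under mutex_results only; the two locks are never nested
    std::lock_guard<std::mutex> lock_results(mutex_results);
    queue_results.insert(queue_results.end(), agg_results.begin(), agg_results.end());
    condition_results.notify_all();
}

int main()
{
    queue_multitasks.push_back({ 7, { {1}, {2} }, {} });  // all subtasks done
    process_finished_multitasks();
    return (int) queue_results.size();  // 1
}

Buffering into agg_results also keeps the critical section on mutex_tasks short; the essential point for the deadlock, though, is that the nested acquisition of mutex_results under mutex_tasks is gone, matching the reordering made in the sending path above.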