server : no need for atomic int - already using mutex

Georgi Gerganov 2023-10-20 20:44:29 +03:00
parent 113dd60005
commit 5d540e80d1


@@ -23,7 +23,6 @@
 #include <cstddef>
 #include <thread>
-#include <atomic>
 #include <mutex>
 #include <chrono>
@@ -551,7 +550,7 @@ struct llama_server_context
     int max_ctx_per_slot = -1;
     bool clean_kv_cache = true;
-    std::atomic<int> id_gen;
+    int id_gen;
     std::vector<task_server> queue_tasks;
     std::vector<task_result> queue_results;
@@ -611,7 +610,8 @@ struct llama_server_context
     }
     void initialize() {
-        id_gen.store(0); // reset ids to 0
+        id_gen = 0;
         // create slots
         all_slots_are_idle = true;
         if(max_ctx_per_slot == -1) {
@@ -1268,8 +1268,7 @@ struct llama_server_context
     int request_completion(json data, bool infill) {
         std::lock_guard<std::mutex> lock(mutex_tasks);
         task_server task;
-        task.id = id_gen.load();
-        id_gen.fetch_add(1); // increment id generator
+        task.id = id_gen++;
         task.data = data;
         task.infill_mode = infill;
         task.type = COMPLETION_TASK;
@@ -1366,8 +1365,7 @@ struct llama_server_context
     void request_cancel(int task_id) {
         std::lock_guard<std::mutex> lock(mutex_tasks);
         task_server task;
-        task.id = id_gen.load();
-        id_gen.fetch_add(1); // increment id generator
+        task.id = id_gen++;
         task.type = CANCEL_TASK;
         task.target_id = task_id;
         queue_tasks.push_back(task);
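
The change is safe because every read and increment of id_gen in these paths happens while mutex_tasks is held, so the lock already serializes access and std::atomic added no extra protection. A minimal sketch of the resulting pattern (names mirror the server code, but this is a reduced illustration, not the actual llama_server_context):

#include <mutex>

struct task_queue_sketch {
    std::mutex mutex_tasks; // guards id_gen (and, in the server, the task/result queues)
    int        id_gen = 0;  // plain int is enough: it is only touched while the lock is held

    int next_task_id() {
        std::lock_guard<std::mutex> lock(mutex_tasks);
        return id_gen++;    // already serialized by mutex_tasks, no std::atomic needed
    }
};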