mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 11:40:17 +00:00
server : clean-up completed tasks from waiting list (#9531)
Some checks are pending
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/full-cuda.Dockerfile platforms:linux/amd64 tag:full-cuda]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/full.Dockerfile platforms:linux/amd64,linux/arm64 tag:full]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli-cuda.Dockerfile platforms:linux/amd64 tag:light-cuda]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli-intel.Dockerfile platforms:linux/amd64 tag:light-intel]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli.Dockerfile platforms:linux/amd64,linux/arm64 tag:light]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server-cuda.Dockerfile platforms:linux/amd64 tag:server-cuda]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server-intel.Dockerfile platforms:linux/amd64 tag:server-intel]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server.Dockerfile platforms:linux/amd64,linux/arm64 tag:server]) (push) Waiting to run
Nix CI / nix-eval (macos-latest) (push) Waiting to run
Nix CI / nix-eval (ubuntu-latest) (push) Waiting to run
Nix CI / nix-build (macos-latest) (push) Waiting to run
Nix CI / nix-build (ubuntu-latest) (push) Waiting to run
flake8 Lint / Lint (push) Waiting to run
Some checks are pending
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/full-cuda.Dockerfile platforms:linux/amd64 tag:full-cuda]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/full.Dockerfile platforms:linux/amd64,linux/arm64 tag:full]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli-cuda.Dockerfile platforms:linux/amd64 tag:light-cuda]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli-intel.Dockerfile platforms:linux/amd64 tag:light-intel]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli.Dockerfile platforms:linux/amd64,linux/arm64 tag:light]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server-cuda.Dockerfile platforms:linux/amd64 tag:server-cuda]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server-intel.Dockerfile platforms:linux/amd64 tag:server-intel]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server.Dockerfile platforms:linux/amd64,linux/arm64 tag:server]) (push) Waiting to run
Nix CI / nix-eval (macos-latest) (push) Waiting to run
Nix CI / nix-eval (ubuntu-latest) (push) Waiting to run
Nix CI / nix-build (macos-latest) (push) Waiting to run
Nix CI / nix-build (ubuntu-latest) (push) Waiting to run
flake8 Lint / Lint (push) Waiting to run
ggml-ci
This commit is contained in:
parent
eca0fab44e
commit
6026da52d6
@ -531,26 +531,38 @@ struct server_response {
|
|||||||
|
|
||||||
// add the id_task to the list of tasks waiting for response
|
// add the id_task to the list of tasks waiting for response
|
||||||
void add_waiting_task_id(int id_task) {
|
void add_waiting_task_id(int id_task) {
|
||||||
SRV_DBG("waiting for task id = %d\n", id_task);
|
SRV_DBG("add task %d to waiting list. current waiting = %d (before add)\n", id_task, (int) waiting_task_ids.size());
|
||||||
|
|
||||||
std::unique_lock<std::mutex> lock(mutex_results);
|
std::unique_lock<std::mutex> lock(mutex_results);
|
||||||
waiting_task_ids.insert(id_task);
|
waiting_task_ids.insert(id_task);
|
||||||
}
|
}
|
||||||
|
|
||||||
void add_waiting_tasks(const std::vector<server_task> & tasks) {
|
void add_waiting_tasks(const std::vector<server_task> & tasks) {
|
||||||
for (const auto & t : tasks) {
|
std::unique_lock<std::mutex> lock(mutex_results);
|
||||||
add_waiting_task_id(t.id);
|
|
||||||
|
for (const auto & task : tasks) {
|
||||||
|
SRV_DBG("add task %d to waiting list. current waiting = %d (before add)\n", task.id, (int) waiting_task_ids.size());
|
||||||
|
waiting_task_ids.insert(task.id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// when the request is finished, we can remove task associated with it
|
// when the request is finished, we can remove task associated with it
|
||||||
void remove_waiting_task_id(int id_task) {
|
void remove_waiting_task_id(int id_task) {
|
||||||
SRV_DBG("task id = %d is done\n", id_task);
|
SRV_DBG("remove task %d from waiting list. current waiting = %d (before remove)\n", id_task, (int) waiting_task_ids.size());
|
||||||
|
|
||||||
std::unique_lock<std::mutex> lock(mutex_results);
|
std::unique_lock<std::mutex> lock(mutex_results);
|
||||||
waiting_task_ids.erase(id_task);
|
waiting_task_ids.erase(id_task);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void remove_waiting_task_ids(const std::unordered_set<int> & id_tasks) {
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_results);
|
||||||
|
|
||||||
|
for (const auto & id_task : id_tasks) {
|
||||||
|
SRV_DBG("remove task %d from waiting list. current waiting = %d (before remove)\n", id_task, (int) waiting_task_ids.size());
|
||||||
|
waiting_task_ids.erase(id_task);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// This function blocks the thread until there is a response for one of the id_tasks
|
// This function blocks the thread until there is a response for one of the id_tasks
|
||||||
server_task_result recv(const std::unordered_set<int> & id_tasks) {
|
server_task_result recv(const std::unordered_set<int> & id_tasks) {
|
||||||
while (true) {
|
while (true) {
|
||||||
@ -2774,6 +2786,8 @@ int main(int argc, char ** argv) {
|
|||||||
}, [&](const json & error_data) {
|
}, [&](const json & error_data) {
|
||||||
res_error(res, error_data);
|
res_error(res, error_data);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
ctx_server.queue_results.remove_waiting_task_ids(task_ids);
|
||||||
} else {
|
} else {
|
||||||
const auto chunked_content_provider = [task_ids, &ctx_server](size_t, httplib::DataSink & sink) {
|
const auto chunked_content_provider = [task_ids, &ctx_server](size_t, httplib::DataSink & sink) {
|
||||||
ctx_server.receive_cmpl_results_stream(task_ids, [&](const server_task_result & result) -> bool {
|
ctx_server.receive_cmpl_results_stream(task_ids, [&](const server_task_result & result) -> bool {
|
||||||
@ -2784,7 +2798,12 @@ int main(int argc, char ** argv) {
|
|||||||
sink.done();
|
sink.done();
|
||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
res.set_chunked_content_provider("text/event-stream", chunked_content_provider);
|
|
||||||
|
auto on_complete = [task_ids, &ctx_server] (bool) {
|
||||||
|
ctx_server.queue_results.remove_waiting_task_ids(task_ids);
|
||||||
|
};
|
||||||
|
|
||||||
|
res.set_chunked_content_provider("text/event-stream", chunked_content_provider, on_complete);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -2823,6 +2842,8 @@ int main(int argc, char ** argv) {
|
|||||||
}, [&](const json & error_data) {
|
}, [&](const json & error_data) {
|
||||||
res_error(res, error_data);
|
res_error(res, error_data);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
ctx_server.queue_results.remove_waiting_task_ids(task_ids);
|
||||||
} else {
|
} else {
|
||||||
const auto chunked_content_provider = [task_ids, &ctx_server, completion_id](size_t, httplib::DataSink & sink) {
|
const auto chunked_content_provider = [task_ids, &ctx_server, completion_id](size_t, httplib::DataSink & sink) {
|
||||||
ctx_server.receive_cmpl_results_stream(task_ids, [&](const server_task_result & result) -> bool {
|
ctx_server.receive_cmpl_results_stream(task_ids, [&](const server_task_result & result) -> bool {
|
||||||
@ -2844,7 +2865,12 @@ int main(int argc, char ** argv) {
|
|||||||
sink.done();
|
sink.done();
|
||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
res.set_chunked_content_provider("text/event-stream", chunked_content_provider);
|
|
||||||
|
auto on_complete = [task_ids, &ctx_server] (bool) {
|
||||||
|
ctx_server.queue_results.remove_waiting_task_ids(task_ids);
|
||||||
|
};
|
||||||
|
|
||||||
|
res.set_chunked_content_provider("text/event-stream", chunked_content_provider, on_complete);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -2953,6 +2979,8 @@ int main(int argc, char ** argv) {
|
|||||||
res_error(res, error_data);
|
res_error(res, error_data);
|
||||||
error = true;
|
error = true;
|
||||||
});
|
});
|
||||||
|
|
||||||
|
ctx_server.queue_results.remove_waiting_task_ids(task_ids);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (error) {
|
if (error) {
|
||||||
|
Loading…
Reference in New Issue
Block a user