mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 19:50:17 +00:00
ggml : limit n_threads to the max n_tasks (#5238)
This commit is contained in:
parent
f8e9140cb4
commit
dabcc5b471
6
ggml.c
6
ggml.c
@@ -16985,12 +16985,16 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
|
|||||||
struct ggml_cplan cplan;
|
struct ggml_cplan cplan;
|
||||||
memset(&cplan, 0, sizeof(struct ggml_cplan));
|
memset(&cplan, 0, sizeof(struct ggml_cplan));
|
||||||
|
|
||||||
|
int max_tasks = 1;
|
||||||
|
|
||||||
// thread scheduling for the different operations + work buffer size estimation
|
// thread scheduling for the different operations + work buffer size estimation
|
||||||
for (int i = 0; i < cgraph->n_nodes; i++) {
|
for (int i = 0; i < cgraph->n_nodes; i++) {
|
||||||
struct ggml_tensor * node = cgraph->nodes[i];
|
struct ggml_tensor * node = cgraph->nodes[i];
|
||||||
|
|
||||||
const int n_tasks = ggml_get_n_tasks(node, n_threads);
|
const int n_tasks = ggml_get_n_tasks(node, n_threads);
|
||||||
|
|
||||||
|
max_tasks = MAX(max_tasks, n_tasks);
|
||||||
|
|
||||||
size_t cur = 0;
|
size_t cur = 0;
|
||||||
|
|
||||||
switch (node->op) {
|
switch (node->op) {
|
||||||
@@ -17157,7 +17161,7 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
|
|||||||
work_size += CACHE_LINE_SIZE*(n_threads - 1);
|
work_size += CACHE_LINE_SIZE*(n_threads - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
cplan.n_threads = n_threads;
|
cplan.n_threads = MIN(max_tasks, n_threads);
|
||||||
cplan.work_size = work_size;
|
cplan.work_size = work_size;
|
||||||
cplan.work_data = NULL;
|
cplan.work_data = NULL;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user