From 3b03df5c05b0cabbf923d7ee3bf89ceca9a17b12 Mon Sep 17 00:00:00 2001 From: Howard Su Date: Sat, 8 Apr 2023 19:55:29 +0800 Subject: [PATCH] look forward more --- ggml.c | 82 +++++++++++++++++++++++++++------------------------------- 1 file changed, 38 insertions(+), 44 deletions(-) diff --git a/ggml.c b/ggml.c index 93f034d8f..d5a190f34 100644 --- a/ggml.c +++ b/ggml.c @@ -9249,16 +9249,10 @@ typedef int ggml_lock_t; #endif -struct ggml_compute_state_shared { - int n_threads; -}; - struct ggml_compute_state { struct ggml_compute_params params; struct ggml_tensor * node; - - struct ggml_compute_state_shared * shared; }; static void ggml_graph_compute_thread(void * data) { @@ -9284,9 +9278,6 @@ static void ggml_graph_compute_thread(void * data) { void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) { const int n_threads = cgraph->n_threads; - struct ggml_compute_state_shared state_shared = { - /*.n_threads =*/ n_threads, - }; struct ggml_compute_state * workers = n_threads > 1 ? alloca(sizeof(struct ggml_compute_state)*(n_threads - 1)) : NULL; // create thread pool @@ -9302,7 +9293,6 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) .wdata = cgraph->work ? cgraph->work->data : NULL, }, .node = NULL, - .shared = &state_shared, }; } } @@ -9520,6 +9510,11 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) struct ggml_tensor * node = cgraph->nodes[i]; + if (node->n_tasks == 0) + { + // no work need to be done. 
+ continue; + } // TODO: this could be used to avoid unnecessary computations, but it needs to be improved //if (node->grad == NULL && node->perf_runs > 0) { // continue; //} @@ -9558,46 +9553,45 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) } else { - if (i + 1 < cgraph->n_nodes) + int start = i; + int end = i + 1; + while (end < cgraph->n_nodes && next_task < n_threads && (end - start) < n_threads * 2) { - struct ggml_tensor * next = cgraph->nodes[i + 1]; - if (next->src0 != node && next->src1 != node && next->n_tasks == 1) - { - workers[next_task].params = (struct ggml_compute_params) { - .type = GGML_TASK_COMPUTE | GGML_TASK_INIT, - .ith = 0, - .nth = 1, - .wsize = 0, - .wdata = NULL, - }; - workers[next_task].node = next; - thpool_add_work(ctx->tpool, ggml_graph_compute_thread, &workers[next_task]); - next_task++; + struct ggml_tensor * next = cgraph->nodes[end]; + end++; - if (i + 2 < cgraph->n_nodes) + if (next->n_tasks != 1) + continue; + + // check src dependency + bool is_dep = false; + for (int k = start; k < end; k++) + { + struct ggml_tensor * node = cgraph->nodes[k]; + if (next->src0 == node || next->src1 == node) { - struct ggml_tensor * prev = cgraph->nodes[i + 1]; - struct ggml_tensor * next = cgraph->nodes[i + 2]; - if (next->src0 != node && next->src1 != node && next->n_tasks == 1 && - next->src0 != prev && next->src1 != prev - ) - { - workers[next_task].params = (struct ggml_compute_params) { - .type = GGML_TASK_COMPUTE | GGML_TASK_INIT, - .ith = 0, - .nth = 1, - .wsize = 0, - .wdata = NULL, - }; - workers[next_task].node = next; - thpool_add_work(ctx->tpool, ggml_graph_compute_thread, &workers[next_task]); - next_task++; - } + is_dep = true; + break; } } + + if (is_dep) + continue; + + workers[next_task].params = (struct ggml_compute_params) { + .type = GGML_TASK_COMPUTE | GGML_TASK_INIT, + .ith = 0, + .nth = 1, + .wsize = 0, + .wdata = NULL, + }; + workers[next_task].node = next; + + thpool_add_work(ctx->tpool, 
ggml_graph_compute_thread, &workers[next_task]); + next->n_tasks = 0; // indicate this node is calculated + next_task++; + //printf("Combine task [%d, %d]\n", start, end); } - - } params.type = GGML_TASK_COMPUTE;