look forward more

2025-01-06 00:34:35 +00:00 · 2023-04-08 19:55:29 +08:00 · 2023-04-08 19:55:29 +08:00 · 3b03df5c05
commit 3b03df5c05
parent 921296c0d5
1 changed files with 38 additions and 44 deletions
--- a/ggml.c
+++ b/ggml.c
@ -9249,16 +9249,10 @@ typedef int ggml_lock_t;
 #endif
 // State common to all workers of a single ggml_graph_compute() call.
 // ggml_graph_compute() builds one instance on its stack and stores its
 // address in the `shared` field of the worker states it initializes.
 struct ggml_compute_state_shared {
    // Thread count for the graph; initialized from cgraph->n_threads.
    int n_threads;
 };
 // Per-worker task descriptor. ggml_graph_compute() fills one of these in and
 // passes its address to thpool_add_work() together with
 // ggml_graph_compute_thread as the work function.
 struct ggml_compute_state {
    // Task parameters for this worker (type, ith, nth, wsize, wdata).
    struct ggml_compute_params params;
    // Tensor this worker should compute; set to NULL at setup and assigned
    // when a node is dispatched to the worker.
    struct ggml_tensor * node;
    // Points at the ggml_compute_state_shared owned by ggml_graph_compute().
    struct ggml_compute_state_shared * shared;
 };
 static void ggml_graph_compute_thread(void * data) {
@ -9284,9 +9278,6 @@ static void ggml_graph_compute_thread(void * data) {
 void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) {
    const int n_threads = cgraph->n_threads;
    struct ggml_compute_state_shared state_shared = {
        /*.n_threads =*/ n_threads,
    };
    struct ggml_compute_state * workers = n_threads > 1 ? alloca(sizeof(struct ggml_compute_state)*(n_threads - 1)) : NULL;
    // create thread pool
@ -9302,7 +9293,6 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
                    .wdata = cgraph->work ? cgraph->work->data : NULL,
                },
                .node   = NULL,
                .shared = &state_shared,
            };
        }
    }
@ -9520,6 +9510,11 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
        struct ggml_tensor * node = cgraph->nodes[i];
        if (node->n_tasks == 0)
        {
            // no work needs to be done.
            continue;
        }
        // TODO: this could be used to avoid unnecessary computations, but it needs to be improved
        //if (node->grad == NULL && node->perf_runs > 0) {
        //    continue;
@ -9558,46 +9553,45 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
        }
        else
        {
-            if (i + 1 < cgraph->n_nodes)
+            int start = i;
            int end = i + 1;
            while (end < cgraph->n_nodes && next_task < n_threads && (end - start) < n_threads * 2)
            {
-                struct ggml_tensor * next = cgraph->nodes[i + 1];
+                struct ggml_tensor * next = cgraph->nodes[end];
-                if (next->src0 != node && next->src1 != node && next->n_tasks == 1)
+                end++;
                {
                    workers[next_task].params = (struct ggml_compute_params) {
                        .type  = GGML_TASK_COMPUTE | GGML_TASK_INIT,
                        .ith   = 0,
                        .nth   = 1,
                        .wsize = 0,
                        .wdata = NULL,
                    };
                    workers[next_task].node = next;
                    thpool_add_work(ctx->tpool, ggml_graph_compute_thread, &workers[next_task]);
                    next_task++;
-                    if (i + 2 < cgraph->n_nodes)
+                if (next->n_tasks != 1)
                    continue;
                // check src dependency
                bool is_dep = false;
                for (int k = start; k < end; k++)
                {
                    struct ggml_tensor * node = cgraph->nodes[k];
                    if (next->src0 == node || next->src1 == node)
                    {
-                        struct ggml_tensor * prev = cgraph->nodes[i + 1];
+                        is_dep = true;
-                        struct ggml_tensor * next = cgraph->nodes[i + 2];
+                        break;
                        if (next->src0 != node && next->src1 != node && next->n_tasks == 1 &&
                            next->src0 != prev && next->src1 != prev
                        )
                        {
                            workers[next_task].params = (struct ggml_compute_params) {
                                .type  = GGML_TASK_COMPUTE | GGML_TASK_INIT,
                                .ith   = 0,
                                .nth   = 1,
                                .wsize = 0,
                                .wdata = NULL,
                            };
                            workers[next_task].node = next;
                            thpool_add_work(ctx->tpool, ggml_graph_compute_thread, &workers[next_task]);
                            next_task++;
                        }
                    }
                }
                if (is_dep)
                    continue;
                workers[next_task].params = (struct ggml_compute_params) {
                    .type  = GGML_TASK_COMPUTE | GGML_TASK_INIT,
                    .ith   = 0,
                    .nth   = 1,
                    .wsize = 0,
                    .wdata = NULL,
                };
                workers[next_task].node = next;
                thpool_add_work(ctx->tpool, ggml_graph_compute_thread, &workers[next_task]);
                next->n_tasks = 0; // indicate this node is caculated
                next_task++;
                //printf("Combine task [%d, %d]\n", start, end);
            }
        }
        params.type = GGML_TASK_COMPUTE;