From 286c5b30143c7540d947811d37199afde50819a3 Mon Sep 17 00:00:00 2001 From: mqy Date: Sun, 18 Jun 2023 20:01:58 +0800 Subject: [PATCH] threading: remove unnecessary spin lock/unlock from suspend/resume; add more tests --- ggml-threading.c | 22 ++++------------------ tests/test-ggml-threading.c | 25 +++++++++++++++---------- 2 files changed, 19 insertions(+), 28 deletions(-) diff --git a/ggml-threading.c b/ggml-threading.c index fb02b4046..2a5cfa096 100644 --- a/ggml-threading.c +++ b/ggml-threading.c @@ -260,22 +260,17 @@ void ggml_threading_suspend(struct ggml_threading_context *ctx) { return; } - struct ggml_compute_state_shared *shared = &ctx->shared; - - ggml_spin_lock(&shared->spin); + PRINT_DEBUG("[main] wait_now will be set, expect %d workers wait\n", + n_worker_threads); ctx->shared.wait_now = true; - ggml_spin_unlock(&shared->spin); const int n_worker_threads = ctx->n_threads - 1; - while (ctx->shared.n_waiting != n_worker_threads) { ggml_spin_pause(); } - ggml_spin_lock(&shared->spin); - ctx->suspending = true; - ggml_spin_unlock(&shared->spin); PRINT_DEBUG("[main] saw %d workers waiting\n", n_worker_threads); + ctx->suspending = true; } // Wakeup all workers. 
@@ -291,7 +286,6 @@ void ggml_threading_resume(struct ggml_threading_context *ctx) { } struct ggml_compute_state_shared *shared = &ctx->shared; - ggml_spin_lock(&shared->spin); int64_t perf_cycles_0 = 0; int64_t perf_time_0 = 0; @@ -307,8 +301,6 @@ void ggml_threading_resume(struct ggml_threading_context *ctx) { shared->wait_now = false; while (shared->n_waiting != 0) { - ggml_spin_unlock(&shared->spin); - if (loop_counter > 0) { ggml_spin_pause(); if (loop_counter > 3) { @@ -326,8 +318,6 @@ void ggml_threading_resume(struct ggml_threading_context *ctx) { GGML_ASSERT(pthread_cond_broadcast(&shared->cond) == 0); GGML_ASSERT(pthread_mutex_unlock(&shared->mutex) == 0); last_signal_time = ggml_time_us(); - - ggml_spin_lock(&shared->spin); } ctx->suspending = false; @@ -335,9 +325,7 @@ void ggml_threading_resume(struct ggml_threading_context *ctx) { if (shared->ctx->features & GGML_THREADING_FEATURE_PERF) { ggml_perf_collect(&shared->ctx->wakeup_perf, perf_cycles_0, perf_time_0); - } - - ggml_spin_unlock(&shared->spin); + }; } bool ggml_threading_is_suspending(struct ggml_threading_context *ctx) { @@ -385,8 +373,6 @@ static void ggml_threading_setup_workers(struct ggml_threading_context *ctx, } } else if (current->wait) { if (shared->n_waiting < n_worker_threads) { - PRINT_DEBUG("[main] wait_now will be set, expect %d workers wait\n", - n_worker_threads); ggml_spin_unlock(&ctx->shared.spin); ggml_threading_suspend(ctx); ggml_spin_lock(&ctx->shared.spin); diff --git a/tests/test-ggml-threading.c b/tests/test-ggml-threading.c index f941f4dc3..cb2cca163 100644 --- a/tests/test-ggml-threading.c +++ b/tests/test-ggml-threading.c @@ -214,7 +214,7 @@ lifecycle_runner(const struct ggml_compute_params *params, } // Test thread lifecycle: start -> suspend -> resume -> stop -static int test_lifecycle(void) { +static int test_lifecycle(bool wait_on_done) { struct ggml_tensor node; memset(&node, 0, sizeof(struct ggml_tensor)); @@ -243,14 +243,15 @@ static int test_lifecycle(void) { 
int threads_arr_len = sizeof(threads_arr) / sizeof(threads_arr[0]); int n_threads = 1; + enum ggml_threading_features features = + wait_on_done ? GGML_THREADING_FEATURE_NONE + : GGML_THREADING_FEATURE_WAIT_ON_DONE; for (int i = 0; i < threads_arr_len; i++) { n_threads = threads_arr[i]; int start_time = (int)ggml_time_ms(); - ctx = ggml_threading_start( - n_threads, NULL, lifecycle_runner, - /*features*/ GGML_THREADING_FEATURE_WAIT_ON_DONE | - GGML_THREADING_FEATURE_PERF, - /*stages_time*/ NULL); + ctx = ggml_threading_start(n_threads, NULL, lifecycle_runner, + features | GGML_THREADING_FEATURE_PERF, + /*stages_time*/ NULL); int elapsed = (int)ggml_time_ms() - start_time; if (elapsed > 5 * n_threads) { printf("[test-ggml-threading] %s: it took %d ms to start %d worker " @@ -547,13 +548,17 @@ int main(void) { } // lifecycle. - { - printf("[test-ggml-threading] test lifecycle ...\n"); + for (int i = 0; i < 2; i++) { + bool wait_on_done = (i == 1); + printf("[test-ggml-threading] test lifecycle (want_on_done = %d) ...\n", + wait_on_done); ++n_tests; - if (test_lifecycle() == 0) { + if (test_lifecycle(wait_on_done) == 0) { ++n_passed; - printf("[test-ggml-threading] test lifecycle: ok\n\n"); + printf("[test-ggml-threading] test lifecycle (want_on_done = %d): " + "ok\n\n", + wait_on_done); } }