mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-10 18:51:45 +00:00
threadng: remove unnecessary spin lock/unlock from suspend/resume; add more tests
This commit is contained in:
parent
5feefb32b3
commit
286c5b3014
@ -260,22 +260,17 @@ void ggml_threading_suspend(struct ggml_threading_context *ctx) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_compute_state_shared *shared = &ctx->shared;
|
PRINT_DEBUG("[main] wait_now will be set, expect %d workers wait\n",
|
||||||
|
n_worker_threads);
|
||||||
ggml_spin_lock(&shared->spin);
|
|
||||||
ctx->shared.wait_now = true;
|
ctx->shared.wait_now = true;
|
||||||
ggml_spin_unlock(&shared->spin);
|
|
||||||
|
|
||||||
const int n_worker_threads = ctx->n_threads - 1;
|
const int n_worker_threads = ctx->n_threads - 1;
|
||||||
|
|
||||||
while (ctx->shared.n_waiting != n_worker_threads) {
|
while (ctx->shared.n_waiting != n_worker_threads) {
|
||||||
ggml_spin_pause();
|
ggml_spin_pause();
|
||||||
}
|
}
|
||||||
|
|
||||||
ggml_spin_lock(&shared->spin);
|
|
||||||
ctx->suspending = true;
|
|
||||||
ggml_spin_unlock(&shared->spin);
|
|
||||||
PRINT_DEBUG("[main] saw %d workers waiting\n", n_worker_threads);
|
PRINT_DEBUG("[main] saw %d workers waiting\n", n_worker_threads);
|
||||||
|
ctx->suspending = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wakeup all workers.
|
// Wakeup all workers.
|
||||||
@ -291,7 +286,6 @@ void ggml_threading_resume(struct ggml_threading_context *ctx) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_compute_state_shared *shared = &ctx->shared;
|
struct ggml_compute_state_shared *shared = &ctx->shared;
|
||||||
ggml_spin_lock(&shared->spin);
|
|
||||||
|
|
||||||
int64_t perf_cycles_0 = 0;
|
int64_t perf_cycles_0 = 0;
|
||||||
int64_t perf_time_0 = 0;
|
int64_t perf_time_0 = 0;
|
||||||
@ -307,8 +301,6 @@ void ggml_threading_resume(struct ggml_threading_context *ctx) {
|
|||||||
shared->wait_now = false;
|
shared->wait_now = false;
|
||||||
|
|
||||||
while (shared->n_waiting != 0) {
|
while (shared->n_waiting != 0) {
|
||||||
ggml_spin_unlock(&shared->spin);
|
|
||||||
|
|
||||||
if (loop_counter > 0) {
|
if (loop_counter > 0) {
|
||||||
ggml_spin_pause();
|
ggml_spin_pause();
|
||||||
if (loop_counter > 3) {
|
if (loop_counter > 3) {
|
||||||
@ -326,8 +318,6 @@ void ggml_threading_resume(struct ggml_threading_context *ctx) {
|
|||||||
GGML_ASSERT(pthread_cond_broadcast(&shared->cond) == 0);
|
GGML_ASSERT(pthread_cond_broadcast(&shared->cond) == 0);
|
||||||
GGML_ASSERT(pthread_mutex_unlock(&shared->mutex) == 0);
|
GGML_ASSERT(pthread_mutex_unlock(&shared->mutex) == 0);
|
||||||
last_signal_time = ggml_time_us();
|
last_signal_time = ggml_time_us();
|
||||||
|
|
||||||
ggml_spin_lock(&shared->spin);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx->suspending = false;
|
ctx->suspending = false;
|
||||||
@ -335,9 +325,7 @@ void ggml_threading_resume(struct ggml_threading_context *ctx) {
|
|||||||
if (shared->ctx->features & GGML_THREADING_FEATURE_PERF) {
|
if (shared->ctx->features & GGML_THREADING_FEATURE_PERF) {
|
||||||
ggml_perf_collect(&shared->ctx->wakeup_perf, perf_cycles_0,
|
ggml_perf_collect(&shared->ctx->wakeup_perf, perf_cycles_0,
|
||||||
perf_time_0);
|
perf_time_0);
|
||||||
}
|
};
|
||||||
|
|
||||||
ggml_spin_unlock(&shared->spin);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ggml_threading_is_suspending(struct ggml_threading_context *ctx) {
|
bool ggml_threading_is_suspending(struct ggml_threading_context *ctx) {
|
||||||
@ -385,8 +373,6 @@ static void ggml_threading_setup_workers(struct ggml_threading_context *ctx,
|
|||||||
}
|
}
|
||||||
} else if (current->wait) {
|
} else if (current->wait) {
|
||||||
if (shared->n_waiting < n_worker_threads) {
|
if (shared->n_waiting < n_worker_threads) {
|
||||||
PRINT_DEBUG("[main] wait_now will be set, expect %d workers wait\n",
|
|
||||||
n_worker_threads);
|
|
||||||
ggml_spin_unlock(&ctx->shared.spin);
|
ggml_spin_unlock(&ctx->shared.spin);
|
||||||
ggml_threading_suspend(ctx);
|
ggml_threading_suspend(ctx);
|
||||||
ggml_spin_lock(&ctx->shared.spin);
|
ggml_spin_lock(&ctx->shared.spin);
|
||||||
|
@ -214,7 +214,7 @@ lifecycle_runner(const struct ggml_compute_params *params,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Test thread lifecycle: start -> suspend -> resume -> stop
|
// Test thread lifecycle: start -> suspend -> resume -> stop
|
||||||
static int test_lifecycle(void) {
|
static int test_lifecycle(bool wait_on_done) {
|
||||||
struct ggml_tensor node;
|
struct ggml_tensor node;
|
||||||
memset(&node, 0, sizeof(struct ggml_tensor));
|
memset(&node, 0, sizeof(struct ggml_tensor));
|
||||||
|
|
||||||
@ -243,14 +243,15 @@ static int test_lifecycle(void) {
|
|||||||
int threads_arr_len = sizeof(threads_arr) / sizeof(threads_arr[0]);
|
int threads_arr_len = sizeof(threads_arr) / sizeof(threads_arr[0]);
|
||||||
int n_threads = 1;
|
int n_threads = 1;
|
||||||
|
|
||||||
|
enum ggml_threading_features features =
|
||||||
|
wait_on_done ? GGML_THREADING_FEATURE_NONE
|
||||||
|
: GGML_THREADING_FEATURE_WAIT_ON_DONE;
|
||||||
for (int i = 0; i < threads_arr_len; i++) {
|
for (int i = 0; i < threads_arr_len; i++) {
|
||||||
n_threads = threads_arr[i];
|
n_threads = threads_arr[i];
|
||||||
int start_time = (int)ggml_time_ms();
|
int start_time = (int)ggml_time_ms();
|
||||||
ctx = ggml_threading_start(
|
ctx = ggml_threading_start(n_threads, NULL, lifecycle_runner,
|
||||||
n_threads, NULL, lifecycle_runner,
|
features | GGML_THREADING_FEATURE_PERF,
|
||||||
/*features*/ GGML_THREADING_FEATURE_WAIT_ON_DONE |
|
/*stages_time*/ NULL);
|
||||||
GGML_THREADING_FEATURE_PERF,
|
|
||||||
/*stages_time*/ NULL);
|
|
||||||
int elapsed = (int)ggml_time_ms() - start_time;
|
int elapsed = (int)ggml_time_ms() - start_time;
|
||||||
if (elapsed > 5 * n_threads) {
|
if (elapsed > 5 * n_threads) {
|
||||||
printf("[test-ggml-threading] %s: it took %d ms to start %d worker "
|
printf("[test-ggml-threading] %s: it took %d ms to start %d worker "
|
||||||
@ -547,13 +548,17 @@ int main(void) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// lifecycle.
|
// lifecycle.
|
||||||
{
|
for (int i = 0; i < 2; i++) {
|
||||||
printf("[test-ggml-threading] test lifecycle ...\n");
|
bool wait_on_done = (i == 1);
|
||||||
|
printf("[test-ggml-threading] test lifecycle (want_on_done = %d) ...\n",
|
||||||
|
wait_on_done);
|
||||||
++n_tests;
|
++n_tests;
|
||||||
|
|
||||||
if (test_lifecycle() == 0) {
|
if (test_lifecycle(wait_on_done) == 0) {
|
||||||
++n_passed;
|
++n_passed;
|
||||||
printf("[test-ggml-threading] test lifecycle: ok\n\n");
|
printf("[test-ggml-threading] test lifecycle (want_on_done = %d): "
|
||||||
|
"ok\n\n",
|
||||||
|
wait_on_done);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user