diff --git a/examples/task-allocator/task-allocator.c b/examples/task-allocator/task-allocator.c index fc596a27a..8c376c90b 100644 --- a/examples/task-allocator/task-allocator.c +++ b/examples/task-allocator/task-allocator.c @@ -105,8 +105,6 @@ typedef pthread_t ggml_thread_t; /// Copyright original authors. //----------------------------------------------------------------------------- -_Thread_local int32_t thread_local_id; - #define MAX_THREADS 16 struct task_allocator { @@ -159,8 +157,7 @@ static void allocate_chunk(struct task_allocator *a, int ith, int *chunk_idx, // all assigned? if (atomic_load(&a->global_counter) == total_chunks) { - GGML_PRINT_DEBUG_5("[#_%d] %s(): nothing to do.\n", thread_local_id, - __func__); + GGML_PRINT_DEBUG("[#_%d] %s(): nothing to do.\n", ith, __func__); atomic_fetch_sub(&a->lock, 1); // unlock return; } @@ -180,7 +177,7 @@ static void allocate_chunk(struct task_allocator *a, int ith, int *chunk_idx, atomic_fetch_add(&a->global_counter, 1); GGML_PRINT_DEBUG("[#_%d] %s(): take the %3d-th trunk of its own.\n", - thread_local_id, __func__, head + 1); + ith, __func__, head + 1); *chunk_idx = idx; *n_chunks = total_chunks; @@ -205,8 +202,8 @@ static void allocate_chunk(struct task_allocator *a, int ith, int *chunk_idx, atomic_fetch_sub(&a->thread_queue_tails[i], 1); atomic_fetch_add(&a->global_counter, 1); - GGML_PRINT_DEBUG("[#_%d] %s(): steal the %d-th trunk from #_%d\n", - thread_local_id, __func__, tail, i); + GGML_PRINT_DEBUG("[#_%d] %s(): steal the %d-th trunk from #_%d\n", ith, + __func__, tail, i); *chunk_idx = idx; *n_chunks = total_chunks; @@ -299,8 +296,6 @@ static thread_ret_t demo_compute_thread(void *data) { int ith = state->ith; int n_threads = shared->n_threads; - thread_local_id = ith; - atomic_int *done_counter = &shared->done_counter; for (int i = 0; i < shared->n_nodes; ++i) { @@ -352,10 +347,9 @@ static thread_ret_t demo_compute_thread(void *data) { static void test_task_allocator(int n_threads, int n_nodes, int n_compute_units, int n_multiplier) { fprintf(stderr, - "\n[#_%d] %s(): n_threads: %d, n_nodes: %d, n_compute_units: %d, " + "\n%s(): n_threads: %d, n_nodes: %d, n_compute_units: %d, " "n_multiplier: %d ===>\n\n", - thread_local_id, __func__, n_threads, n_nodes, n_compute_units, - n_multiplier); + __func__, n_threads, n_nodes, n_compute_units, n_multiplier); struct ggml_tensor *nodes = alloca(n_nodes * sizeof(struct ggml_tensor));