Remove thread-local variable: Windows does not recognize it

This commit is contained in:
mqy 2023-06-28 04:51:55 +08:00
parent fef9eac856
commit 767d1db097

View File

@ -105,8 +105,6 @@ typedef pthread_t ggml_thread_t;
/// Copyright original authors. /// Copyright original authors.
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
_Thread_local int32_t thread_local_id;
#define MAX_THREADS 16 #define MAX_THREADS 16
struct task_allocator { struct task_allocator {
@ -159,8 +157,7 @@ static void allocate_chunk(struct task_allocator *a, int ith, int *chunk_idx,
// all assigned? // all assigned?
if (atomic_load(&a->global_counter) == total_chunks) { if (atomic_load(&a->global_counter) == total_chunks) {
GGML_PRINT_DEBUG_5("[#_%d] %s(): nothing to do.\n", thread_local_id, GGML_PRINT_DEBUG("[#_%d] %s(): nothing to do.\n", ith, __func__);
__func__);
atomic_fetch_sub(&a->lock, 1); // unlock atomic_fetch_sub(&a->lock, 1); // unlock
return; return;
} }
@ -180,7 +177,7 @@ static void allocate_chunk(struct task_allocator *a, int ith, int *chunk_idx,
atomic_fetch_add(&a->global_counter, 1); atomic_fetch_add(&a->global_counter, 1);
GGML_PRINT_DEBUG("[#_%d] %s(): take the %3d-th trunk of its own.\n", GGML_PRINT_DEBUG("[#_%d] %s(): take the %3d-th trunk of its own.\n",
thread_local_id, __func__, head + 1); ith, __func__, head + 1);
*chunk_idx = idx; *chunk_idx = idx;
*n_chunks = total_chunks; *n_chunks = total_chunks;
@ -205,8 +202,8 @@ static void allocate_chunk(struct task_allocator *a, int ith, int *chunk_idx,
atomic_fetch_sub(&a->thread_queue_tails[i], 1); atomic_fetch_sub(&a->thread_queue_tails[i], 1);
atomic_fetch_add(&a->global_counter, 1); atomic_fetch_add(&a->global_counter, 1);
GGML_PRINT_DEBUG("[#_%d] %s(): steal the %d-th trunk from #_%d\n", GGML_PRINT_DEBUG("[#_%d] %s(): steal the %d-th trunk from #_%d\n", ith,
thread_local_id, __func__, tail, i); __func__, tail, i);
*chunk_idx = idx; *chunk_idx = idx;
*n_chunks = total_chunks; *n_chunks = total_chunks;
@ -299,8 +296,6 @@ static thread_ret_t demo_compute_thread(void *data) {
int ith = state->ith; int ith = state->ith;
int n_threads = shared->n_threads; int n_threads = shared->n_threads;
thread_local_id = ith;
atomic_int *done_counter = &shared->done_counter; atomic_int *done_counter = &shared->done_counter;
for (int i = 0; i < shared->n_nodes; ++i) { for (int i = 0; i < shared->n_nodes; ++i) {
@ -352,10 +347,9 @@ static thread_ret_t demo_compute_thread(void *data) {
static void test_task_allocator(int n_threads, int n_nodes, int n_compute_units, static void test_task_allocator(int n_threads, int n_nodes, int n_compute_units,
int n_multiplier) { int n_multiplier) {
fprintf(stderr, fprintf(stderr,
"\n[#_%d] %s(): n_threads: %d, n_nodes: %d, n_compute_units: %d, " "\n%s(): n_threads: %d, n_nodes: %d, n_compute_units: %d, "
"n_multiplier: %d ===>\n\n", "n_multiplier: %d ===>\n\n",
thread_local_id, __func__, n_threads, n_nodes, n_compute_units, __func__, n_threads, n_nodes, n_compute_units, n_multiplier);
n_multiplier);
struct ggml_tensor *nodes = alloca(n_nodes * sizeof(struct ggml_tensor)); struct ggml_tensor *nodes = alloca(n_nodes * sizeof(struct ggml_tensor));