mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-07 09:11:46 +00:00
using github Pithikos/C-Thread-Pool for threading
This commit is contained in:
parent
21e88c8b0f
commit
a65d37ad36
@ -227,7 +227,10 @@ endif()
|
|||||||
|
|
||||||
add_library(ggml OBJECT
|
add_library(ggml OBJECT
|
||||||
ggml.c
|
ggml.c
|
||||||
ggml.h)
|
ggml.h
|
||||||
|
thpool.c
|
||||||
|
thpool.h
|
||||||
|
)
|
||||||
|
|
||||||
target_include_directories(ggml PUBLIC .)
|
target_include_directories(ggml PUBLIC .)
|
||||||
target_compile_features(ggml PUBLIC c_std_11) # don't bump
|
target_compile_features(ggml PUBLIC c_std_11) # don't bump
|
||||||
|
17
Makefile
17
Makefile
@ -225,6 +225,9 @@ default: main quantize perplexity embedding
|
|||||||
# Build library
|
# Build library
|
||||||
#
|
#
|
||||||
|
|
||||||
|
thpool.o: thpool.c thpool.h
|
||||||
|
$(CC) $(CFLAGS) -c thpool.c -o thpool.o
|
||||||
|
|
||||||
ggml.o: ggml.c ggml.h
|
ggml.o: ggml.c ggml.h
|
||||||
$(CC) $(CFLAGS) -c ggml.c -o ggml.o
|
$(CC) $(CFLAGS) -c ggml.c -o ggml.o
|
||||||
|
|
||||||
@ -237,20 +240,20 @@ common.o: examples/common.cpp examples/common.h
|
|||||||
clean:
|
clean:
|
||||||
rm -vf *.o main quantize perplexity embedding
|
rm -vf *.o main quantize perplexity embedding
|
||||||
|
|
||||||
main: examples/main/main.cpp ggml.o llama.o common.o
|
main: examples/main/main.cpp thpool.o ggml.o llama.o common.o
|
||||||
$(CXX) $(CXXFLAGS) examples/main/main.cpp ggml.o llama.o common.o -o main $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) examples/main/main.cpp thpool.o ggml.o llama.o common.o -o main $(LDFLAGS)
|
||||||
@echo
|
@echo
|
||||||
@echo '==== Run ./main -h for help. ===='
|
@echo '==== Run ./main -h for help. ===='
|
||||||
@echo
|
@echo
|
||||||
|
|
||||||
quantize: examples/quantize/quantize.cpp ggml.o llama.o
|
quantize: examples/quantize/quantize.cpp thpool.o ggml.o llama.o
|
||||||
$(CXX) $(CXXFLAGS) examples/quantize/quantize.cpp ggml.o llama.o -o quantize $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) examples/quantize/quantize.cpp thpool.o ggml.o llama.o -o quantize $(LDFLAGS)
|
||||||
|
|
||||||
perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o common.o
|
perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o common.o
|
||||||
$(CXX) $(CXXFLAGS) examples/perplexity/perplexity.cpp ggml.o llama.o common.o -o perplexity $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) examples/perplexity/perplexity.cpp thpool.o ggml.o llama.o common.o -o perplexity $(LDFLAGS)
|
||||||
|
|
||||||
embedding: examples/embedding/embedding.cpp ggml.o llama.o common.o
|
embedding: examples/embedding/embedding.cpp thpool.o ggml.o llama.o common.o
|
||||||
$(CXX) $(CXXFLAGS) examples/embedding/embedding.cpp ggml.o llama.o common.o -o embedding $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) examples/embedding/embedding.cpp thpool.o ggml.o llama.o common.o -o embedding $(LDFLAGS)
|
||||||
|
|
||||||
#
|
#
|
||||||
# Tests
|
# Tests
|
||||||
|
234
ggml.c
234
ggml.c
@ -3,6 +3,8 @@
|
|||||||
|
|
||||||
#include "ggml.h"
|
#include "ggml.h"
|
||||||
|
|
||||||
|
#include "thpool.h"
|
||||||
|
|
||||||
#if defined(_MSC_VER) || defined(__MINGW32__)
|
#if defined(_MSC_VER) || defined(__MINGW32__)
|
||||||
#include <malloc.h> // using malloc.h with MSC/MINGW
|
#include <malloc.h> // using malloc.h with MSC/MINGW
|
||||||
#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
|
#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
|
||||||
@ -72,6 +74,59 @@ static int sched_yield (void) {
|
|||||||
Sleep (0);
|
Sleep (0);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
typedef struct pthread_mutex_tag {
|
||||||
|
CRITICAL_SECTION critical_section;
|
||||||
|
} pthread_mutex_t;
|
||||||
|
|
||||||
|
// Placeholder attribute type; it exists so pthread_mutex_init() can keep a
// POSIX-shaped signature. The shim functions in this section never read it.
typedef struct pthread_mutexattr_tag {
    int attr;
} pthread_mutexattr_t;
|
||||||
|
|
||||||
|
// Prepare the critical section wrapped by `mutex` for use.
// `attr` is accepted only for signature compatibility and is ignored.
// Always returns 0.
int pthread_mutex_init(pthread_mutex_t * mutex, const pthread_mutexattr_t * attr) {
    (void) attr;

    InitializeCriticalSection(&mutex->critical_section);

    return 0;
}
|
||||||
|
|
||||||
|
// Release the critical section wrapped by `mutex`. Always returns 0.
int pthread_mutex_destroy(pthread_mutex_t * mutex) {
    CRITICAL_SECTION * cs = &mutex->critical_section;

    DeleteCriticalSection(cs);

    return 0;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Enter the critical section wrapped by `mutex`. Always returns 0.
int pthread_mutex_lock(pthread_mutex_t * mutex) {
    CRITICAL_SECTION * cs = &mutex->critical_section;

    EnterCriticalSection(cs);

    return 0;
}
|
||||||
|
|
||||||
|
// Leave the critical section wrapped by `mutex`. Always returns 0.
int pthread_mutex_unlock(pthread_mutex_t * mutex) {
    CRITICAL_SECTION * cs = &mutex->critical_section;

    LeaveCriticalSection(cs);

    return 0;
}
|
||||||
|
|
||||||
|
typedef struct pthread_cond_tag {
|
||||||
|
CONDITION_VARIABLE cond;
|
||||||
|
} pthread_cond_t;
|
||||||
|
|
||||||
|
// Prepare the condition variable wrapped by `cond` for use.
// The second argument is ignored. Always returns 0.
int pthread_cond_init(pthread_cond_t * cond, void * unused) {
    (void) unused;

    InitializeConditionVariable(&cond->cond);

    return 0;
}
|
||||||
|
|
||||||
|
// No-op counterpart of pthread_cond_init(): this shim performs no cleanup
// on `cond`. Always returns 0.
int pthread_cond_destroy(pthread_cond_t * cond) {
    (void) cond;

    return 0;
}
|
||||||
|
|
||||||
|
// Sleep on `cond` with no timeout (INFINITE), passing the critical section
// of `mutex` to SleepConditionVariableCS. The result of that call is not
// inspected; this function always returns 0.
int pthread_cond_wait(pthread_cond_t * cond, pthread_mutex_t * mutex) {
    CONDITION_VARIABLE * cv = &cond->cond;
    CRITICAL_SECTION   * cs = &mutex->critical_section;

    SleepConditionVariableCS(cv, cs, INFINITE);

    return 0;
}
|
||||||
|
|
||||||
|
// Wake every thread currently sleeping on `cond`. Always returns 0.
int pthread_cond_broadcast(pthread_cond_t * cond) {
    CONDITION_VARIABLE * cv = &cond->cond;

    WakeAllConditionVariable(cv);

    return 0;
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
#include <stdatomic.h>
|
#include <stdatomic.h>
|
||||||
@ -2538,6 +2593,7 @@ struct ggml_context {
|
|||||||
|
|
||||||
struct ggml_scratch scratch;
|
struct ggml_scratch scratch;
|
||||||
struct ggml_scratch scratch_save;
|
struct ggml_scratch scratch_save;
|
||||||
|
threadpool tpool;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ggml_context_container {
|
struct ggml_context_container {
|
||||||
@ -2822,6 +2878,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
|
|||||||
/*.objects_end =*/ NULL,
|
/*.objects_end =*/ NULL,
|
||||||
/*.scratch =*/ { 0, 0, NULL, },
|
/*.scratch =*/ { 0, 0, NULL, },
|
||||||
/*.scratch_save =*/ { 0, 0, NULL, },
|
/*.scratch_save =*/ { 0, 0, NULL, },
|
||||||
|
/*.thpool =*/ NULL,
|
||||||
};
|
};
|
||||||
|
|
||||||
GGML_ASSERT(ctx->mem_buffer != NULL); // check for allocation failure
|
GGML_ASSERT(ctx->mem_buffer != NULL); // check for allocation failure
|
||||||
@ -8954,6 +9011,19 @@ typedef pthread_t ggml_thread_t;
|
|||||||
#define ggml_thread_create pthread_create
|
#define ggml_thread_create pthread_create
|
||||||
#define ggml_thread_join pthread_join
|
#define ggml_thread_join pthread_join
|
||||||
|
|
||||||
|
typedef pthread_mutex_t ggml_mutex_t;
|
||||||
|
typedef pthread_cond_t ggml_cond_t;
|
||||||
|
|
||||||
|
#define ggml_mutex_init pthread_mutex_init
|
||||||
|
#define ggml_mutex_destroy pthread_mutex_destroy
|
||||||
|
#define ggml_cond_init pthread_cond_init
|
||||||
|
#define ggml_cond_destroy pthread_cond_destroy
|
||||||
|
|
||||||
|
#define ggml_mutex_lock pthread_mutex_lock
|
||||||
|
#define ggml_mutex_unlock pthread_mutex_unlock
|
||||||
|
#define ggml_cond_broadcast pthread_cond_broadcast
|
||||||
|
#define ggml_cond_wait pthread_cond_wait
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
//typedef pthread_spinlock_t ggml_lock_t;
|
//typedef pthread_spinlock_t ggml_lock_t;
|
||||||
@ -8977,17 +9047,31 @@ typedef pthread_t ggml_thread_t;
|
|||||||
#define ggml_thread_create pthread_create
|
#define ggml_thread_create pthread_create
|
||||||
#define ggml_thread_join pthread_join
|
#define ggml_thread_join pthread_join
|
||||||
|
|
||||||
|
typedef pthread_mutex_t ggml_mutex_t;
|
||||||
|
typedef pthread_cond_t ggml_cond_t;
|
||||||
|
|
||||||
|
#define ggml_mutex_init pthread_mutex_init
|
||||||
|
#define ggml_mutex_destroy pthread_mutex_destroy
|
||||||
|
#define ggml_cond_init pthread_cond_init
|
||||||
|
#define ggml_cond_destroy pthread_cond_destroy
|
||||||
|
|
||||||
|
#define ggml_mutex_lock pthread_mutex_lock
|
||||||
|
#define ggml_mutex_unlock pthread_mutex_unlock
|
||||||
|
#define ggml_cond_broadcast pthread_cond_broadcast
|
||||||
|
#define ggml_cond_wait pthread_cond_wait
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
struct ggml_compute_state_shared {
|
struct ggml_compute_state_shared {
|
||||||
ggml_lock_t spin;
|
|
||||||
|
|
||||||
int n_threads;
|
int n_threads;
|
||||||
|
|
||||||
// synchronization primitives
|
// synchronization primitives
|
||||||
atomic_int n_ready;
|
int n_ready;
|
||||||
atomic_bool has_work;
|
bool has_work;
|
||||||
atomic_bool stop; // stop all threads
|
bool stop; // stop all threads
|
||||||
|
ggml_mutex_t mutex;
|
||||||
|
ggml_cond_t cond;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ggml_compute_state {
|
struct ggml_compute_state {
|
||||||
@ -8999,72 +9083,31 @@ struct ggml_compute_state {
|
|||||||
struct ggml_compute_state_shared * shared;
|
struct ggml_compute_state_shared * shared;
|
||||||
};
|
};
|
||||||
|
|
||||||
static thread_ret_t ggml_graph_compute_thread(void * data) {
|
static void ggml_graph_compute_thread(void * data) {
|
||||||
struct ggml_compute_state * state = (struct ggml_compute_state *) data;
|
struct ggml_compute_state * state = (struct ggml_compute_state *) data;
|
||||||
|
if (state->node) {
|
||||||
const int n_threads = state->shared->n_threads;
|
if (state->params.ith < state->params.nth) {
|
||||||
|
ggml_compute_forward(&state->params, state->node);
|
||||||
while (true) {
|
|
||||||
if (atomic_fetch_add(&state->shared->n_ready, 1) == n_threads - 1) {
|
|
||||||
atomic_store(&state->shared->has_work, false);
|
|
||||||
} else {
|
|
||||||
while (atomic_load(&state->shared->has_work)) {
|
|
||||||
if (atomic_load(&state->shared->stop)) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
ggml_lock_lock (&state->shared->spin);
|
|
||||||
ggml_lock_unlock(&state->shared->spin);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
atomic_fetch_sub(&state->shared->n_ready, 1);
|
|
||||||
|
|
||||||
// wait for work
|
|
||||||
while (!atomic_load(&state->shared->has_work)) {
|
|
||||||
if (atomic_load(&state->shared->stop)) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
ggml_lock_lock (&state->shared->spin);
|
|
||||||
ggml_lock_unlock(&state->shared->spin);
|
|
||||||
}
|
|
||||||
|
|
||||||
// check if we should stop
|
|
||||||
if (atomic_load(&state->shared->stop)) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (state->node) {
|
|
||||||
if (state->params.ith < state->params.nth) {
|
|
||||||
ggml_compute_forward(&state->params, state->node);
|
|
||||||
}
|
|
||||||
|
|
||||||
state->node = NULL;
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
state->node = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) {
|
void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) {
|
||||||
const int n_threads = cgraph->n_threads;
|
const int n_threads = cgraph->n_threads;
|
||||||
|
|
||||||
struct ggml_compute_state_shared state_shared = {
|
struct ggml_compute_state_shared state_shared = {
|
||||||
/*.spin =*/ GGML_LOCK_INITIALIZER,
|
|
||||||
/*.n_threads =*/ n_threads,
|
/*.n_threads =*/ n_threads,
|
||||||
/*.n_ready =*/ 0,
|
/*.n_ready =*/ 0,
|
||||||
/*.has_work =*/ false,
|
/*.has_work =*/ false,
|
||||||
/*.stop =*/ false,
|
/*.stop =*/ false,
|
||||||
|
/*.mutex =*/ {0},
|
||||||
|
/*.cond =*/ {0},
|
||||||
};
|
};
|
||||||
struct ggml_compute_state * workers = n_threads > 1 ? alloca(sizeof(struct ggml_compute_state)*(n_threads - 1)) : NULL;
|
struct ggml_compute_state * workers = n_threads > 1 ? alloca(sizeof(struct ggml_compute_state)*(n_threads - 1)) : NULL;
|
||||||
|
|
||||||
// create thread pool
|
// create thread pool
|
||||||
if (n_threads > 1) {
|
if (n_threads > 1) {
|
||||||
ggml_lock_init(&state_shared.spin);
|
ctx->tpool = thpool_init(n_threads);
|
||||||
|
|
||||||
atomic_store(&state_shared.has_work, true);
|
|
||||||
|
|
||||||
for (int j = 0; j < n_threads - 1; j++) {
|
for (int j = 0; j < n_threads - 1; j++) {
|
||||||
workers[j] = (struct ggml_compute_state) {
|
workers[j] = (struct ggml_compute_state) {
|
||||||
.thrd = 0,
|
.thrd = 0,
|
||||||
@ -9078,10 +9121,6 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
|
|||||||
.node = NULL,
|
.node = NULL,
|
||||||
.shared = &state_shared,
|
.shared = &state_shared,
|
||||||
};
|
};
|
||||||
|
|
||||||
int rc = ggml_thread_create(&workers[j].thrd, NULL, ggml_graph_compute_thread, &workers[j]);
|
|
||||||
GGML_ASSERT(rc == 0);
|
|
||||||
UNUSED(rc);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -9319,15 +9358,6 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
|
|||||||
|
|
||||||
// COMPUTE
|
// COMPUTE
|
||||||
if (node->n_tasks > 1) {
|
if (node->n_tasks > 1) {
|
||||||
if (atomic_fetch_add(&state_shared.n_ready, 1) == n_threads - 1) {
|
|
||||||
atomic_store(&state_shared.has_work, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
while (atomic_load(&state_shared.has_work)) {
|
|
||||||
ggml_lock_lock (&state_shared.spin);
|
|
||||||
ggml_lock_unlock(&state_shared.spin);
|
|
||||||
}
|
|
||||||
|
|
||||||
// launch thread pool
|
// launch thread pool
|
||||||
for (int j = 0; j < n_threads - 1; j++) {
|
for (int j = 0; j < n_threads - 1; j++) {
|
||||||
workers[j].params = (struct ggml_compute_params) {
|
workers[j].params = (struct ggml_compute_params) {
|
||||||
@ -9338,16 +9368,8 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
|
|||||||
.wdata = cgraph->work ? cgraph->work->data : NULL,
|
.wdata = cgraph->work ? cgraph->work->data : NULL,
|
||||||
};
|
};
|
||||||
workers[j].node = node;
|
workers[j].node = node;
|
||||||
|
thpool_add_work(ctx->tpool, ggml_graph_compute_thread, &workers[j]);
|
||||||
}
|
}
|
||||||
|
|
||||||
atomic_fetch_sub(&state_shared.n_ready, 1);
|
|
||||||
|
|
||||||
while (atomic_load(&state_shared.n_ready) > 0) {
|
|
||||||
ggml_lock_lock (&state_shared.spin);
|
|
||||||
ggml_lock_unlock(&state_shared.spin);
|
|
||||||
}
|
|
||||||
|
|
||||||
atomic_store(&state_shared.has_work, true);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
params.type = GGML_TASK_COMPUTE;
|
params.type = GGML_TASK_COMPUTE;
|
||||||
@ -9355,34 +9377,11 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
|
|||||||
|
|
||||||
// wait for thread pool
|
// wait for thread pool
|
||||||
if (node->n_tasks > 1) {
|
if (node->n_tasks > 1) {
|
||||||
if (atomic_fetch_add(&state_shared.n_ready, 1) == n_threads - 1) {
|
thpool_wait(ctx->tpool);
|
||||||
atomic_store(&state_shared.has_work, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
while (atomic_load(&state_shared.has_work)) {
|
|
||||||
ggml_lock_lock (&state_shared.spin);
|
|
||||||
ggml_lock_unlock(&state_shared.spin);
|
|
||||||
}
|
|
||||||
|
|
||||||
atomic_fetch_sub(&state_shared.n_ready, 1);
|
|
||||||
|
|
||||||
while (atomic_load(&state_shared.n_ready) != 0) {
|
|
||||||
ggml_lock_lock (&state_shared.spin);
|
|
||||||
ggml_lock_unlock(&state_shared.spin);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// FINALIZE
|
// FINALIZE
|
||||||
if (node->n_tasks > 1) {
|
if (node->n_tasks > 1) {
|
||||||
if (atomic_fetch_add(&state_shared.n_ready, 1) == n_threads - 1) {
|
|
||||||
atomic_store(&state_shared.has_work, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
while (atomic_load(&state_shared.has_work)) {
|
|
||||||
ggml_lock_lock (&state_shared.spin);
|
|
||||||
ggml_lock_unlock(&state_shared.spin);
|
|
||||||
}
|
|
||||||
|
|
||||||
// launch thread pool
|
// launch thread pool
|
||||||
for (int j = 0; j < n_threads - 1; j++) {
|
for (int j = 0; j < n_threads - 1; j++) {
|
||||||
workers[j].params = (struct ggml_compute_params) {
|
workers[j].params = (struct ggml_compute_params) {
|
||||||
@ -9393,16 +9392,8 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
|
|||||||
.wdata = cgraph->work ? cgraph->work->data : NULL,
|
.wdata = cgraph->work ? cgraph->work->data : NULL,
|
||||||
};
|
};
|
||||||
workers[j].node = node;
|
workers[j].node = node;
|
||||||
|
thpool_add_work(ctx->tpool, ggml_graph_compute_thread, &workers[j]);
|
||||||
}
|
}
|
||||||
|
|
||||||
atomic_fetch_sub(&state_shared.n_ready, 1);
|
|
||||||
|
|
||||||
while (atomic_load(&state_shared.n_ready) > 0) {
|
|
||||||
ggml_lock_lock (&state_shared.spin);
|
|
||||||
ggml_lock_unlock(&state_shared.spin);
|
|
||||||
}
|
|
||||||
|
|
||||||
atomic_store(&state_shared.has_work, true);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
params.type = GGML_TASK_FINALIZE;
|
params.type = GGML_TASK_FINALIZE;
|
||||||
@ -9410,21 +9401,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
|
|||||||
|
|
||||||
// wait for thread pool
|
// wait for thread pool
|
||||||
if (node->n_tasks > 1) {
|
if (node->n_tasks > 1) {
|
||||||
if (atomic_fetch_add(&state_shared.n_ready, 1) == n_threads - 1) {
|
thpool_wait(ctx->tpool);
|
||||||
atomic_store(&state_shared.has_work, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
while (atomic_load(&state_shared.has_work)) {
|
|
||||||
ggml_lock_lock (&state_shared.spin);
|
|
||||||
ggml_lock_unlock(&state_shared.spin);
|
|
||||||
}
|
|
||||||
|
|
||||||
atomic_fetch_sub(&state_shared.n_ready, 1);
|
|
||||||
|
|
||||||
while (atomic_load(&state_shared.n_ready) != 0) {
|
|
||||||
ggml_lock_lock (&state_shared.spin);
|
|
||||||
ggml_lock_unlock(&state_shared.spin);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// performance stats (node)
|
// performance stats (node)
|
||||||
@ -9440,16 +9417,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
|
|||||||
|
|
||||||
// join thread pool
|
// join thread pool
|
||||||
if (n_threads > 1) {
|
if (n_threads > 1) {
|
||||||
atomic_store(&state_shared.stop, true);
|
thpool_destroy(ctx->tpool);
|
||||||
atomic_store(&state_shared.has_work, true);
|
|
||||||
|
|
||||||
for (int j = 0; j < n_threads - 1; j++) {
|
|
||||||
int rc = ggml_thread_join(workers[j].thrd, NULL);
|
|
||||||
GGML_ASSERT(rc == 0);
|
|
||||||
UNUSED(rc);
|
|
||||||
}
|
|
||||||
|
|
||||||
ggml_lock_destroy(&state_shared.spin);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// performance stats (graph)
|
// performance stats (graph)
|
||||||
|
553
thpool.c
Normal file
553
thpool.c
Normal file
@ -0,0 +1,553 @@
|
|||||||
|
/* ********************************
|
||||||
|
* Author: Johan Hanssen Seferidis
|
||||||
|
* License: MIT
|
||||||
|
* Description: Library providing a threading pool where you can add
|
||||||
|
* work. For usage, check the thpool.h file or README.md
|
||||||
|
*
|
||||||
|
*//** @file thpool.h *//*
|
||||||
|
*
|
||||||
|
********************************/
|
||||||
|
|
||||||
|
#if defined(__APPLE__)
|
||||||
|
#include <AvailabilityMacros.h>
|
||||||
|
#else
|
||||||
|
#ifndef _POSIX_C_SOURCE
|
||||||
|
#define _POSIX_C_SOURCE 200809L
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <signal.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <pthread.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <time.h>
|
||||||
|
#if defined(__linux__)
|
||||||
|
#include <sys/prctl.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "thpool.h"
|
||||||
|
|
||||||
|
/* Normalise THPOOL_DEBUG to exactly 0 or 1 so it can be used in #if.
 * The #undef avoids an incompatible macro redefinition when the build
 * passes a value other than 1 (e.g. -DTHPOOL_DEBUG=2). */
#ifdef THPOOL_DEBUG
#undef  THPOOL_DEBUG
#define THPOOL_DEBUG 1
#else
#define THPOOL_DEBUG 0
#endif

/* err(str): write a diagnostic message to stderr.
 *
 * Compiled out when DISABLE_PRINT is defined, unless debugging is enabled.
 * The test uses the VALUE of THPOOL_DEBUG: the macro is always defined at
 * this point, so `defined(THPOOL_DEBUG)` would make DISABLE_PRINT a no-op.
 * fputs() is used so the message is never interpreted as a format string. */
#if !defined(DISABLE_PRINT) || THPOOL_DEBUG
#define err(str) fputs((str), stderr)
#else
#define err(str) ((void)0)
#endif
|
||||||
|
|
||||||
|
/* File-scope flags, i.e. one copy for the whole translation unit rather
 * than one per pool. */
static volatile int threads_keepalive;  /* worker loops run while non-zero   */
static volatile int threads_on_hold;    /* thread_hold() sleeps while non-zero */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* ========================== STRUCTURES ============================ */
|
||||||
|
|
||||||
|
|
||||||
|
/* Binary semaphore: a value plus the mutex/condition pair used by the
 * bsem_* helpers declared further down in this file. */
typedef struct bsem {
    pthread_mutex_t mutex;
    pthread_cond_t  cond;
    int             v;      /* semaphore value */
} bsem;
|
||||||
|
|
||||||
|
|
||||||
|
/* Job: one unit of work, stored as a node of the job queue's linked list */
typedef struct job{
    struct job* prev;              /* link to the job queued after this one (set by jobqueue_push) */
    void (*function)(void* arg);   /* callback to run                        */
    void* arg;                     /* argument passed to the callback        */
} job;
|
||||||
|
|
||||||
|
|
||||||
|
/* Job queue */
|
||||||
|
typedef struct jobqueue{
|
||||||
|
pthread_mutex_t rwmutex; /* used for queue r/w access */
|
||||||
|
job *front; /* pointer to front of queue */
|
||||||
|
job *rear; /* pointer to rear of queue */
|
||||||
|
bsem *has_jobs; /* flag as binary semaphore */
|
||||||
|
int len; /* number of jobs in queue */
|
||||||
|
} jobqueue;
|
||||||
|
|
||||||
|
|
||||||
|
/* Thread: bookkeeping record for one worker of a pool */
typedef struct thread{
    int       id;                /* friendly id, used for the thread name */
    pthread_t pthread;           /* handle of the underlying pthread      */
    struct thpool_* thpool_p;    /* back-pointer to the owning pool       */
} thread;
|
||||||
|
|
||||||
|
|
||||||
|
/* Threadpool */
|
||||||
|
typedef struct thpool_{
|
||||||
|
thread** threads; /* pointer to threads */
|
||||||
|
volatile int num_threads_alive; /* threads currently alive */
|
||||||
|
volatile int num_threads_working; /* threads currently working */
|
||||||
|
pthread_mutex_t thcount_lock; /* used for thread count etc */
|
||||||
|
pthread_cond_t threads_all_idle; /* signal to thpool_wait */
|
||||||
|
jobqueue jobqueue; /* job queue */
|
||||||
|
} thpool_;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* ========================== PROTOTYPES ============================ */
|
||||||
|
|
||||||
|
|
||||||
|
static int thread_init(thpool_* thpool_p, struct thread** thread_p, int id);
|
||||||
|
static void* thread_do(struct thread* thread_p);
|
||||||
|
static void thread_hold(int sig_id);
|
||||||
|
static void thread_destroy(struct thread* thread_p);
|
||||||
|
|
||||||
|
static int jobqueue_init(jobqueue* jobqueue_p);
|
||||||
|
static void jobqueue_clear(jobqueue* jobqueue_p);
|
||||||
|
static void jobqueue_push(jobqueue* jobqueue_p, struct job* newjob_p);
|
||||||
|
static struct job* jobqueue_pull(jobqueue* jobqueue_p);
|
||||||
|
static void jobqueue_destroy(jobqueue* jobqueue_p);
|
||||||
|
|
||||||
|
static void bsem_init(struct bsem *bsem_p, int value);
|
||||||
|
static void bsem_reset(struct bsem *bsem_p);
|
||||||
|
static void bsem_post(struct bsem *bsem_p);
|
||||||
|
static void bsem_post_all(struct bsem *bsem_p);
|
||||||
|
static void bsem_wait(struct bsem *bsem_p);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* ========================== THREADPOOL ============================ */
|
||||||
|
|
||||||
|
|
||||||
|
/* Initialise thread pool */
|
||||||
|
struct thpool_* thpool_init(int num_threads){
|
||||||
|
|
||||||
|
threads_on_hold = 0;
|
||||||
|
threads_keepalive = 1;
|
||||||
|
|
||||||
|
if (num_threads < 0){
|
||||||
|
num_threads = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Make new thread pool */
|
||||||
|
thpool_* thpool_p;
|
||||||
|
thpool_p = (struct thpool_*)malloc(sizeof(struct thpool_));
|
||||||
|
if (thpool_p == NULL){
|
||||||
|
err("thpool_init(): Could not allocate memory for thread pool\n");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
thpool_p->num_threads_alive = 0;
|
||||||
|
thpool_p->num_threads_working = 0;
|
||||||
|
|
||||||
|
/* Initialise the job queue */
|
||||||
|
if (jobqueue_init(&thpool_p->jobqueue) == -1){
|
||||||
|
err("thpool_init(): Could not allocate memory for job queue\n");
|
||||||
|
free(thpool_p);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Make threads in pool */
|
||||||
|
thpool_p->threads = (struct thread**)malloc(num_threads * sizeof(struct thread *));
|
||||||
|
if (thpool_p->threads == NULL){
|
||||||
|
err("thpool_init(): Could not allocate memory for threads\n");
|
||||||
|
jobqueue_destroy(&thpool_p->jobqueue);
|
||||||
|
free(thpool_p);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
pthread_mutex_init(&(thpool_p->thcount_lock), NULL);
|
||||||
|
pthread_cond_init(&thpool_p->threads_all_idle, NULL);
|
||||||
|
|
||||||
|
/* Thread init */
|
||||||
|
int n;
|
||||||
|
for (n=0; n<num_threads; n++){
|
||||||
|
thread_init(thpool_p, &thpool_p->threads[n], n);
|
||||||
|
#if THPOOL_DEBUG
|
||||||
|
printf("THPOOL_DEBUG: Created thread %d in pool \n", n);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Wait for threads to initialize */
|
||||||
|
while (thpool_p->num_threads_alive != num_threads) {}
|
||||||
|
|
||||||
|
return thpool_p;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Add work to the thread pool */
|
||||||
|
int thpool_add_work(thpool_* thpool_p, void (*function_p)(void*), void* arg_p){
|
||||||
|
job* newjob;
|
||||||
|
|
||||||
|
newjob=(struct job*)malloc(sizeof(struct job));
|
||||||
|
if (newjob==NULL){
|
||||||
|
err("thpool_add_work(): Could not allocate memory for new job\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* add function and argument */
|
||||||
|
newjob->function=function_p;
|
||||||
|
newjob->arg=arg_p;
|
||||||
|
|
||||||
|
/* add job to queue */
|
||||||
|
jobqueue_push(&thpool_p->jobqueue, newjob);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Wait until all jobs have finished */
|
||||||
|
void thpool_wait(thpool_* thpool_p){
|
||||||
|
pthread_mutex_lock(&thpool_p->thcount_lock);
|
||||||
|
while (thpool_p->jobqueue.len || thpool_p->num_threads_working) {
|
||||||
|
pthread_cond_wait(&thpool_p->threads_all_idle, &thpool_p->thcount_lock);
|
||||||
|
}
|
||||||
|
pthread_mutex_unlock(&thpool_p->thcount_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Destroy the threadpool */
|
||||||
|
void thpool_destroy(thpool_* thpool_p){
|
||||||
|
/* No need to destroy if it's NULL */
|
||||||
|
if (thpool_p == NULL) return ;
|
||||||
|
|
||||||
|
volatile int threads_total = thpool_p->num_threads_alive;
|
||||||
|
|
||||||
|
/* End each thread 's infinite loop */
|
||||||
|
threads_keepalive = 0;
|
||||||
|
|
||||||
|
/* Give one second to kill idle threads */
|
||||||
|
double TIMEOUT = 1.0;
|
||||||
|
time_t start, end;
|
||||||
|
double tpassed = 0.0;
|
||||||
|
time (&start);
|
||||||
|
while (tpassed < TIMEOUT && thpool_p->num_threads_alive){
|
||||||
|
bsem_post_all(thpool_p->jobqueue.has_jobs);
|
||||||
|
time (&end);
|
||||||
|
tpassed = difftime(end,start);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Poll remaining threads */
|
||||||
|
while (thpool_p->num_threads_alive){
|
||||||
|
bsem_post_all(thpool_p->jobqueue.has_jobs);
|
||||||
|
sleep(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Job queue cleanup */
|
||||||
|
jobqueue_destroy(&thpool_p->jobqueue);
|
||||||
|
/* Deallocs */
|
||||||
|
int n;
|
||||||
|
for (n=0; n < threads_total; n++){
|
||||||
|
thread_destroy(thpool_p->threads[n]);
|
||||||
|
}
|
||||||
|
free(thpool_p->threads);
|
||||||
|
free(thpool_p);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Pause all threads in threadpool */
|
||||||
|
void thpool_pause(thpool_* thpool_p) {
|
||||||
|
int n;
|
||||||
|
for (n=0; n < thpool_p->num_threads_alive; n++){
|
||||||
|
pthread_kill(thpool_p->threads[n]->pthread, SIGUSR1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Resume all threads in threadpool */
|
||||||
|
void thpool_resume(thpool_* thpool_p) {
|
||||||
|
// resuming a single threadpool hasn't been
|
||||||
|
// implemented yet, meanwhile this suppresses
|
||||||
|
// the warnings
|
||||||
|
(void)thpool_p;
|
||||||
|
|
||||||
|
threads_on_hold = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Report how many workers are currently marked as working.
 * The counter is read without taking thcount_lock. */
int thpool_num_threads_working(thpool_* thpool_p){
    const int busy = thpool_p->num_threads_working;
    return busy;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* ============================ THREAD ============================== */
|
||||||
|
|
||||||
|
|
||||||
|
/* Initialize a thread in the thread pool
|
||||||
|
*
|
||||||
|
* @param thread address to the pointer of the thread to be created
|
||||||
|
* @param id id to be given to the thread
|
||||||
|
* @return 0 on success, -1 otherwise.
|
||||||
|
*/
|
||||||
|
static int thread_init (thpool_* thpool_p, struct thread** thread_p, int id){
|
||||||
|
|
||||||
|
*thread_p = (struct thread*)malloc(sizeof(struct thread));
|
||||||
|
if (*thread_p == NULL){
|
||||||
|
err("thread_init(): Could not allocate memory for thread\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
(*thread_p)->thpool_p = thpool_p;
|
||||||
|
(*thread_p)->id = id;
|
||||||
|
|
||||||
|
pthread_create(&(*thread_p)->pthread, NULL, (void * (*)(void *)) thread_do, (*thread_p));
|
||||||
|
pthread_detach((*thread_p)->pthread);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Sets the calling thread on hold */
|
||||||
|
static void thread_hold(int sig_id) {
|
||||||
|
(void)sig_id;
|
||||||
|
threads_on_hold = 1;
|
||||||
|
while (threads_on_hold){
|
||||||
|
sleep(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* What each thread is doing
|
||||||
|
*
|
||||||
|
* In principle this is an endless loop. The only time this loop gets interrupted is once
|
||||||
|
* thpool_destroy() is invoked or the program exits.
|
||||||
|
*
|
||||||
|
* @param thread thread that will run this function
|
||||||
|
* @return nothing
|
||||||
|
*/
|
||||||
|
static void* thread_do(struct thread* thread_p){
|
||||||
|
|
||||||
|
/* Set thread name for profiling and debugging */
|
||||||
|
char thread_name[16] = {0};
|
||||||
|
snprintf(thread_name, 16, "thpool-%d", thread_p->id);
|
||||||
|
|
||||||
|
#if defined(__linux__)
|
||||||
|
/* Use prctl instead to prevent using _GNU_SOURCE flag and implicit declaration */
|
||||||
|
prctl(PR_SET_NAME, thread_name);
|
||||||
|
#elif defined(__APPLE__) && defined(__MACH__)
|
||||||
|
pthread_setname_np(thread_name);
|
||||||
|
#else
|
||||||
|
err("thread_do(): pthread_setname_np is not supported on this system");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Assure all threads have been created before starting serving */
|
||||||
|
thpool_* thpool_p = thread_p->thpool_p;
|
||||||
|
|
||||||
|
/* Register signal handler */
|
||||||
|
struct sigaction act;
|
||||||
|
sigemptyset(&act.sa_mask);
|
||||||
|
act.sa_flags = 0;
|
||||||
|
act.sa_handler = thread_hold;
|
||||||
|
if (sigaction(SIGUSR1, &act, NULL) == -1) {
|
||||||
|
err("thread_do(): cannot handle SIGUSR1");
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Mark thread as alive (initialized) */
|
||||||
|
pthread_mutex_lock(&thpool_p->thcount_lock);
|
||||||
|
thpool_p->num_threads_alive += 1;
|
||||||
|
pthread_mutex_unlock(&thpool_p->thcount_lock);
|
||||||
|
|
||||||
|
while(threads_keepalive){
|
||||||
|
|
||||||
|
bsem_wait(thpool_p->jobqueue.has_jobs);
|
||||||
|
|
||||||
|
if (threads_keepalive){
|
||||||
|
|
||||||
|
pthread_mutex_lock(&thpool_p->thcount_lock);
|
||||||
|
thpool_p->num_threads_working++;
|
||||||
|
pthread_mutex_unlock(&thpool_p->thcount_lock);
|
||||||
|
|
||||||
|
/* Read job from queue and execute it */
|
||||||
|
void (*func_buff)(void*);
|
||||||
|
void* arg_buff;
|
||||||
|
job* job_p = jobqueue_pull(&thpool_p->jobqueue);
|
||||||
|
if (job_p) {
|
||||||
|
func_buff = job_p->function;
|
||||||
|
arg_buff = job_p->arg;
|
||||||
|
func_buff(arg_buff);
|
||||||
|
free(job_p);
|
||||||
|
}
|
||||||
|
|
||||||
|
pthread_mutex_lock(&thpool_p->thcount_lock);
|
||||||
|
thpool_p->num_threads_working--;
|
||||||
|
if (!thpool_p->num_threads_working) {
|
||||||
|
pthread_cond_signal(&thpool_p->threads_all_idle);
|
||||||
|
}
|
||||||
|
pthread_mutex_unlock(&thpool_p->thcount_lock);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pthread_mutex_lock(&thpool_p->thcount_lock);
|
||||||
|
thpool_p->num_threads_alive --;
|
||||||
|
pthread_mutex_unlock(&thpool_p->thcount_lock);
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Frees a thread */
|
||||||
|
static void thread_destroy (thread* thread_p){
|
||||||
|
free(thread_p);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* ============================ JOB QUEUE =========================== */
|
||||||
|
|
||||||
|
|
||||||
|
/* Initialize queue */
|
||||||
|
static int jobqueue_init(jobqueue* jobqueue_p){
|
||||||
|
jobqueue_p->len = 0;
|
||||||
|
jobqueue_p->front = NULL;
|
||||||
|
jobqueue_p->rear = NULL;
|
||||||
|
|
||||||
|
jobqueue_p->has_jobs = (struct bsem*)malloc(sizeof(struct bsem));
|
||||||
|
if (jobqueue_p->has_jobs == NULL){
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
pthread_mutex_init(&(jobqueue_p->rwmutex), NULL);
|
||||||
|
bsem_init(jobqueue_p->has_jobs, 0);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Clear the queue */
|
||||||
|
static void jobqueue_clear(jobqueue* jobqueue_p){
|
||||||
|
|
||||||
|
while(jobqueue_p->len){
|
||||||
|
free(jobqueue_pull(jobqueue_p));
|
||||||
|
}
|
||||||
|
|
||||||
|
jobqueue_p->front = NULL;
|
||||||
|
jobqueue_p->rear = NULL;
|
||||||
|
bsem_reset(jobqueue_p->has_jobs);
|
||||||
|
jobqueue_p->len = 0;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Add (allocated) job to queue
|
||||||
|
*/
|
||||||
|
static void jobqueue_push(jobqueue* jobqueue_p, struct job* newjob){
|
||||||
|
|
||||||
|
pthread_mutex_lock(&jobqueue_p->rwmutex);
|
||||||
|
newjob->prev = NULL;
|
||||||
|
|
||||||
|
switch(jobqueue_p->len){
|
||||||
|
|
||||||
|
case 0: /* if no jobs in queue */
|
||||||
|
jobqueue_p->front = newjob;
|
||||||
|
jobqueue_p->rear = newjob;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default: /* if jobs in queue */
|
||||||
|
jobqueue_p->rear->prev = newjob;
|
||||||
|
jobqueue_p->rear = newjob;
|
||||||
|
|
||||||
|
}
|
||||||
|
jobqueue_p->len++;
|
||||||
|
|
||||||
|
bsem_post(jobqueue_p->has_jobs);
|
||||||
|
pthread_mutex_unlock(&jobqueue_p->rwmutex);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Get first job from queue(removes it from queue)
|
||||||
|
* Notice: Caller MUST hold a mutex
|
||||||
|
*/
|
||||||
|
static struct job* jobqueue_pull(jobqueue* jobqueue_p){
|
||||||
|
|
||||||
|
pthread_mutex_lock(&jobqueue_p->rwmutex);
|
||||||
|
job* job_p = jobqueue_p->front;
|
||||||
|
|
||||||
|
switch(jobqueue_p->len){
|
||||||
|
|
||||||
|
case 0: /* if no jobs in queue */
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 1: /* if one job in queue */
|
||||||
|
jobqueue_p->front = NULL;
|
||||||
|
jobqueue_p->rear = NULL;
|
||||||
|
jobqueue_p->len = 0;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default: /* if >1 jobs in queue */
|
||||||
|
jobqueue_p->front = job_p->prev;
|
||||||
|
jobqueue_p->len--;
|
||||||
|
/* more than one job in queue -> post it */
|
||||||
|
bsem_post(jobqueue_p->has_jobs);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
pthread_mutex_unlock(&jobqueue_p->rwmutex);
|
||||||
|
return job_p;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Free all queue resources back to the system */
|
||||||
|
static void jobqueue_destroy(jobqueue* jobqueue_p){
|
||||||
|
jobqueue_clear(jobqueue_p);
|
||||||
|
free(jobqueue_p->has_jobs);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* ======================== SYNCHRONISATION ========================= */
|
||||||
|
|
||||||
|
|
||||||
|
/* Init semaphore to 1 or 0 */
|
||||||
|
static void bsem_init(bsem *bsem_p, int value) {
|
||||||
|
if (value < 0 || value > 1) {
|
||||||
|
err("bsem_init(): Binary semaphore can take only values 1 or 0");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
pthread_mutex_init(&(bsem_p->mutex), NULL);
|
||||||
|
pthread_cond_init(&(bsem_p->cond), NULL);
|
||||||
|
bsem_p->v = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Reset semaphore to 0 */
|
||||||
|
static void bsem_reset(bsem *bsem_p) {
|
||||||
|
bsem_init(bsem_p, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Post to at least one thread */
|
||||||
|
static void bsem_post(bsem *bsem_p) {
|
||||||
|
pthread_mutex_lock(&bsem_p->mutex);
|
||||||
|
bsem_p->v = 1;
|
||||||
|
pthread_cond_signal(&bsem_p->cond);
|
||||||
|
pthread_mutex_unlock(&bsem_p->mutex);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Post to all threads */
|
||||||
|
static void bsem_post_all(bsem *bsem_p) {
|
||||||
|
pthread_mutex_lock(&bsem_p->mutex);
|
||||||
|
bsem_p->v = 1;
|
||||||
|
pthread_cond_broadcast(&bsem_p->cond);
|
||||||
|
pthread_mutex_unlock(&bsem_p->mutex);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Wait on semaphore until semaphore has value 0 */
|
||||||
|
static void bsem_wait(bsem* bsem_p) {
|
||||||
|
pthread_mutex_lock(&bsem_p->mutex);
|
||||||
|
while (bsem_p->v != 1) {
|
||||||
|
pthread_cond_wait(&bsem_p->cond, &bsem_p->mutex);
|
||||||
|
}
|
||||||
|
bsem_p->v = 0;
|
||||||
|
pthread_mutex_unlock(&bsem_p->mutex);
|
||||||
|
}
|
187
thpool.h
Normal file
187
thpool.h
Normal file
@ -0,0 +1,187 @@
|
|||||||
|
/**********************************
|
||||||
|
* @author Johan Hanssen Seferidis
|
||||||
|
* License: MIT
|
||||||
|
*
|
||||||
|
**********************************/
|
||||||
|
|
||||||
|
#ifndef _THPOOL_
|
||||||
|
#define _THPOOL_
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* =================================== API ======================================= */
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct thpool_* threadpool;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Initialize threadpool
|
||||||
|
*
|
||||||
|
* Initializes a threadpool. This function will not return until all
|
||||||
|
* threads have initialized successfully.
|
||||||
|
*
|
||||||
|
* @example
|
||||||
|
*
|
||||||
|
* ..
|
||||||
|
* threadpool thpool; //First we declare a threadpool
|
||||||
|
* thpool = thpool_init(4); //then we initialize it to 4 threads
|
||||||
|
* ..
|
||||||
|
*
|
||||||
|
* @param num_threads number of threads to be created in the threadpool
|
||||||
|
* @return threadpool created threadpool on success,
|
||||||
|
* NULL on error
|
||||||
|
*/
|
||||||
|
threadpool thpool_init(int num_threads);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Add work to the job queue
|
||||||
|
*
|
||||||
|
* Takes an action and its argument and adds it to the threadpool's job queue.
|
||||||
|
* If the function you want to queue needs more than one argument, then
|
||||||
|
* a way to implement this is by passing a pointer to a structure.
|
||||||
|
*
|
||||||
|
* NOTICE: You have to cast both the function and argument to not get warnings.
|
||||||
|
*
|
||||||
|
* @example
|
||||||
|
*
|
||||||
|
* void print_num(int num){
|
||||||
|
* printf("%d\n", num);
|
||||||
|
* }
|
||||||
|
*
|
||||||
|
* int main() {
|
||||||
|
* ..
|
||||||
|
* int a = 10;
|
||||||
|
* thpool_add_work(thpool, (void*)print_num, (void*)a);
|
||||||
|
* ..
|
||||||
|
* }
|
||||||
|
*
|
||||||
|
* @param threadpool threadpool to which the work will be added
|
||||||
|
* @param function_p pointer to function to add as work
|
||||||
|
* @param arg_p pointer to an argument
|
||||||
|
* @return 0 on success, -1 otherwise.
|
||||||
|
*/
|
||||||
|
int thpool_add_work(threadpool, void (*function_p)(void*), void* arg_p);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Wait for all queued jobs to finish
|
||||||
|
*
|
||||||
|
* Will wait for all jobs - both queued and currently running to finish.
|
||||||
|
* Once the queue is empty and all work has completed, the calling thread
|
||||||
|
* (probably the main program) will continue.
|
||||||
|
*
|
||||||
|
* Smart polling is used in wait. The polling is initially 0 - meaning that
|
||||||
|
* there is virtually no polling at all. If after 1 second the threads
|
||||||
|
* haven't finished, the polling interval starts growing exponentially
|
||||||
|
* until it reaches max_secs seconds. Then it jumps down to a maximum polling
|
||||||
|
* interval assuming that heavy processing is being used in the threadpool.
|
||||||
|
*
|
||||||
|
* @example
|
||||||
|
*
|
||||||
|
* ..
|
||||||
|
* threadpool thpool = thpool_init(4);
|
||||||
|
* ..
|
||||||
|
* // Add a bunch of work
|
||||||
|
* ..
|
||||||
|
* thpool_wait(thpool);
|
||||||
|
* puts("All added work has finished");
|
||||||
|
* ..
|
||||||
|
*
|
||||||
|
* @param threadpool the threadpool to wait for
|
||||||
|
* @return nothing
|
||||||
|
*/
|
||||||
|
void thpool_wait(threadpool);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Pauses all threads immediately
|
||||||
|
*
|
||||||
|
* The threads will be paused no matter if they are idle or working.
|
||||||
|
* The threads return to their previous states once thpool_resume
|
||||||
|
* is called.
|
||||||
|
*
|
||||||
|
* While the threads are paused, new work can be added.
|
||||||
|
*
|
||||||
|
* @example
|
||||||
|
*
|
||||||
|
* threadpool thpool = thpool_init(4);
|
||||||
|
* thpool_pause(thpool);
|
||||||
|
* ..
|
||||||
|
* // Add a bunch of work
|
||||||
|
* ..
|
||||||
|
* thpool_resume(thpool); // Let the threads start their magic
|
||||||
|
*
|
||||||
|
* @param threadpool the threadpool where the threads should be paused
|
||||||
|
* @return nothing
|
||||||
|
*/
|
||||||
|
void thpool_pause(threadpool);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Unpauses all threads if they are paused
|
||||||
|
*
|
||||||
|
* @example
|
||||||
|
* ..
|
||||||
|
* thpool_pause(thpool);
|
||||||
|
* sleep(10); // Delay execution 10 seconds
|
||||||
|
* thpool_resume(thpool);
|
||||||
|
* ..
|
||||||
|
*
|
||||||
|
* @param threadpool the threadpool where the threads should be unpaused
|
||||||
|
* @return nothing
|
||||||
|
*/
|
||||||
|
void thpool_resume(threadpool);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Destroy the threadpool
|
||||||
|
*
|
||||||
|
* This will wait for the currently active threads to finish and then 'kill'
|
||||||
|
* the whole threadpool to free up memory.
|
||||||
|
*
|
||||||
|
* @example
|
||||||
|
* int main() {
|
||||||
|
* threadpool thpool1 = thpool_init(2);
|
||||||
|
* threadpool thpool2 = thpool_init(2);
|
||||||
|
* ..
|
||||||
|
* thpool_destroy(thpool1);
|
||||||
|
* ..
|
||||||
|
* return 0;
|
||||||
|
* }
|
||||||
|
*
|
||||||
|
* @param threadpool the threadpool to destroy
|
||||||
|
* @return nothing
|
||||||
|
*/
|
||||||
|
void thpool_destroy(threadpool);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Show currently working threads
|
||||||
|
*
|
||||||
|
* Working threads are the threads that are performing work (not idle).
|
||||||
|
*
|
||||||
|
* @example
|
||||||
|
* int main() {
|
||||||
|
* threadpool thpool1 = thpool_init(2);
|
||||||
|
* threadpool thpool2 = thpool_init(2);
|
||||||
|
* ..
|
||||||
|
* printf("Working threads: %d\n", thpool_num_threads_working(thpool1));
|
||||||
|
* ..
|
||||||
|
* return 0;
|
||||||
|
* }
|
||||||
|
*
|
||||||
|
* @param threadpool the threadpool of interest
|
||||||
|
* @return integer number of threads working
|
||||||
|
*/
|
||||||
|
int thpool_num_threads_working(threadpool);
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
Loading…
Reference in New Issue
Block a user