Merge remote-tracking branch 'tp/Pithikos-C-Thread-Pool2' into tp_schedule

This commit is contained in:
Howard Su 2023-04-07 21:31:07 +08:00
commit b8c9b27452
6 changed files with 886 additions and 169 deletions

View File

@ -326,7 +326,7 @@ jobs:
# sudo apt-get install cmake # sudo apt-get install cmake
# #
# - name: Configure # - name: Configure
# run: cmake . -DCMAKE_BUILD_TYPE=Debug -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON # run: cmake . -DCMAKE_BUILD_TYPE=RelWithDebInfo -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON
# #
# - name: Build # - name: Build
# run: | # run: |

View File

@ -336,7 +336,10 @@ endif()
add_library(ggml OBJECT add_library(ggml OBJECT
ggml.c ggml.c
ggml.h) ggml.h
thpool.c
thpool.h
)
target_include_directories(ggml PUBLIC .) target_include_directories(ggml PUBLIC .)
target_compile_features(ggml PUBLIC c_std_11) # don't bump target_compile_features(ggml PUBLIC c_std_11) # don't bump

View File

@ -139,6 +139,9 @@ default: main quantize perplexity embedding
# Build library # Build library
# #
thpool.o: thpool.c thpool.h
$(CC) $(CFLAGS) -c thpool.c -o thpool.o
ggml.o: ggml.c ggml.h ggml.o: ggml.c ggml.h
$(CC) $(CFLAGS) -c ggml.c -o ggml.o $(CC) $(CFLAGS) -c ggml.c -o ggml.o
@ -151,20 +154,20 @@ common.o: examples/common.cpp examples/common.h
clean: clean:
rm -vf *.o main quantize perplexity embedding rm -vf *.o main quantize perplexity embedding
main: examples/main/main.cpp ggml.o llama.o common.o main: examples/main/main.cpp thpool.o ggml.o llama.o common.o
$(CXX) $(CXXFLAGS) examples/main/main.cpp ggml.o llama.o common.o -o main $(LDFLAGS) $(CXX) $(CXXFLAGS) examples/main/main.cpp thpool.o ggml.o llama.o common.o -o main $(LDFLAGS)
@echo @echo
@echo '==== Run ./main -h for help. ====' @echo '==== Run ./main -h for help. ===='
@echo @echo
quantize: examples/quantize/quantize.cpp ggml.o llama.o quantize: examples/quantize/quantize.cpp thpool.o ggml.o llama.o
$(CXX) $(CXXFLAGS) examples/quantize/quantize.cpp ggml.o llama.o -o quantize $(LDFLAGS) $(CXX) $(CXXFLAGS) examples/quantize/quantize.cpp thpool.o ggml.o llama.o -o quantize $(LDFLAGS)
perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o common.o perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o common.o
$(CXX) $(CXXFLAGS) examples/perplexity/perplexity.cpp ggml.o llama.o common.o -o perplexity $(LDFLAGS) $(CXX) $(CXXFLAGS) examples/perplexity/perplexity.cpp thpool.o ggml.o llama.o common.o -o perplexity $(LDFLAGS)
embedding: examples/embedding/embedding.cpp ggml.o llama.o common.o embedding: examples/embedding/embedding.cpp thpool.o ggml.o llama.o common.o
$(CXX) $(CXXFLAGS) examples/embedding/embedding.cpp ggml.o llama.o common.o -o embedding $(LDFLAGS) $(CXX) $(CXXFLAGS) examples/embedding/embedding.cpp thpool.o ggml.o llama.o common.o -o embedding $(LDFLAGS)
# #
# Tests # Tests

199
ggml.c
View File

@ -3,6 +3,8 @@
#include "ggml.h" #include "ggml.h"
#include "thpool.h"
#if defined(_MSC_VER) || defined(__MINGW32__) #if defined(_MSC_VER) || defined(__MINGW32__)
#include <malloc.h> // using malloc.h with MSC/MINGW #include <malloc.h> // using malloc.h with MSC/MINGW
#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__) #elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
@ -51,28 +53,11 @@ static LONG atomic_fetch_sub(atomic_int* ptr, LONG dec) {
return atomic_fetch_add(ptr, -(dec)); return atomic_fetch_add(ptr, -(dec));
} }
typedef HANDLE pthread_t;
typedef DWORD thread_ret_t;
static int pthread_create(pthread_t* out, void* unused, thread_ret_t(*func)(void*), void* arg) {
HANDLE handle = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) func, arg, 0, NULL);
if (handle == NULL)
{
return EAGAIN;
}
*out = handle;
return 0;
}
static int pthread_join(pthread_t thread, void* unused) {
return (int) WaitForSingleObject(thread, INFINITE);
}
static int sched_yield (void) { static int sched_yield (void) {
Sleep (0); Sleep (0);
return 0; return 0;
} }
#else #else
#include <pthread.h> #include <pthread.h>
#include <stdatomic.h> #include <stdatomic.h>
@ -2726,6 +2711,7 @@ struct ggml_context {
struct ggml_scratch scratch; struct ggml_scratch scratch;
struct ggml_scratch scratch_save; struct ggml_scratch scratch_save;
threadpool tpool;
}; };
struct ggml_context_container { struct ggml_context_container {
@ -3010,6 +2996,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
/*.objects_end =*/ NULL, /*.objects_end =*/ NULL,
/*.scratch =*/ { 0, 0, NULL, }, /*.scratch =*/ { 0, 0, NULL, },
/*.scratch_save =*/ { 0, 0, NULL, }, /*.scratch_save =*/ { 0, 0, NULL, },
/*.thpool =*/ NULL,
}; };
GGML_ASSERT(ctx->mem_buffer != NULL); // check for allocation failure GGML_ASSERT(ctx->mem_buffer != NULL); // check for allocation failure
@ -9214,6 +9201,19 @@ typedef pthread_t ggml_thread_t;
#define ggml_thread_create pthread_create #define ggml_thread_create pthread_create
#define ggml_thread_join pthread_join #define ggml_thread_join pthread_join
typedef pthread_mutex_t ggml_mutex_t;
typedef pthread_cond_t ggml_cond_t;
#define ggml_mutex_init pthread_mutex_init
#define ggml_mutex_destroy pthread_mutex_destroy
#define ggml_cond_init pthread_cond_init
#define ggml_cond_destroy pthread_cond_destroy
#define ggml_mutex_lock pthread_mutex_lock
#define ggml_mutex_unlock pthread_mutex_unlock
#define ggml_cond_broadcast pthread_cond_broadcast
#define ggml_cond_wait pthread_cond_wait
#else #else
//typedef pthread_spinlock_t ggml_lock_t; //typedef pthread_spinlock_t ggml_lock_t;
@ -9232,26 +9232,27 @@ typedef int ggml_lock_t;
#define GGML_LOCK_INITIALIZER 0 #define GGML_LOCK_INITIALIZER 0
typedef pthread_t ggml_thread_t;
#define ggml_thread_create pthread_create #define ggml_thread_create pthread_create
#define ggml_thread_join pthread_join #define ggml_thread_join pthread_join
#define ggml_mutex_init pthread_mutex_init
#define ggml_mutex_destroy pthread_mutex_destroy
#define ggml_cond_init pthread_cond_init
#define ggml_cond_destroy pthread_cond_destroy
#define ggml_mutex_lock pthread_mutex_lock
#define ggml_mutex_unlock pthread_mutex_unlock
#define ggml_cond_broadcast pthread_cond_broadcast
#define ggml_cond_wait pthread_cond_wait
#endif #endif
struct ggml_compute_state_shared { struct ggml_compute_state_shared {
ggml_lock_t spin;
int n_threads; int n_threads;
// synchronization primitives
atomic_int n_ready;
atomic_bool has_work;
atomic_bool stop; // stop all threads
}; };
struct ggml_compute_state { struct ggml_compute_state {
ggml_thread_t thrd;
struct ggml_compute_params params; struct ggml_compute_params params;
struct ggml_tensor * node; struct ggml_tensor * node;
@ -9259,75 +9260,28 @@ struct ggml_compute_state {
struct ggml_compute_state_shared * shared; struct ggml_compute_state_shared * shared;
}; };
static thread_ret_t ggml_graph_compute_thread(void * data) { static void ggml_graph_compute_thread(void * data) {
struct ggml_compute_state * state = (struct ggml_compute_state *) data; struct ggml_compute_state * state = (struct ggml_compute_state *) data;
if (state->node) {
const int n_threads = state->shared->n_threads; if (state->params.ith < state->params.nth) {
ggml_compute_forward(&state->params, state->node);
while (true) {
if (atomic_fetch_add(&state->shared->n_ready, 1) == n_threads - 1) {
atomic_store(&state->shared->has_work, false);
} else {
while (atomic_load(&state->shared->has_work)) {
if (atomic_load(&state->shared->stop)) {
return 0;
}
ggml_lock_lock (&state->shared->spin);
ggml_lock_unlock(&state->shared->spin);
}
}
atomic_fetch_sub(&state->shared->n_ready, 1);
// wait for work
while (!atomic_load(&state->shared->has_work)) {
if (atomic_load(&state->shared->stop)) {
return 0;
}
ggml_lock_lock (&state->shared->spin);
ggml_lock_unlock(&state->shared->spin);
}
// check if we should stop
if (atomic_load(&state->shared->stop)) {
break;
}
if (state->node) {
if (state->params.ith < state->params.nth) {
ggml_compute_forward(&state->params, state->node);
}
state->node = NULL;
} else {
break;
} }
state->node = NULL;
} }
return 0;
} }
void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) { void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) {
const int n_threads = cgraph->n_threads; const int n_threads = cgraph->n_threads;
struct ggml_compute_state_shared state_shared = { struct ggml_compute_state_shared state_shared = {
/*.spin =*/ GGML_LOCK_INITIALIZER,
/*.n_threads =*/ n_threads, /*.n_threads =*/ n_threads,
/*.n_ready =*/ 0,
/*.has_work =*/ false,
/*.stop =*/ false,
}; };
struct ggml_compute_state * workers = n_threads > 1 ? alloca(sizeof(struct ggml_compute_state)*(n_threads - 1)) : NULL; struct ggml_compute_state * workers = n_threads > 1 ? alloca(sizeof(struct ggml_compute_state)*(n_threads - 1)) : NULL;
// create thread pool // create thread pool
if (n_threads > 1) { if (n_threads > 1) {
ggml_lock_init(&state_shared.spin); ctx->tpool = thpool_init(n_threads);
atomic_store(&state_shared.has_work, true);
for (int j = 0; j < n_threads - 1; j++) { for (int j = 0; j < n_threads - 1; j++) {
workers[j] = (struct ggml_compute_state) { workers[j] = (struct ggml_compute_state) {
.thrd = 0,
.params = { .params = {
.type = GGML_TASK_COMPUTE, .type = GGML_TASK_COMPUTE,
.ith = j + 1, .ith = j + 1,
@ -9338,10 +9292,6 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
.node = NULL, .node = NULL,
.shared = &state_shared, .shared = &state_shared,
}; };
int rc = ggml_thread_create(&workers[j].thrd, NULL, ggml_graph_compute_thread, &workers[j]);
GGML_ASSERT(rc == 0);
UNUSED(rc);
} }
} }
@ -9579,15 +9529,6 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
// COMPUTE // COMPUTE
if (node->n_tasks > 1) { if (node->n_tasks > 1) {
if (atomic_fetch_add(&state_shared.n_ready, 1) == n_threads - 1) {
atomic_store(&state_shared.has_work, false);
}
while (atomic_load(&state_shared.has_work)) {
ggml_lock_lock (&state_shared.spin);
ggml_lock_unlock(&state_shared.spin);
}
// launch thread pool // launch thread pool
for (int j = 0; j < n_threads - 1; j++) { for (int j = 0; j < n_threads - 1; j++) {
workers[j].params = (struct ggml_compute_params) { workers[j].params = (struct ggml_compute_params) {
@ -9598,16 +9539,8 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
.wdata = cgraph->work ? cgraph->work->data : NULL, .wdata = cgraph->work ? cgraph->work->data : NULL,
}; };
workers[j].node = node; workers[j].node = node;
thpool_add_work(ctx->tpool, ggml_graph_compute_thread, &workers[j]);
} }
atomic_fetch_sub(&state_shared.n_ready, 1);
while (atomic_load(&state_shared.n_ready) > 0) {
ggml_lock_lock (&state_shared.spin);
ggml_lock_unlock(&state_shared.spin);
}
atomic_store(&state_shared.has_work, true);
} }
params.type = GGML_TASK_COMPUTE; params.type = GGML_TASK_COMPUTE;
@ -9615,34 +9548,11 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
// wait for thread pool // wait for thread pool
if (node->n_tasks > 1) { if (node->n_tasks > 1) {
if (atomic_fetch_add(&state_shared.n_ready, 1) == n_threads - 1) { thpool_wait(ctx->tpool);
atomic_store(&state_shared.has_work, false);
}
while (atomic_load(&state_shared.has_work)) {
ggml_lock_lock (&state_shared.spin);
ggml_lock_unlock(&state_shared.spin);
}
atomic_fetch_sub(&state_shared.n_ready, 1);
while (atomic_load(&state_shared.n_ready) != 0) {
ggml_lock_lock (&state_shared.spin);
ggml_lock_unlock(&state_shared.spin);
}
} }
// FINALIZE // FINALIZE
if (node->n_tasks > 1) { if (node->n_tasks > 1) {
if (atomic_fetch_add(&state_shared.n_ready, 1) == n_threads - 1) {
atomic_store(&state_shared.has_work, false);
}
while (atomic_load(&state_shared.has_work)) {
ggml_lock_lock (&state_shared.spin);
ggml_lock_unlock(&state_shared.spin);
}
// launch thread pool // launch thread pool
for (int j = 0; j < n_threads - 1; j++) { for (int j = 0; j < n_threads - 1; j++) {
workers[j].params = (struct ggml_compute_params) { workers[j].params = (struct ggml_compute_params) {
@ -9653,16 +9563,8 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
.wdata = cgraph->work ? cgraph->work->data : NULL, .wdata = cgraph->work ? cgraph->work->data : NULL,
}; };
workers[j].node = node; workers[j].node = node;
thpool_add_work(ctx->tpool, ggml_graph_compute_thread, &workers[j]);
} }
atomic_fetch_sub(&state_shared.n_ready, 1);
while (atomic_load(&state_shared.n_ready) > 0) {
ggml_lock_lock (&state_shared.spin);
ggml_lock_unlock(&state_shared.spin);
}
atomic_store(&state_shared.has_work, true);
} }
params.type = GGML_TASK_FINALIZE; params.type = GGML_TASK_FINALIZE;
@ -9670,21 +9572,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
// wait for thread pool // wait for thread pool
if (node->n_tasks > 1) { if (node->n_tasks > 1) {
if (atomic_fetch_add(&state_shared.n_ready, 1) == n_threads - 1) { thpool_wait(ctx->tpool);
atomic_store(&state_shared.has_work, false);
}
while (atomic_load(&state_shared.has_work)) {
ggml_lock_lock (&state_shared.spin);
ggml_lock_unlock(&state_shared.spin);
}
atomic_fetch_sub(&state_shared.n_ready, 1);
while (atomic_load(&state_shared.n_ready) != 0) {
ggml_lock_lock (&state_shared.spin);
ggml_lock_unlock(&state_shared.spin);
}
} }
// performance stats (node) // performance stats (node)
@ -9700,16 +9588,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
// join thread pool // join thread pool
if (n_threads > 1) { if (n_threads > 1) {
atomic_store(&state_shared.stop, true); thpool_destroy(ctx->tpool);
atomic_store(&state_shared.has_work, true);
for (int j = 0; j < n_threads - 1; j++) {
int rc = ggml_thread_join(workers[j].thrd, NULL);
GGML_ASSERT(rc == 0);
UNUSED(rc);
}
ggml_lock_destroy(&state_shared.spin);
} }
// performance stats (graph) // performance stats (graph)

645
thpool.c Normal file
View File

@ -0,0 +1,645 @@
/* ********************************
* Author: Johan Hanssen Seferidis
* License: MIT
* Description: Library providing a threading pool where you can add
* work. For usage, check the thpool.h file or README.md
*
*//** @file thpool.h *//*
*
********************************/
#if defined(__APPLE__)
#include <AvailabilityMacros.h>
#else
#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200809L
#endif
#endif
/*
this is not part of original thpool, thats me hacking it to work on windows
*/
#if defined _MSC_VER || defined(__MINGW32__)
#if !defined(__MINGW32__)
#include <Windows.h>
#else
// ref: https://github.com/ggerganov/whisper.cpp/issues/168
#include <windows.h>
#endif
unsigned int sleep(unsigned int seconds) {
Sleep(seconds * 1000);
return 0;
}
typedef HANDLE pthread_t;
typedef DWORD thread_ret_t;
static int pthread_create(pthread_t* out, void* unused, thread_ret_t(*func)(void*), void* arg) {
HANDLE handle = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) func, arg, 0, NULL);
if (handle == NULL)
{
return EAGAIN;
}
*out = handle;
return 0;
}
static int pthread_join(pthread_t thread, void* unused) {
return (int) WaitForSingleObject(thread, INFINITE);
}
static int pthread_detach(pthread_t thread) {
CloseHandle(thread);
return 0;
}
typedef struct pthread_mutex_tag {
CRITICAL_SECTION critical_section;
} pthread_mutex_t;
typedef struct pthread_mutexattr_tag {
int attr;
} pthread_mutexattr_t;
int pthread_mutex_init(pthread_mutex_t * mutex, const pthread_mutexattr_t * attr) {
InitializeCriticalSection (&mutex->critical_section);
return 0;
}
int pthread_mutex_destroy(pthread_mutex_t * mutex) {
DeleteCriticalSection(&mutex->critical_section);
return 0;
}
int pthread_mutex_lock(pthread_mutex_t * mutex) {
EnterCriticalSection(&mutex->critical_section);
return 0;
}
int pthread_mutex_unlock(pthread_mutex_t * mutex) {
LeaveCriticalSection(&mutex->critical_section);
return 0;
}
typedef struct pthread_cond_tag {
CONDITION_VARIABLE cond;
} pthread_cond_t;
int pthread_cond_init(pthread_cond_t * cond, void * unused) {
InitializeConditionVariable (&cond->cond);
return 0;
}
int pthread_cond_destroy(pthread_cond_t * cond) {
return 0;
}
int pthread_cond_wait(pthread_cond_t * cond, pthread_mutex_t * mutex) {
SleepConditionVariableCS(&cond->cond, &mutex->critical_section, INFINITE);
return 0;
}
int pthread_cond_broadcast(pthread_cond_t * cond) {
WakeAllConditionVariable(&cond->cond);
return 0;
}
int pthread_cond_signal(pthread_cond_t * cond) {
WakeConditionVariable(&cond->cond);
return 0;
}
#else
#include <unistd.h>
#include <pthread.h>
#endif
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <time.h>
#if defined(__linux__)
#include <sys/prctl.h>
#endif
#include "thpool.h"
#ifdef THPOOL_DEBUG
#define THPOOL_DEBUG 1
#else
#define THPOOL_DEBUG 0
#endif
#if !defined(DISABLE_PRINT) || defined(THPOOL_DEBUG)
#define err(str) fprintf(stderr, str)
#else
#define err(str)
#endif
static volatile int threads_keepalive;
static volatile int threads_on_hold;
/* ========================== STRUCTURES ============================ */
/* Binary semaphore */
typedef struct bsem {
pthread_mutex_t mutex;
pthread_cond_t cond;
int v;
} bsem;
/* Job */
typedef struct job{
struct job* prev; /* pointer to previous job */
void (*function)(void* arg); /* function pointer */
void* arg; /* function's argument */
} job;
/* Job queue */
typedef struct jobqueue{
pthread_mutex_t rwmutex; /* used for queue r/w access */
job *front; /* pointer to front of queue */
job *rear; /* pointer to rear of queue */
bsem *has_jobs; /* flag as binary semaphore */
int len; /* number of jobs in queue */
} jobqueue;
/* Thread */
typedef struct thread{
int id; /* friendly id */
pthread_t pthread; /* pointer to actual thread */
struct thpool_* thpool_p; /* access to thpool */
} thread;
/* Threadpool */
typedef struct thpool_{
thread** threads; /* pointer to threads */
volatile int num_threads_alive; /* threads currently alive */
volatile int num_threads_working; /* threads currently working */
pthread_mutex_t thcount_lock; /* used for thread count etc */
pthread_cond_t threads_all_idle; /* signal to thpool_wait */
jobqueue jobqueue; /* job queue */
} thpool_;
/* ========================== PROTOTYPES ============================ */
static int thread_init(thpool_* thpool_p, struct thread** thread_p, int id);
static void* thread_do(struct thread* thread_p);
static void thread_hold(int sig_id);
static void thread_destroy(struct thread* thread_p);
static int jobqueue_init(jobqueue* jobqueue_p);
static void jobqueue_clear(jobqueue* jobqueue_p);
static void jobqueue_push(jobqueue* jobqueue_p, struct job* newjob_p);
static struct job* jobqueue_pull(jobqueue* jobqueue_p);
static void jobqueue_destroy(jobqueue* jobqueue_p);
static void bsem_init(struct bsem *bsem_p, int value);
static void bsem_reset(struct bsem *bsem_p);
static void bsem_post(struct bsem *bsem_p);
static void bsem_post_all(struct bsem *bsem_p);
static void bsem_wait(struct bsem *bsem_p);
/* ========================== THREADPOOL ============================ */
/* Initialise thread pool */
struct thpool_* thpool_init(int num_threads){
threads_on_hold = 0;
threads_keepalive = 1;
if (num_threads < 0){
num_threads = 0;
}
/* Make new thread pool */
thpool_* thpool_p;
thpool_p = (struct thpool_*)malloc(sizeof(struct thpool_));
if (thpool_p == NULL){
err("thpool_init(): Could not allocate memory for thread pool\n");
return NULL;
}
thpool_p->num_threads_alive = 0;
thpool_p->num_threads_working = 0;
/* Initialise the job queue */
if (jobqueue_init(&thpool_p->jobqueue) == -1){
err("thpool_init(): Could not allocate memory for job queue\n");
free(thpool_p);
return NULL;
}
/* Make threads in pool */
thpool_p->threads = (struct thread**)malloc(num_threads * sizeof(struct thread *));
if (thpool_p->threads == NULL){
err("thpool_init(): Could not allocate memory for threads\n");
jobqueue_destroy(&thpool_p->jobqueue);
free(thpool_p);
return NULL;
}
pthread_mutex_init(&(thpool_p->thcount_lock), NULL);
pthread_cond_init(&thpool_p->threads_all_idle, NULL);
/* Thread init */
int n;
for (n=0; n<num_threads; n++){
thread_init(thpool_p, &thpool_p->threads[n], n);
#if THPOOL_DEBUG
printf("THPOOL_DEBUG: Created thread %d in pool \n", n);
#endif
}
/* Wait for threads to initialize */
while (thpool_p->num_threads_alive != num_threads) {}
return thpool_p;
}
/* Add work to the thread pool */
int thpool_add_work(thpool_* thpool_p, void (*function_p)(void*), void* arg_p){
job* newjob;
newjob=(struct job*)malloc(sizeof(struct job));
if (newjob==NULL){
err("thpool_add_work(): Could not allocate memory for new job\n");
return -1;
}
/* add function and argument */
newjob->function=function_p;
newjob->arg=arg_p;
/* add job to queue */
jobqueue_push(&thpool_p->jobqueue, newjob);
return 0;
}
/* Wait until all jobs have finished */
void thpool_wait(thpool_* thpool_p){
pthread_mutex_lock(&thpool_p->thcount_lock);
while (thpool_p->jobqueue.len || thpool_p->num_threads_working) {
pthread_cond_wait(&thpool_p->threads_all_idle, &thpool_p->thcount_lock);
}
pthread_mutex_unlock(&thpool_p->thcount_lock);
}
/* Destroy the threadpool */
void thpool_destroy(thpool_* thpool_p){
/* No need to destroy if it's NULL */
if (thpool_p == NULL) return ;
volatile int threads_total = thpool_p->num_threads_alive;
/* End each thread 's infinite loop */
threads_keepalive = 0;
/* Give one second to kill idle threads */
double TIMEOUT = 1.0;
time_t start, end;
double tpassed = 0.0;
time (&start);
while (tpassed < TIMEOUT && thpool_p->num_threads_alive){
bsem_post_all(thpool_p->jobqueue.has_jobs);
time (&end);
tpassed = difftime(end,start);
}
/* Poll remaining threads */
while (thpool_p->num_threads_alive){
bsem_post_all(thpool_p->jobqueue.has_jobs);
sleep(1);
}
/* Job queue cleanup */
jobqueue_destroy(&thpool_p->jobqueue);
/* Deallocs */
int n;
for (n=0; n < threads_total; n++){
thread_destroy(thpool_p->threads[n]);
}
free(thpool_p->threads);
free(thpool_p);
}
/* Resume all threads in threadpool */
void thpool_resume(thpool_* thpool_p) {
// resuming a single threadpool hasn't been
// implemented yet, meanwhile this suppresses
// the warnings
(void)thpool_p;
threads_on_hold = 0;
}
int thpool_num_threads_working(thpool_* thpool_p){
return thpool_p->num_threads_working;
}
/* ============================ THREAD ============================== */
/* Initialize a thread in the thread pool
*
* @param thread address to the pointer of the thread to be created
* @param id id to be given to the thread
* @return 0 on success, -1 otherwise.
*/
static int thread_init (thpool_* thpool_p, struct thread** thread_p, int id){
*thread_p = (struct thread*)malloc(sizeof(struct thread));
if (*thread_p == NULL){
err("thread_init(): Could not allocate memory for thread\n");
return -1;
}
(*thread_p)->thpool_p = thpool_p;
(*thread_p)->id = id;
pthread_create(&(*thread_p)->pthread, NULL, (void * (*)(void *)) thread_do, (*thread_p));
pthread_detach((*thread_p)->pthread);
return 0;
}
/* Sets the calling thread on hold */
static void thread_hold(int sig_id) {
(void)sig_id;
threads_on_hold = 1;
while (threads_on_hold){
sleep(1);
}
}
/* What each thread is doing
*
* In principle this is an endless loop. The only time this loop gets interuppted is once
* thpool_destroy() is invoked or the program exits.
*
* @param thread thread that will run this function
* @return nothing
*/
static void* thread_do(struct thread* thread_p){
/* Set thread name for profiling and debugging */
char thread_name[16] = {0};
snprintf(thread_name, 16, "thpool-%d", thread_p->id);
#if defined(__linux__)
/* Use prctl instead to prevent using _GNU_SOURCE flag and implicit declaration */
prctl(PR_SET_NAME, thread_name);
#elif defined(__APPLE__) && defined(__MACH__)
pthread_setname_np(thread_name);
#else
// err("thread_do(): pthread_setname_np is not supported on this system");
#endif
/* Assure all threads have been created before starting serving */
thpool_* thpool_p = thread_p->thpool_p;
/* Register signal handler */
/*
///// HACK
struct sigaction act;
sigemptyset(&act.sa_mask);
act.sa_flags = 0;
act.sa_handler = thread_hold;
if (sigaction(SIGUSR1, &act, NULL) == -1) {
err("thread_do(): cannot handle SIGUSR1");
}*/
/* Mark thread as alive (initialized) */
pthread_mutex_lock(&thpool_p->thcount_lock);
thpool_p->num_threads_alive += 1;
pthread_mutex_unlock(&thpool_p->thcount_lock);
while(threads_keepalive){
bsem_wait(thpool_p->jobqueue.has_jobs);
if (threads_keepalive){
pthread_mutex_lock(&thpool_p->thcount_lock);
thpool_p->num_threads_working++;
pthread_mutex_unlock(&thpool_p->thcount_lock);
/* Read job from queue and execute it */
void (*func_buff)(void*);
void* arg_buff;
job* job_p = jobqueue_pull(&thpool_p->jobqueue);
if (job_p) {
func_buff = job_p->function;
arg_buff = job_p->arg;
func_buff(arg_buff);
free(job_p);
}
pthread_mutex_lock(&thpool_p->thcount_lock);
thpool_p->num_threads_working--;
if (!thpool_p->num_threads_working) {
pthread_cond_signal(&thpool_p->threads_all_idle);
}
pthread_mutex_unlock(&thpool_p->thcount_lock);
}
}
pthread_mutex_lock(&thpool_p->thcount_lock);
thpool_p->num_threads_alive --;
pthread_mutex_unlock(&thpool_p->thcount_lock);
return NULL;
}
/* Frees a thread */
static void thread_destroy (thread* thread_p){
free(thread_p);
}
/* ============================ JOB QUEUE =========================== */
/* Initialize queue */
static int jobqueue_init(jobqueue* jobqueue_p){
jobqueue_p->len = 0;
jobqueue_p->front = NULL;
jobqueue_p->rear = NULL;
jobqueue_p->has_jobs = (struct bsem*)malloc(sizeof(struct bsem));
if (jobqueue_p->has_jobs == NULL){
return -1;
}
pthread_mutex_init(&(jobqueue_p->rwmutex), NULL);
bsem_init(jobqueue_p->has_jobs, 0);
return 0;
}
/* Clear the queue */
static void jobqueue_clear(jobqueue* jobqueue_p){
while(jobqueue_p->len){
free(jobqueue_pull(jobqueue_p));
}
jobqueue_p->front = NULL;
jobqueue_p->rear = NULL;
bsem_reset(jobqueue_p->has_jobs);
jobqueue_p->len = 0;
}
/* Add (allocated) job to queue
*/
static void jobqueue_push(jobqueue* jobqueue_p, struct job* newjob){
pthread_mutex_lock(&jobqueue_p->rwmutex);
newjob->prev = NULL;
switch(jobqueue_p->len){
case 0: /* if no jobs in queue */
jobqueue_p->front = newjob;
jobqueue_p->rear = newjob;
break;
default: /* if jobs in queue */
jobqueue_p->rear->prev = newjob;
jobqueue_p->rear = newjob;
}
jobqueue_p->len++;
bsem_post(jobqueue_p->has_jobs);
pthread_mutex_unlock(&jobqueue_p->rwmutex);
}
/* Get first job from queue(removes it from queue)
* Notice: Caller MUST hold a mutex
*/
static struct job* jobqueue_pull(jobqueue* jobqueue_p){
pthread_mutex_lock(&jobqueue_p->rwmutex);
job* job_p = jobqueue_p->front;
switch(jobqueue_p->len){
case 0: /* if no jobs in queue */
break;
case 1: /* if one job in queue */
jobqueue_p->front = NULL;
jobqueue_p->rear = NULL;
jobqueue_p->len = 0;
break;
default: /* if >1 jobs in queue */
jobqueue_p->front = job_p->prev;
jobqueue_p->len--;
/* more than one job in queue -> post it */
bsem_post(jobqueue_p->has_jobs);
}
pthread_mutex_unlock(&jobqueue_p->rwmutex);
return job_p;
}
/* Free all queue resources back to the system */
static void jobqueue_destroy(jobqueue* jobqueue_p){
jobqueue_clear(jobqueue_p);
free(jobqueue_p->has_jobs);
}
/* ======================== SYNCHRONISATION ========================= */
/* Init semaphore to 1 or 0 */
static void bsem_init(bsem *bsem_p, int value) {
if (value < 0 || value > 1) {
err("bsem_init(): Binary semaphore can take only values 1 or 0");
exit(1);
}
pthread_mutex_init(&(bsem_p->mutex), NULL);
pthread_cond_init(&(bsem_p->cond), NULL);
bsem_p->v = value;
}
/* Reset semaphore to 0 */
static void bsem_reset(bsem *bsem_p) {
bsem_init(bsem_p, 0);
}
/* Post to at least one thread */
static void bsem_post(bsem *bsem_p) {
pthread_mutex_lock(&bsem_p->mutex);
bsem_p->v = 1;
pthread_cond_signal(&bsem_p->cond);
pthread_mutex_unlock(&bsem_p->mutex);
}
/* Post to all threads */
static void bsem_post_all(bsem *bsem_p) {
pthread_mutex_lock(&bsem_p->mutex);
bsem_p->v = 1;
pthread_cond_broadcast(&bsem_p->cond);
pthread_mutex_unlock(&bsem_p->mutex);
}
/* Wait on semaphore until semaphore has value 0 */
static void bsem_wait(bsem* bsem_p) {
pthread_mutex_lock(&bsem_p->mutex);
while (bsem_p->v != 1) {
pthread_cond_wait(&bsem_p->cond, &bsem_p->mutex);
}
bsem_p->v = 0;
pthread_mutex_unlock(&bsem_p->mutex);
}

187
thpool.h Normal file
View File

@ -0,0 +1,187 @@
/**********************************
* @author Johan Hanssen Seferidis
* License: MIT
*
**********************************/
#ifndef _THPOOL_
#define _THPOOL_
#ifdef __cplusplus
extern "C" {
#endif
/* =================================== API ======================================= */
typedef struct thpool_* threadpool;
/**
* @brief Initialize threadpool
*
* Initializes a threadpool. This function will not return until all
* threads have initialized successfully.
*
* @example
*
* ..
* threadpool thpool; //First we declare a threadpool
* thpool = thpool_init(4); //then we initialize it to 4 threads
* ..
*
* @param num_threads number of threads to be created in the threadpool
* @return threadpool created threadpool on success,
* NULL on error
*/
threadpool thpool_init(int num_threads);
/**
* @brief Add work to the job queue
*
* Takes an action and its argument and adds it to the threadpool's job queue.
* If you want to add to work a function with more than one arguments then
* a way to implement this is by passing a pointer to a structure.
*
* NOTICE: You have to cast both the function and argument to not get warnings.
*
* @example
*
* void print_num(int num){
* printf("%d\n", num);
* }
*
* int main() {
* ..
* int a = 10;
* thpool_add_work(thpool, (void*)print_num, (void*)a);
* ..
* }
*
* @param threadpool threadpool to which the work will be added
* @param function_p pointer to function to add as work
* @param arg_p pointer to an argument
* @return 0 on success, -1 otherwise.
*/
int thpool_add_work(threadpool, void (*function_p)(void*), void* arg_p);
/**
* @brief Wait for all queued jobs to finish
*
* Will wait for all jobs - both queued and currently running to finish.
* Once the queue is empty and all work has completed, the calling thread
* (probably the main program) will continue.
*
* Smart polling is used in wait. The polling is initially 0 - meaning that
* there is virtually no polling at all. If after 1 seconds the threads
* haven't finished, the polling interval starts growing exponentially
* until it reaches max_secs seconds. Then it jumps down to a maximum polling
* interval assuming that heavy processing is being used in the threadpool.
*
* @example
*
* ..
* threadpool thpool = thpool_init(4);
* ..
* // Add a bunch of work
* ..
* thpool_wait(thpool);
* puts("All added work has finished");
* ..
*
* @param threadpool the threadpool to wait for
* @return nothing
*/
void thpool_wait(threadpool);
/**
* @brief Pauses all threads immediately
*
* The threads will be paused no matter if they are idle or working.
* The threads return to their previous states once thpool_resume
* is called.
*
* While the thread is being paused, new work can be added.
*
* @example
*
* threadpool thpool = thpool_init(4);
* thpool_pause(thpool);
* ..
* // Add a bunch of work
* ..
* thpool_resume(thpool); // Let the threads start their magic
*
* @param threadpool the threadpool where the threads should be paused
* @return nothing
*/
void thpool_pause(threadpool);
/**
* @brief Unpauses all threads if they are paused
*
* @example
* ..
* thpool_pause(thpool);
* sleep(10); // Delay execution 10 seconds
* thpool_resume(thpool);
* ..
*
* @param threadpool the threadpool where the threads should be unpaused
* @return nothing
*/
void thpool_resume(threadpool);
/**
* @brief Destroy the threadpool
*
* This will wait for the currently active threads to finish and then 'kill'
* the whole threadpool to free up memory.
*
* @example
* int main() {
* threadpool thpool1 = thpool_init(2);
* threadpool thpool2 = thpool_init(2);
* ..
* thpool_destroy(thpool1);
* ..
* return 0;
* }
*
* @param threadpool the threadpool to destroy
* @return nothing
*/
void thpool_destroy(threadpool);
/**
* @brief Show currently working threads
*
* Working threads are the threads that are performing work (not idle).
*
* @example
* int main() {
* threadpool thpool1 = thpool_init(2);
* threadpool thpool2 = thpool_init(2);
* ..
* printf("Working threads: %d\n", thpool_num_threads_working(thpool1));
* ..
* return 0;
* }
*
* @param threadpool the threadpool of interest
* @return integer number of threads working
*/
int thpool_num_threads_working(threadpool);
#ifdef __cplusplus
}
#endif
#endif