2023-06-14 10:33:14 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include "ggml.h"
|
|
|
|
|
|
|
|
#ifdef __cplusplus
|
|
|
|
extern "C" {
|
|
|
|
#endif
|
|
|
|
|
|
|
|
// Return type of an OS thread entry point (see
// `ggml_threading_thread_runner`): `int` on Windows builds, `void *`
// everywhere else.
#if defined(_WIN32)
typedef int ggml_thread_ret_t;
#else
typedef void *ggml_thread_ret_t;
#endif
|
|
|
|
|
|
|
|
// Forward declaration only: the struct's members are not defined in this
// header, so the functions below pass it around by pointer.
struct ggml_threading_context;
|
|
|
|
|
|
|
|
// Optional (experimental) features.
// `GGML_THREADING_FEATURE_NONE` is 0; every other enumerator is a distinct
// single bit.
enum ggml_threading_features {
    GGML_THREADING_FEATURE_NONE = 0,
    GGML_THREADING_FEATURE_WAIT_ON_DONE = 1 << 0,
    GGML_THREADING_FEATURE_PERF = 1 << 1,
};
|
|
|
|
|
|
|
|
// The thread runner to feed into OS threads.
|
|
|
|
typedef ggml_thread_ret_t(ggml_threading_thread_runner)(void *data);
|
|
|
|
|
|
|
|
// Init and start underlying workers if n_threads > 1.
|
|
|
|
//
|
2023-06-16 12:32:12 +00:00
|
|
|
// thread: optional OS thread runner, default value:
|
|
|
|
// `ggml_threading_graph_compute_thread`.
|
|
|
|
//
|
|
|
|
// features: optional for configure
|
|
|
|
// threading additional features. see `ggml_threading_feature`, default 0.
|
|
|
|
//
|
2023-06-14 10:33:14 +00:00
|
|
|
// stages_time: optional for collecting per-stage wall clock time.
|
|
|
|
struct ggml_threading_context *
|
|
|
|
ggml_threading_start(int n_threads, ggml_threading_thread_runner *thread,
|
2023-06-16 12:32:12 +00:00
|
|
|
ggml_task_runner *task_runner,
|
2023-06-14 10:33:14 +00:00
|
|
|
enum ggml_threading_features features,
|
|
|
|
int64_t stages_time[3]);
|
|
|
|
|
|
|
|
// Stop the workers (if any exist) and free all associated memory, including
// `ctx` itself; `ctx` must not be used after this call returns.
void ggml_threading_stop(struct ggml_threading_context *ctx);
|
|
|
|
|
|
|
|
// The default implementation of `ggml_threading_thread_runner`
|
|
|
|
ggml_thread_ret_t ggml_threading_graph_compute_thread(void *data);
|
|
|
|
|
|
|
|
// Compute a tensor. It computes the enabled task stages one by one.
//
// ctx:   threading context obtained from `ggml_threading_start`.
// node:  the tensor to compute.
// wdata: NOTE(review): presumably the work buffer, with `wsize` its size in
// wsize: bytes -- confirm against the implementation.
//
// Callers must check the returned `enum ggml_compute_error`: when it reports
// a fallback error, the computation should be retried.
enum ggml_compute_error
ggml_threading_compute_tensor(struct ggml_threading_context *ctx,
                              struct ggml_tensor *node, void *wdata,
                              size_t wsize);
|
|
|
|
|
|
|
|
// Experimental: a thin wrapper provided for mulmat tune.
// Takes the compute `params` (read-only) and the `tensor` to operate on, and
// reports the outcome as an `enum ggml_compute_error`.
enum ggml_compute_error
ggml_compute_forward_wrapper(const struct ggml_compute_params *params,
                             struct ggml_tensor *tensor);
|
|
|
|
|
|
|
|
#ifdef __cplusplus
|
|
|
|
}
|
|
|
|
#endif
|