llama.cpp/ggml/include/ggml-cpu.h

#pragma once

#include "ggml.h"
#include "ggml-backend.h"

#ifdef  __cplusplus
extern "C" {
#endif

    // the compute plan that needs to be prepared for ggml_graph_compute()
    // since https://github.com/ggerganov/ggml/issues/287
    //
    // ggml_graph_plan() returns this struct by value; a pointer to it is then
    // passed to ggml_graph_compute().
    struct ggml_cplan {
        size_t    work_size; // size of work buffer, calculated by `ggml_graph_plan()`
        uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`

        // NOTE(review): presumably these mirror the n_threads / threadpool arguments
        // given to ggml_graph_plan() - confirm against the implementation
        int n_threads;
        struct ggml_threadpool * threadpool;

        // abort ggml_graph_compute when true
        // NOTE(review): abort_callback_data is presumably the opaque argument handed
        // to abort_callback - confirm against the ggml_abort_callback typedef
        ggml_abort_callback abort_callback;
        void *              abort_callback_data;
    };

    // numa strategies
    // This is the argument type of ggml_numa_init(). Values are numbered explicitly.
    // NOTE(review): what each strategy actually does is not visible in this header -
    // see the implementation before relying on a particular one.
    enum ggml_numa_strategy {
        GGML_NUMA_STRATEGY_DISABLED   = 0,
        GGML_NUMA_STRATEGY_DISTRIBUTE = 1,
        GGML_NUMA_STRATEGY_ISOLATE    = 2,
        GGML_NUMA_STRATEGY_NUMACTL    = 3,
        GGML_NUMA_STRATEGY_MIRROR     = 4,
        // not numbered explicitly: takes the next value (5), i.e. the number of strategies above
        GGML_NUMA_STRATEGY_COUNT
    };

    // NUMA setup and query; the strategy is selected with enum ggml_numa_strategy
    GGML_BACKEND_API void    ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems
    GGML_BACKEND_API bool    ggml_is_numa(void); // true if init detected that system has >1 NUMA node

    // Helpers for int32_t / float values in tensors.
    // NOTE(review): this header shows only the signatures; the behavior described
    // below is inferred from names and types and should be confirmed in the implementation.

    // ggml_new_*: take a context and a value, return a tensor pointer
    GGML_BACKEND_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
    GGML_BACKEND_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);

    // ggml_set_*: take a tensor and a value, return a tensor pointer
    // (presumably the same tensor that was passed in - TODO confirm)
    GGML_BACKEND_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
    GGML_BACKEND_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);

    // *_1d variants: addressed by a single index i
    // NOTE(review): the setters take `const struct ggml_tensor *` although their names
    // suggest they write tensor data; the qualifier covers the struct, not necessarily
    // the buffer it points to - confirm this is intentional
    GGML_BACKEND_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
    GGML_BACKEND_API void    ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);

    // *_nd variants: addressed by four indices i0..i3
    GGML_BACKEND_API int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
    GGML_BACKEND_API void    ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value);

    // float counterparts of the i32 accessors above (same parameter lists, float value type)
    GGML_BACKEND_API float   ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
    GGML_BACKEND_API void    ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);

    GGML_BACKEND_API float   ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
    GGML_BACKEND_API void    ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);

    // Threadpool API.
    // ggml_threadpool_new() returns a `struct ggml_threadpool *`, which is the handle type
    // every other function in this group takes. Neither struct ggml_threadpool nor
    // struct ggml_threadpool_params is defined in the visible part of this header.
    // NOTE(review): presumably a pool from _new() must be released with _free() - confirm.
    GGML_BACKEND_API struct ggml_threadpool *      ggml_threadpool_new           (struct ggml_threadpool_params  * params);
    GGML_BACKEND_API void                          ggml_threadpool_free          (struct ggml_threadpool * threadpool);
    GGML_BACKEND_API int                           ggml_threadpool_get_n_threads (struct ggml_threadpool * threadpool);
    GGML_BACKEND_API void                          ggml_threadpool_pause         (struct ggml_threadpool * threadpool);
    GGML_BACKEND_API void                          ggml_threadpool_resume        (struct ggml_threadpool * threadpool);

    // ggml_graph_plan() has to be called before ggml_graph_compute()
    // when plan.work_size > 0, caller must allocate memory for plan.work_data
    // The plan is returned by value; the `/* = ... */` annotations below name the
    // values that act as defaults (C has no default arguments, so callers pass them explicitly).
    GGML_BACKEND_API struct ggml_cplan ggml_graph_plan(
                  const struct ggml_cgraph * cgraph,
                                       int   n_threads, /* = GGML_DEFAULT_N_THREADS */
                    struct ggml_threadpool * threadpool /* = NULL */ );
    // takes the graph and a pointer to the plan; reports the outcome as enum ggml_status
    GGML_BACKEND_API enum ggml_status  ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);

    // same as ggml_graph_compute() but the work data is allocated as a part of the context
    // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
    GGML_BACKEND_API enum ggml_status  ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);

    //
    // system info
    //
    // All queries below take no arguments and return int.
    // NOTE(review): presumably non-zero means the feature is available; whether that is
    // decided at build time or detected at run time is not visible here - confirm.

    // x86
    GGML_BACKEND_API int ggml_cpu_has_sse3       (void);
    GGML_BACKEND_API int ggml_cpu_has_ssse3      (void);
    GGML_BACKEND_API int ggml_cpu_has_avx        (void);
    GGML_BACKEND_API int ggml_cpu_has_avx_vnni   (void);
    GGML_BACKEND_API int ggml_cpu_has_avx2       (void);
    GGML_BACKEND_API int ggml_cpu_has_f16c       (void);
    GGML_BACKEND_API int ggml_cpu_has_fma        (void);
    GGML_BACKEND_API int ggml_cpu_has_avx512     (void);
    GGML_BACKEND_API int ggml_cpu_has_avx512_vbmi(void);
    GGML_BACKEND_API int ggml_cpu_has_avx512_vnni(void);
    GGML_BACKEND_API int ggml_cpu_has_avx512_bf16(void);
    GGML_BACKEND_API int ggml_cpu_has_amx_int8   (void);
    // ARM
    GGML_BACKEND_API int ggml_cpu_has_neon       (void);
    GGML_BACKEND_API int ggml_cpu_has_arm_fma    (void);
    GGML_BACKEND_API int ggml_cpu_has_fp16_va    (void);
    GGML_BACKEND_API int ggml_cpu_has_matmul_int8(void);
    GGML_BACKEND_API int ggml_cpu_has_sve        (void);
    // unlike the has_* queries, this one returns a size rather than a flag
    GGML_BACKEND_API int ggml_cpu_get_sve_cnt    (void);  // sve vector length in bytes
    // other
    GGML_BACKEND_API int ggml_cpu_has_riscv_v    (void);
    GGML_BACKEND_API int ggml_cpu_has_vsx        (void);
    GGML_BACKEND_API int ggml_cpu_has_wasm_simd  (void);
    GGML_BACKEND_API int ggml_cpu_has_llamafile  (void);

    // Internal types and functions exposed for tests and benchmarks

    // Function-pointer types for the per-type kernels stored in struct ggml_type_traits_cpu.
    // All pointer parameters are GGML_RESTRICT-qualified. The inputs are const (x, and y in the
    // dot/gemv/gemm types); the non-const pointer (y here, s below) is presumably the output.
    // NOTE(review): the meaning of n, nr, nc, nrc, k and the bs/bx/by sizes is not documented
    // in this header - confirm against the kernel implementations before relying on them.
    typedef void (*ggml_from_float_to_mat_t)
                                     (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t nr, int64_t k, int64_t bs);
    typedef void (*ggml_vec_dot_t)  (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
                                       const void * GGML_RESTRICT y, size_t by, int nrc);
    // ggml_gemv_t and ggml_gemm_t share an identical signature
    typedef void (*ggml_gemv_t)     (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
                                       const void * GGML_RESTRICT y, int nr, int nc);
    typedef void (*ggml_gemm_t)     (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
                                       const void * GGML_RESTRICT y, int nr, int nc);

    // Table of CPU kernels and parameters for one tensor type; a pointer to it is
    // returned by ggml_get_type_traits_cpu(). Field order is part of the ABI.
    // NOTE(review): whether unused kernel pointers are NULL is not visible here - confirm before calling.
    struct ggml_type_traits_cpu {
        ggml_from_float_t        from_float;        // typedef declared outside this header
        ggml_from_float_to_mat_t from_float_to_mat;
        ggml_vec_dot_t           vec_dot;
        enum ggml_type           vec_dot_type;      // presumably the type vec_dot expects for its other operand - TODO confirm
        int64_t                  nrows; // number of rows to process simultaneously
        int64_t                  ncols; // number of columns to process simultaneously
        ggml_gemv_t              gemv;
        ggml_gemm_t              gemm;
    };

    // returns a pointer-to-const traits table for `type`; callers cannot modify it through this pointer
    GGML_BACKEND_API const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu(enum ggml_type type);

    // NOTE(review): what this initializes, and whether it must precede other calls, is not visible here - confirm
    GGML_BACKEND_API void ggml_cpu_init(void);

    //
    // CPU backend
    //
    // ggml_backend_t, ggml_backend_reg_t, ggml_backend_buffer_type_t and ggml_threadpool_t
    // are declared outside this header.

    // takes no arguments and returns a backend handle
    GGML_BACKEND_API ggml_backend_t ggml_backend_cpu_init(void);

    // The setters below name their handle parameter `backend_cpu`.
    // NOTE(review): presumably they require a backend for which ggml_backend_is_cpu() is true - confirm.
    GGML_BACKEND_API bool ggml_backend_is_cpu                (ggml_backend_t backend);
    GGML_BACKEND_API void ggml_backend_cpu_set_n_threads     (ggml_backend_t backend_cpu, int n_threads);
    GGML_BACKEND_API void ggml_backend_cpu_set_threadpool    (ggml_backend_t backend_cpu, ggml_threadpool_t threadpool);
    GGML_BACKEND_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);

    GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);

    // only declared when the build defines GGML_USE_CPU_HBM
#ifdef GGML_USE_CPU_HBM
    GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);
#endif

    // declared unconditionally (no architecture guard in this header)
    GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void);
    GGML_BACKEND_API bool ggml_backend_cpu_buft_is_aarch64(ggml_backend_buffer_type_t buft);

#ifdef __cplusplus
}
#endif
ggml : move CPU backend to a separate file (#10144) 2024-11-03 18:34:08 +00:00			`#pragma once`

			`#include "ggml.h"`
			`#include "ggml-backend.h"`

			`#ifdef __cplusplus`
			`extern "C" {`
			`#endif`

			`// the compute plan that needs to be prepared for ggml_graph_compute()`
			`// since https://github.com/ggerganov/ggml/issues/287`
			`struct ggml_cplan {`
			size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()`
			uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`

			`int n_threads;`
			`struct ggml_threadpool * threadpool;`

			`// abort ggml_graph_compute when true`
			`ggml_abort_callback abort_callback;`
			`void * abort_callback_data;`
			`};`

			`// numa strategies`
			`enum ggml_numa_strategy {`
			`GGML_NUMA_STRATEGY_DISABLED = 0,`
			`GGML_NUMA_STRATEGY_DISTRIBUTE = 1,`
			`GGML_NUMA_STRATEGY_ISOLATE = 2,`
			`GGML_NUMA_STRATEGY_NUMACTL = 3,`
			`GGML_NUMA_STRATEGY_MIRROR = 4,`
			`GGML_NUMA_STRATEGY_COUNT`
			`};`

ggml : build backends as libraries (#10256) * ggml : build backends as libraries --------- Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com> 2024-11-14 17:04:35 +00:00			`GGML_BACKEND_API void ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems`
			`GGML_BACKEND_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node`
ggml : move CPU backend to a separate file (#10144) 2024-11-03 18:34:08 +00:00
ggml : build backends as libraries (#10256) * ggml : build backends as libraries --------- Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com> 2024-11-14 17:04:35 +00:00			`GGML_BACKEND_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);`
			`GGML_BACKEND_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);`
ggml : move CPU backend to a separate file (#10144) 2024-11-03 18:34:08 +00:00
ggml : build backends as libraries (#10256) * ggml : build backends as libraries --------- Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com> 2024-11-14 17:04:35 +00:00			`GGML_BACKEND_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);`
			`GGML_BACKEND_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);`
ggml : move CPU backend to a separate file (#10144) 2024-11-03 18:34:08 +00:00
ggml : build backends as libraries (#10256) * ggml : build backends as libraries --------- Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com> 2024-11-14 17:04:35 +00:00			`GGML_BACKEND_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);`
			`GGML_BACKEND_API void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);`
ggml : move CPU backend to a separate file (#10144) 2024-11-03 18:34:08 +00:00
ggml : build backends as libraries (#10256) * ggml : build backends as libraries --------- Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com> 2024-11-14 17:04:35 +00:00			`GGML_BACKEND_API int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);`
			`GGML_BACKEND_API void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value);`
ggml : move CPU backend to a separate file (#10144) 2024-11-03 18:34:08 +00:00
ggml : build backends as libraries (#10256) * ggml : build backends as libraries --------- Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com> 2024-11-14 17:04:35 +00:00			`GGML_BACKEND_API float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);`
			`GGML_BACKEND_API void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);`
ggml : move CPU backend to a separate file (#10144) 2024-11-03 18:34:08 +00:00
ggml : build backends as libraries (#10256) * ggml : build backends as libraries --------- Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com> 2024-11-14 17:04:35 +00:00			`GGML_BACKEND_API float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);`
			`GGML_BACKEND_API void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);`
ggml : move CPU backend to a separate file (#10144) 2024-11-03 18:34:08 +00:00
ggml : add support for dynamic loading of backends (#10469) * ggml : add support for dynamic loading of backends --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> 2024-11-25 14:13:39 +00:00			`GGML_BACKEND_API struct ggml_threadpool * ggml_threadpool_new (struct ggml_threadpool_params * params);`
			`GGML_BACKEND_API void ggml_threadpool_free (struct ggml_threadpool * threadpool);`
			`GGML_BACKEND_API int ggml_threadpool_get_n_threads (struct ggml_threadpool * threadpool);`
			`GGML_BACKEND_API void ggml_threadpool_pause (struct ggml_threadpool * threadpool);`
			`GGML_BACKEND_API void ggml_threadpool_resume (struct ggml_threadpool * threadpool);`
ggml : move CPU backend to a separate file (#10144) 2024-11-03 18:34:08 +00:00
			`// ggml_graph_plan() has to be called before ggml_graph_compute()`
			`// when plan.work_size > 0, caller must allocate memory for plan.work_data`
ggml : build backends as libraries (#10256) * ggml : build backends as libraries --------- Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com> 2024-11-14 17:04:35 +00:00			`GGML_BACKEND_API struct ggml_cplan ggml_graph_plan(`
ggml : move CPU backend to a separate file (#10144) 2024-11-03 18:34:08 +00:00			`const struct ggml_cgraph * cgraph,`
			`int n_threads, /* = GGML_DEFAULT_N_THREADS */`
			`struct ggml_threadpool * threadpool /* = NULL */ );`
ggml : build backends as libraries (#10256) * ggml : build backends as libraries --------- Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com> 2024-11-14 17:04:35 +00:00			`GGML_BACKEND_API enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);`
ggml : move CPU backend to a separate file (#10144) 2024-11-03 18:34:08 +00:00
			`// same as ggml_graph_compute() but the work data is allocated as a part of the context`
			`// note: the drawback of this API is that you must have ensured that the context has enough memory for the work data`
ggml : build backends as libraries (#10256) * ggml : build backends as libraries --------- Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com> 2024-11-14 17:04:35 +00:00			`GGML_BACKEND_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);`
ggml : move CPU backend to a separate file (#10144) 2024-11-03 18:34:08 +00:00
ggml : build backends as libraries (#10256) * ggml : build backends as libraries --------- Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com> 2024-11-14 17:04:35 +00:00			`//`
			`// system info`
			`//`

			`// x86`
			`GGML_BACKEND_API int ggml_cpu_has_sse3 (void);`
			`GGML_BACKEND_API int ggml_cpu_has_ssse3 (void);`
			`GGML_BACKEND_API int ggml_cpu_has_avx (void);`
ggml : add support for dynamic loading of backends (#10469) * ggml : add support for dynamic loading of backends --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> 2024-11-25 14:13:39 +00:00			`GGML_BACKEND_API int ggml_cpu_has_avx_vnni (void);`
ggml : build backends as libraries (#10256) * ggml : build backends as libraries --------- Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com> 2024-11-14 17:04:35 +00:00			`GGML_BACKEND_API int ggml_cpu_has_avx2 (void);`
			`GGML_BACKEND_API int ggml_cpu_has_f16c (void);`
			`GGML_BACKEND_API int ggml_cpu_has_fma (void);`
			`GGML_BACKEND_API int ggml_cpu_has_avx512 (void);`
			`GGML_BACKEND_API int ggml_cpu_has_avx512_vbmi(void);`
			`GGML_BACKEND_API int ggml_cpu_has_avx512_vnni(void);`
			`GGML_BACKEND_API int ggml_cpu_has_avx512_bf16(void);`
			`GGML_BACKEND_API int ggml_cpu_has_amx_int8 (void);`
			`// ARM`
			`GGML_BACKEND_API int ggml_cpu_has_neon (void);`
			`GGML_BACKEND_API int ggml_cpu_has_arm_fma (void);`
			`GGML_BACKEND_API int ggml_cpu_has_fp16_va (void);`
			`GGML_BACKEND_API int ggml_cpu_has_matmul_int8(void);`
			`GGML_BACKEND_API int ggml_cpu_has_sve (void);`
			`GGML_BACKEND_API int ggml_cpu_get_sve_cnt (void); // sve vector length in bytes`
			`// other`
			`GGML_BACKEND_API int ggml_cpu_has_riscv_v (void);`
			`GGML_BACKEND_API int ggml_cpu_has_vsx (void);`
			`GGML_BACKEND_API int ggml_cpu_has_wasm_simd (void);`
			`GGML_BACKEND_API int ggml_cpu_has_llamafile (void);`
ggml : move CPU backend to a separate file (#10144) 2024-11-03 18:34:08 +00:00
			`// Internal types and functions exposed for tests and benchmarks`

			`typedef void (*ggml_from_float_to_mat_t)`
			`(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t nr, int64_t k, int64_t bs);`
			`typedef void (*ggml_vec_dot_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,`
			`const void * GGML_RESTRICT y, size_t by, int nrc);`
			`typedef void (*ggml_gemv_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,`
			`const void * GGML_RESTRICT y, int nr, int nc);`
			`typedef void (*ggml_gemm_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,`
			`const void * GGML_RESTRICT y, int nr, int nc);`

			`struct ggml_type_traits_cpu {`
ggml : build backends as libraries (#10256) * ggml : build backends as libraries --------- Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com> 2024-11-14 17:04:35 +00:00			`ggml_from_float_t from_float;`
ggml : move CPU backend to a separate file (#10144) 2024-11-03 18:34:08 +00:00			`ggml_from_float_to_mat_t from_float_to_mat;`
			`ggml_vec_dot_t vec_dot;`
			`enum ggml_type vec_dot_type;`
			`int64_t nrows; // number of rows to process simultaneously`
			`int64_t ncols; // number of columns to process simultaneously`
			`ggml_gemv_t gemv;`
			`ggml_gemm_t gemm;`
			`};`

ggml : build backends as libraries (#10256) * ggml : build backends as libraries --------- Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com> 2024-11-14 17:04:35 +00:00			`GGML_BACKEND_API const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu(enum ggml_type type);`
ggml : move CPU backend to a separate file (#10144) 2024-11-03 18:34:08 +00:00
ggml : build backends as libraries (#10256) * ggml : build backends as libraries --------- Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com> 2024-11-14 17:04:35 +00:00			`GGML_BACKEND_API void ggml_cpu_init(void);`
ggml : move CPU backend to a separate file (#10144) 2024-11-03 18:34:08 +00:00
			`//`
			`// CPU backend`
			`//`

ggml : build backends as libraries (#10256) * ggml : build backends as libraries --------- Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com> 2024-11-14 17:04:35 +00:00			`GGML_BACKEND_API ggml_backend_t ggml_backend_cpu_init(void);`
ggml : move CPU backend to a separate file (#10144) 2024-11-03 18:34:08 +00:00
ggml : build backends as libraries (#10256) * ggml : build backends as libraries --------- Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com> 2024-11-14 17:04:35 +00:00			`GGML_BACKEND_API bool ggml_backend_is_cpu (ggml_backend_t backend);`
			`GGML_BACKEND_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads);`
			`GGML_BACKEND_API void ggml_backend_cpu_set_threadpool (ggml_backend_t backend_cpu, ggml_threadpool_t threadpool);`
			`GGML_BACKEND_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);`
ggml : move CPU backend to a separate file (#10144) 2024-11-03 18:34:08 +00:00
ggml : build backends as libraries (#10256) * ggml : build backends as libraries --------- Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com> 2024-11-14 17:04:35 +00:00			`GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);`
ggml : move CPU backend to a separate file (#10144) 2024-11-03 18:34:08 +00:00
			`#ifdef GGML_USE_CPU_HBM`
ggml : build backends as libraries (#10256) * ggml : build backends as libraries --------- Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com> 2024-11-14 17:04:35 +00:00			`GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);`
ggml : move CPU backend to a separate file (#10144) 2024-11-03 18:34:08 +00:00			`#endif`

backend cpu: add online flow for aarch64 Q4_0 GEMV/GEMM kernels (#9921) * backend-cpu: add online flow for aarch64 Q4_0 GEMV/GEMM kernels --------- Co-authored-by: Diego Devesa <slarengh@gmail.com> 2024-11-15 00:28:50 +00:00			`GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void);`
			`GGML_BACKEND_API bool ggml_backend_cpu_buft_is_aarch64(ggml_backend_buffer_type_t buft);`

ggml : move CPU backend to a separate file (#10144) 2024-11-03 18:34:08 +00:00			`#ifdef __cplusplus`
			`}`
			`#endif`