mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-11 11:11:46 +00:00
138 lines
3.3 KiB
C
138 lines
3.3 KiB
C
#pragma once
|
|
|
|
#include <stdbool.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
|
|
#include "ggml.h"
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
// Version number of the mulmat tune data.
// NOTE(review): presumably stored in ggml_mulmat_tune::version and compared
// when tune data is read/validated -- confirm against the implementation.
#define GGML_MULMAT_TUNE_VERSION 8
|
|
// Capacity of the ggml_mulmat_tune::shapes array (the array is sized by this
// macro).
#define GGML_MULMAT_N_SHAPES 6
|
|
|
|
// NOTE(review): presumably the upper bound for
// ggml_mulmat_tune_params::n_pass -- not used in this header, confirm against
// the bench implementation.
#define GGML_MULMAT_MAX_PASS 3
|
|
|
|
// One record pairing a value of M with three per-stage timings.
// NOTE(review): the time unit and the meaning of each of the three stages are
// not visible in this header -- confirm against the implementation.
struct ggml_mulmat_tune_m {
    int M;

    int stages_time[3];
};
|
|
|
|
struct ggml_mulmat_tune_model {
|
|
const char *name;
|
|
|
|
enum ggml_ftype ftype;
|
|
|
|
int n_vocab;
|
|
|
|
int n_embd;
|
|
|
|
// n_ff = ((2*(4*n_embd)/3 + n_mult - 1)/n_mult)*n_mult
|
|
int n_ff;
|
|
|
|
// n_rot = n_embd/n_head;
|
|
int n_rot;
|
|
};
|
|
|
|
struct ggml_mulmat_tune_shape {
|
|
// For RoPE, one of N / K is 0.
|
|
int N;
|
|
int K;
|
|
|
|
enum ggml_type src0_type;
|
|
enum ggml_type src1_type;
|
|
|
|
int n_profiles;
|
|
struct ggml_task_profile *profiles;
|
|
|
|
int m_num;
|
|
int *arr_m;
|
|
|
|
struct ggml_mulmat_tune_m *items;
|
|
};
|
|
|
|
struct ggml_mulmat_tune {
|
|
int version;
|
|
|
|
char model[16];
|
|
|
|
enum ggml_ftype ftype;
|
|
|
|
int n_shapes;
|
|
// Given N/K, we bench for mul_mat [M,K] x [K,N].
|
|
struct ggml_mulmat_tune_shape shapes[GGML_MULMAT_N_SHAPES];
|
|
|
|
int n_threads;
|
|
};
|
|
|
|
// Timing for one task profile: three per-stage times plus a total.
// NOTE(review): this struct spells the array `stage_time` (singular), unlike
// `stages_time` in ggml_mulmat_tune_m and mm_cache_element; the name is kept
// because callers depend on it.
// NOTE(review): whether total_time is the sum of stage_time is not visible
// in this header -- confirm.
struct ggml_mulmat_tune_time {
    struct ggml_task_profile *profile;
    int stage_time[3];
    int total_time;
};
|
|
|
|
// One cache entry: an (M, N, K) triple together with a task profile pointer
// and three per-stage timings.
// NOTE(review): where this cache lives and how entries are looked up is not
// visible in this header -- confirm against the implementation.
struct mm_cache_element {
    int M;
    int N;
    int K;
    struct ggml_task_profile *profile;
    int stages_time[3];
};
|
|
|
|
// params for tune/bench.
|
|
struct ggml_mulmat_tune_params {
|
|
struct ggml_mulmat_tune_model model;
|
|
int m_num;
|
|
int n_pass;
|
|
int n_threads;
|
|
bool progress; // print and clear '.'
|
|
bool output_console; // also print result to console
|
|
const char *fname;
|
|
};
|
|
|
|
// Returns a task profile for the given tune data, M/N/K and source types.
// NOTE: stages_time is filled if not null.
// (`int stages_time[3]` is an array parameter, so it is adjusted to `int *`
// and NULL is a valid argument.)
// NOTE(review): the selection criterion and whether the result can be NULL
// are not visible in this header -- confirm.
const struct ggml_task_profile *
ggml_mulmat_tune_select_task_profile(struct ggml_mulmat_tune *tune, int M,
                                     int N, int K, enum ggml_type src0_t,
                                     enum ggml_type src1_t, int stages_time[3]);
|
|
|
|
// Validates `tune` against a model name, ftype and thread count.
// NOTE(review): `ftype` is a plain int here, while sibling declarations use
// `enum ggml_ftype`; left as-is because it must match the definition.
// NOTE(review): presumably returns true when the tune data matches -- confirm.
bool ggml_mulmat_tune_validate(const struct ggml_mulmat_tune *tune,
                               const char *model_name, int ftype,
                               int n_threads);
|
|
|
|
// Initializes `model` from a model name and ftype.
// NOTE(review): which fields are derived from `name` and whether `name` is
// copied or only referenced are not visible in this header -- confirm.
void ggml_mulmat_tune_model_init(struct ggml_mulmat_tune_model *model,
                                 const char *name, enum ggml_ftype ftype);
|
|
|
|
// Initializes `tune` from `params` and a task profile factory.
// NOTE(review): presumably returns true on success and allocates memory that
// ggml_mulmat_tune_free() later releases -- confirm against the
// implementation.
bool ggml_mulmat_tune_init(struct ggml_mulmat_tune *tune,
                           struct ggml_mulmat_tune_params *params,
                           struct ggml_task_profile_factory *profile_factory);
|
|
|
|
// Frees a tune object.
// NOTE(review): presumably releases the per-shape arrays and not `tune`
// itself -- confirm against the implementation.
void ggml_mulmat_tune_free(struct ggml_mulmat_tune *tune);
|
|
|
|
// Writes the data of `tune` to the stream `fp`.
// NOTE(review): presumably returns true on success; the on-disk format is
// defined by the implementation -- confirm.
bool ggml_mulmat_tune_write_data(const struct ggml_mulmat_tune *tune, FILE *fp);
|
|
|
|
// Reads tune data from the stream `fp` into `tune`.
// NOTE(review): presumably returns true on success and allocates memory owned
// by `tune` -- confirm against the implementation.
bool ggml_mulmat_tune_read_data(struct ggml_mulmat_tune *tune, FILE *fp);
|
|
|
|
// Looks up the shape entry of `tune` for the given N, K and source types.
// NOTE(review): presumably returns NULL when no entry matches -- confirm.
const struct ggml_mulmat_tune_shape *
ggml_mulmat_tune_get_shape(const struct ggml_mulmat_tune *tune, int N, int K,
                           enum ggml_type src0_type, enum ggml_type src1_type);
|
|
|
|
// Estimates timings for `shape` at the given M and stores them through
// `profile_time`.
// NOTE(review): presumably `profile_time` points to an array with one element
// per profile of `shape` (shape->n_profiles) -- confirm against callers.
void ggml_mulmat_tune_estimate_time(const struct ggml_mulmat_tune_shape *shape,
                                    int M,
                                    struct ggml_mulmat_tune_time *profile_time);
|
|
|
|
// Returns a string name for `backend`.
// NOTE(review): presumably a static string the caller must not free --
// confirm.
const char *ggml_task_backend_name(enum ggml_task_backend backend);
|
|
|
|
// Stores the built-in task backends through `backends`.
// NOTE(review): presumably returns the number of entries written; the
// required capacity of `backends` is not visible in this header -- confirm.
int ggml_mulmat_tune_get_builtin_task_backends(
    enum ggml_task_backend *backends);
|
|
|
|
// Runs the tune benchmark for `tune` using `params`.
// NOTE(review): presumably returns true on success and writes results to
// params->fname and/or the console depending on params -- confirm against the
// implementation.
bool ggml_mulmat_tune_bench(struct ggml_mulmat_tune *tune,
                            struct ggml_mulmat_tune_params *params);
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|