mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-25 10:54:36 +00:00
kompute : llama-bench support and ggml_cpu_has_kompute() (#5226)
This commit is contained in:
parent
e0085fdf7c
commit
e8dc55d006
@ -1521,6 +1521,7 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
|
|||||||
fprintf(stream, "cpu_has_avx512_vnni: %s\n", ggml_cpu_has_avx512_vnni() ? "true" : "false");
|
fprintf(stream, "cpu_has_avx512_vnni: %s\n", ggml_cpu_has_avx512_vnni() ? "true" : "false");
|
||||||
fprintf(stream, "cpu_has_cublas: %s\n", ggml_cpu_has_cublas() ? "true" : "false");
|
fprintf(stream, "cpu_has_cublas: %s\n", ggml_cpu_has_cublas() ? "true" : "false");
|
||||||
fprintf(stream, "cpu_has_clblast: %s\n", ggml_cpu_has_clblast() ? "true" : "false");
|
fprintf(stream, "cpu_has_clblast: %s\n", ggml_cpu_has_clblast() ? "true" : "false");
|
||||||
|
fprintf(stream, "cpu_has_kompute: %s\n", ggml_cpu_has_kompute() ? "true" : "false");
|
||||||
fprintf(stream, "cpu_has_fma: %s\n", ggml_cpu_has_fma() ? "true" : "false");
|
fprintf(stream, "cpu_has_fma: %s\n", ggml_cpu_has_fma() ? "true" : "false");
|
||||||
fprintf(stream, "cpu_has_gpublas: %s\n", ggml_cpu_has_gpublas() ? "true" : "false");
|
fprintf(stream, "cpu_has_gpublas: %s\n", ggml_cpu_has_gpublas() ? "true" : "false");
|
||||||
fprintf(stream, "cpu_has_neon: %s\n", ggml_cpu_has_neon() ? "true" : "false");
|
fprintf(stream, "cpu_has_neon: %s\n", ggml_cpu_has_neon() ? "true" : "false");
|
||||||
|
@ -563,6 +563,7 @@ struct test {
|
|||||||
static const bool cuda;
|
static const bool cuda;
|
||||||
static const bool opencl;
|
static const bool opencl;
|
||||||
static const bool vulkan;
|
static const bool vulkan;
|
||||||
|
static const bool kompute;
|
||||||
static const bool metal;
|
static const bool metal;
|
||||||
static const bool gpu_blas;
|
static const bool gpu_blas;
|
||||||
static const bool blas;
|
static const bool blas;
|
||||||
@ -647,6 +648,9 @@ struct test {
|
|||||||
if (vulkan) {
|
if (vulkan) {
|
||||||
return "Vulkan";
|
return "Vulkan";
|
||||||
}
|
}
|
||||||
|
if (kompute) {
|
||||||
|
return "Kompute";
|
||||||
|
}
|
||||||
if (metal) {
|
if (metal) {
|
||||||
return "Metal";
|
return "Metal";
|
||||||
}
|
}
|
||||||
@ -662,7 +666,7 @@ struct test {
|
|||||||
static const std::vector<std::string> & get_fields() {
|
static const std::vector<std::string> & get_fields() {
|
||||||
static const std::vector<std::string> fields = {
|
static const std::vector<std::string> fields = {
|
||||||
"build_commit", "build_number",
|
"build_commit", "build_number",
|
||||||
"cuda", "opencl", "vulkan", "metal", "gpu_blas", "blas",
|
"cuda", "opencl", "vulkan", "kompute", "metal", "gpu_blas", "blas",
|
||||||
"cpu_info", "gpu_info",
|
"cpu_info", "gpu_info",
|
||||||
"model_filename", "model_type", "model_size", "model_n_params",
|
"model_filename", "model_type", "model_size", "model_n_params",
|
||||||
"n_batch", "n_threads", "type_k", "type_v",
|
"n_batch", "n_threads", "type_k", "type_v",
|
||||||
@ -686,8 +690,9 @@ struct test {
|
|||||||
field == "avg_ns" || field == "stddev_ns") {
|
field == "avg_ns" || field == "stddev_ns") {
|
||||||
return INT;
|
return INT;
|
||||||
}
|
}
|
||||||
if (field == "cuda" || field == "opencl" || field == "vulkan"|| field == "metal" || field == "gpu_blas" || field == "blas" ||
|
if (field == "cuda" || field == "opencl" || field == "vulkan" || field == "kompute" || field == "metal" ||
|
||||||
field == "f16_kv" || field == "no_kv_offload" || field == "mul_mat_q") {
|
field == "gpu_blas" || field == "blas" || field == "f16_kv" || field == "no_kv_offload" ||
|
||||||
|
field == "mul_mat_q") {
|
||||||
return BOOL;
|
return BOOL;
|
||||||
}
|
}
|
||||||
if (field == "avg_ts" || field == "stddev_ts") {
|
if (field == "avg_ts" || field == "stddev_ts") {
|
||||||
@ -714,7 +719,8 @@ struct test {
|
|||||||
}
|
}
|
||||||
std::vector<std::string> values = {
|
std::vector<std::string> values = {
|
||||||
build_commit, std::to_string(build_number),
|
build_commit, std::to_string(build_number),
|
||||||
std::to_string(cuda), std::to_string(opencl), std::to_string(vulkan), std::to_string(metal), std::to_string(gpu_blas), std::to_string(blas),
|
// Values must line up one-to-one with the names in get_fields():
// "cuda", "opencl", "vulkan", "kompute", ... — so the fourth flag is kompute, not vulkan again.
std::to_string(cuda), std::to_string(opencl), std::to_string(vulkan), std::to_string(kompute),
|
||||||
|
std::to_string(metal), std::to_string(gpu_blas), std::to_string(blas),
|
||||||
cpu_info, gpu_info,
|
cpu_info, gpu_info,
|
||||||
model_filename, model_type, std::to_string(model_size), std::to_string(model_n_params),
|
model_filename, model_type, std::to_string(model_size), std::to_string(model_n_params),
|
||||||
std::to_string(n_batch), std::to_string(n_threads), ggml_type_name(type_k), ggml_type_name(type_v),
|
std::to_string(n_batch), std::to_string(n_threads), ggml_type_name(type_k), ggml_type_name(type_v),
|
||||||
@ -743,6 +749,7 @@ const int test::build_number = LLAMA_BUILD_NUMBER;
|
|||||||
const bool test::cuda = !!ggml_cpu_has_cublas();
|
const bool test::cuda = !!ggml_cpu_has_cublas();
|
||||||
const bool test::opencl = !!ggml_cpu_has_clblast();
|
const bool test::opencl = !!ggml_cpu_has_clblast();
|
||||||
const bool test::vulkan = !!ggml_cpu_has_vulkan();
|
const bool test::vulkan = !!ggml_cpu_has_vulkan();
|
||||||
|
const bool test::kompute = !!ggml_cpu_has_kompute();
|
||||||
const bool test::metal = !!ggml_cpu_has_metal();
|
const bool test::metal = !!ggml_cpu_has_metal();
|
||||||
const bool test::gpu_blas = !!ggml_cpu_has_gpublas();
|
const bool test::gpu_blas = !!ggml_cpu_has_gpublas();
|
||||||
const bool test::blas = !!ggml_cpu_has_blas();
|
const bool test::blas = !!ggml_cpu_has_blas();
|
||||||
|
11
ggml.c
11
ggml.c
@ -20473,6 +20473,14 @@ int ggml_cpu_has_vulkan(void) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Compile-time feature probe: returns 1 when GGML_USE_KOMPUTE was defined
// while building this translation unit, and 0 otherwise. Takes no arguments.
int ggml_cpu_has_kompute(void) {
|
||||||
|
#if defined(GGML_USE_KOMPUTE)
|
||||||
|
return 1;
|
||||||
|
#else
|
||||||
|
return 0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
int ggml_cpu_has_sycl(void) {
|
int ggml_cpu_has_sycl(void) {
|
||||||
#if defined(GGML_USE_SYCL)
|
#if defined(GGML_USE_SYCL)
|
||||||
return 1;
|
return 1;
|
||||||
@ -20482,7 +20490,8 @@ int ggml_cpu_has_sycl(void) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int ggml_cpu_has_gpublas(void) {
|
int ggml_cpu_has_gpublas(void) {
|
||||||
return ggml_cpu_has_cublas() || ggml_cpu_has_clblast() || ggml_cpu_has_vulkan() || ggml_cpu_has_sycl();
|
return ggml_cpu_has_cublas() || ggml_cpu_has_clblast() || ggml_cpu_has_vulkan() || ggml_cpu_has_kompute() ||
|
||||||
|
ggml_cpu_has_sycl();
|
||||||
}
|
}
|
||||||
|
|
||||||
int ggml_cpu_has_sse3(void) {
|
int ggml_cpu_has_sse3(void) {
|
||||||
|
1
ggml.h
1
ggml.h
@ -2266,6 +2266,7 @@ extern "C" {
|
|||||||
GGML_API int ggml_cpu_has_cublas (void);
|
GGML_API int ggml_cpu_has_cublas (void);
|
||||||
GGML_API int ggml_cpu_has_clblast (void);
|
GGML_API int ggml_cpu_has_clblast (void);
|
||||||
GGML_API int ggml_cpu_has_vulkan (void);
|
GGML_API int ggml_cpu_has_vulkan (void);
|
||||||
|
GGML_API int ggml_cpu_has_kompute (void);
|
||||||
GGML_API int ggml_cpu_has_gpublas (void);
|
GGML_API int ggml_cpu_has_gpublas (void);
|
||||||
GGML_API int ggml_cpu_has_sse3 (void);
|
GGML_API int ggml_cpu_has_sse3 (void);
|
||||||
GGML_API int ggml_cpu_has_ssse3 (void);
|
GGML_API int ggml_cpu_has_ssse3 (void);
|
||||||
|
@ -6878,11 +6878,6 @@ static int llama_decode_internal(
|
|||||||
n_threads = std::min(4, n_threads);
|
n_threads = std::min(4, n_threads);
|
||||||
}
|
}
|
||||||
|
|
||||||
const bool fully_offloaded = model.n_gpu_layers >= (int) hparams.n_layer + 1;
|
|
||||||
if ((ggml_cpu_has_cublas() || ggml_cpu_has_vulkan()) && fully_offloaded) {
|
|
||||||
n_threads = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef GGML_USE_MPI
|
#ifdef GGML_USE_MPI
|
||||||
const int64_t n_layer = hparams.n_layer;
|
const int64_t n_layer = hparams.n_layer;
|
||||||
ggml_mpi_graph_compute_pre(lctx.ctx_mpi, gf, n_layer);
|
ggml_mpi_graph_compute_pre(lctx.ctx_mpi, gf, n_layer);
|
||||||
|
Loading…
Reference in New Issue
Block a user