From 25cfbf6776d44ee428b9414240d7c725b3709258 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Sun, 29 Oct 2023 11:12:03 +0200
Subject: [PATCH] llama : fix non-CUDA build

---
 llama.cpp | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index 90fc698a9..d9c4fb3e3 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -5553,6 +5553,13 @@ static struct ggml_cgraph * llama_build_graph(
         { OFFLOAD_FUNC_NR,  "GPU (CUDA) NR"  },
         { OFFLOAD_FUNC_EMB, "GPU (CUDA) EMB" },
         { OFFLOAD_FUNC_OUT, "GPU (CUDA) OUT" },
+#else
+        { OFFLOAD_FUNC,     "CPU" },
+        { OFFLOAD_FUNC_KQ,  "CPU" },
+        { OFFLOAD_FUNC_V,   "CPU" },
+        { OFFLOAD_FUNC_NR,  "CPU" },
+        { OFFLOAD_FUNC_EMB, "CPU" },
+        { OFFLOAD_FUNC_OUT, "CPU" },
 #endif // GGML_USE_CUBLAS
     };
 
@@ -5707,6 +5714,12 @@ static struct ggml_cgraph * llama_build_graph(
 
         offload_func_t func = ggml_offload_nop;
 
+#ifdef GGML_USE_CUBLAS
+        static offload_func_t ggml_offload_gpu = ggml_cuda_assign_buffers_no_alloc;
+#else
+        static offload_func_t ggml_offload_gpu = ggml_offload_nop;
+#endif
+
         switch (func_e) {
             case OFFLOAD_FUNC_NOP:
             case OFFLOAD_FUNC_OUT: func = ggml_offload_nop; break;
@@ -5714,7 +5727,7 @@ static struct ggml_cgraph * llama_build_graph(
             case OFFLOAD_FUNC_KQ:
             case OFFLOAD_FUNC_V:
             case OFFLOAD_FUNC_NR:
-            case OFFLOAD_FUNC_EMB: func = ggml_cuda_assign_buffers_no_alloc; break;
+            case OFFLOAD_FUNC_EMB: func = ggml_offload_gpu; break;
             default: GGML_ASSERT(false);
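
Note (not part of the patch): the fix works by routing every GPU offload case through a
single function pointer that is chosen at compile time. In a CUDA build the pointer is
ggml_cuda_assign_buffers_no_alloc; otherwise it degrades to ggml_offload_nop, so the switch
statement compiles on every backend without referencing a CUDA-only symbol. Below is a
minimal, self-contained sketch of that pattern. The names USE_GPU, tensor_t, offload_nop
and offload_gpu are hypothetical stand-ins for the real GGML_USE_CUBLAS / ggml_* names,
not the actual llama.cpp code.

    /* Sketch of compile-time offload dispatch; all names here are stand-ins. */
    #include <stdio.h>

    typedef struct { const char * name; } tensor_t;

    typedef void (*offload_func_t)(tensor_t * t);

    /* No-op used when the tensor stays on the CPU. */
    static void offload_nop(tensor_t * t) {
        printf("%s stays on the CPU\n", t->name);
    }

    #ifdef USE_GPU /* stand-in for GGML_USE_CUBLAS */
    /* In a CUDA build this would assign a GPU buffer to the tensor. */
    static void offload_gpu(tensor_t * t) {
        printf("assigning GPU buffer to %s\n", t->name);
    }
    #else
    /* Without the GPU backend, "offloading" is the same no-op, so the
     * dispatch code below compiles unchanged. */
    #define offload_gpu offload_nop
    #endif

    int main(void) {
        tensor_t kq = { "kq" };
        offload_func_t func = offload_gpu; /* selected at compile time */
        func(&kq);
        return 0;
    }

Building it both ways shows the two behaviors: `cc demo.c` prints the CPU message, while
`cc -DUSE_GPU demo.c` takes the GPU branch, mirroring how the patch keeps one dispatch
path for CUDA and non-CUDA builds alike.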