From 0640427f7b0343cf8832589f912407d19510870f Mon Sep 17 00:00:00 2001 From: Alan Gray Date: Thu, 25 Apr 2024 00:51:48 -0700 Subject: [PATCH] define USE_CUDA_GRAPH only when llama.cpp's CMake defines GGML_ALLOW_CUDA_GRAPHS --- CMakeLists.txt | 1 + ggml-cuda.cu | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f134a153b..a5a230492 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -413,6 +413,7 @@ if (LLAMA_CUDA) list(APPEND GGML_SOURCES_CUDA "ggml-cuda.cu") add_compile_definitions(GGML_USE_CUDA) + add_compile_definitions(GGML_ALLOW_CUDA_GRAPHS) if (LLAMA_CUDA_FORCE_DMMV) add_compile_definitions(GGML_CUDA_FORCE_DMMV) endif() diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 46cbb7c7d..a63b9b554 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -2405,7 +2405,7 @@ GGML_CALL static void ggml_backend_cuda_synchronize(ggml_backend_t backend) { GGML_UNUSED(backend); } -#if (CUDART_VERSION >= 12000) +#if (CUDART_VERSION >= 12000) && defined(GGML_ALLOW_CUDA_GRAPHS) #define USE_CUDA_GRAPH #endif