diff --git a/llama.cpp b/llama.cpp
index 73b932a74..98f49abd7 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1053,6 +1053,8 @@ static void llama_model_load_internal(
         fprintf(stderr, "%s: [cublas] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
     }
+#else
+    (void) n_gpu_layers;
 #endif
 
     // loading time will be recalculate after the first eval, so
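
For context, a minimal sketch of the idiom this diff applies: when a parameter (here `n_gpu_layers`) is only read inside an `#ifdef GGML_USE_CUBLAS` block, casting it to `void` in the `#else` branch silences `-Wunused-parameter` on CPU-only builds. The function name `load_model` below is hypothetical and only for illustration; it is not the llama.cpp code.

```cpp
#include <cstdio>

// Illustrative stand-in for llama_model_load_internal (name is hypothetical).
static void load_model(int n_gpu_layers) {
#ifdef GGML_USE_CUBLAS
    // Parameter is actually used only when the cuBLAS path is compiled in.
    std::printf("offloading %d layers to GPU\n", n_gpu_layers);
#else
    // Intentionally unused in non-cuBLAS builds; the cast avoids the warning.
    (void) n_gpu_layers;
#endif
}

int main() {
    load_model(32);
    return 0;
}
```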