From 932589c0efe73480b22eca6a1e2a01c230ef5258 Mon Sep 17 00:00:00 2001
From: M. Yusuf Sarıgöz
Date: Sat, 14 Oct 2023 03:12:10 +0300
Subject: [PATCH] Honor -ngl option for Cuda offloading in llava

---
 examples/llava/llava.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/examples/llava/llava.cpp b/examples/llava/llava.cpp
index 14dacc780..8384d9d78 100644
--- a/examples/llava/llava.cpp
+++ b/examples/llava/llava.cpp
@@ -79,7 +79,13 @@ int main(int argc, char ** argv) {
 
     llama_backend_init(params.numa);
 
-    llama_model_params model_params = llama_model_default_params();
+    llama_model_params model_params = llama_model_default_params();
+    model_params.n_gpu_layers = params.n_gpu_layers;
+    model_params.main_gpu     = params.main_gpu;
+    model_params.tensor_split = params.tensor_split;
+    model_params.use_mmap     = params.use_mmap;
+    model_params.use_mlock    = params.use_mlock;
+
     llama_model * model = llama_load_model_from_file(params.model.c_str(), model_params);
     if (model == NULL) {
         fprintf(stderr , "%s: error: unable to load model\n" , __func__);
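
Usage note (commentary, not part of the commit): before this change, llava parsed
-ngl via the common args but never copied it into llama_model_params, so the flag
had no effect. A quick smoke test might look like the following; the model and
image paths are placeholders:

    ./llava -m models/ggml-model-q4_k.gguf \
        --mmproj models/mmproj-model-f16.gguf \
        --image path/to/image.jpg \
        -ngl 32

With a CUDA build, -ngl N should now offload N layers to the GPU, and main_gpu,
tensor_split, use_mmap, and use_mlock are forwarded as well.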