From 24a4a5956af130148d6cee6bdb5397bf3e5ce824 Mon Sep 17 00:00:00 2001
From: Cebtenzzre
Date: Wed, 4 Oct 2023 16:16:04 -0400
Subject: [PATCH] kompute : only try to use Vulkan for LLaMA itself

---
 llama.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llama.cpp b/llama.cpp
index 603f7cc64..6e7a53407 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -6492,7 +6492,9 @@ struct llama_context * llama_new_context_with_model(
 #undef LLAMA_METAL_CHECK_BUF
     }
 #elif defined(GGML_USE_KOMPUTE)
+    // TODO(cebtenzzre): we need to check the type of each tensor because Q8_0 is not currently supported
     if (ggml_vk_has_device() && params.n_gpu_layers > 0
+        && model->arch == LLM_ARCH_LLAMA
         && (model->ftype == LLAMA_FTYPE_ALL_F32
         ||  model->ftype == LLAMA_FTYPE_MOSTLY_F16
         ||  model->ftype == LLAMA_FTYPE_MOSTLY_Q4_0
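
Note (not part of the patch): the TODO above asks for a per-tensor type check
rather than the coarse model->ftype whitelist, since ftype only describes the
dominant quantization of a model and not every tensor in it. A minimal sketch
of what such a check might look like, assuming llama.cpp internals
(llama_model::tensors_by_name and the ggml type enum); the helper name
kompute_supports_all_tensors is hypothetical:

    // Hypothetical helper (sketch only): reject Kompute/Vulkan offload if any
    // tensor in the model uses a type the Kompute shaders cannot handle
    // (e.g. Q8_0, per the TODO above).
    static bool kompute_supports_all_tensors(const llama_model & model) {
        for (const auto & it : model.tensors_by_name) {
            switch (it.second->type) {
                case GGML_TYPE_F32:
                case GGML_TYPE_F16:
                case GGML_TYPE_Q4_0:
                case GGML_TYPE_Q4_1:
                    break;        // types the Kompute backend supports
                default:
                    return false; // unsupported tensor type: fall back to CPU
            }
        }
        return true;
    }

Such a helper could replace the ftype comparisons in the condition above,
catching models whose individual tensors (for example, output weights) use a
type the ftype value does not imply.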