From 8dc79ac380942a8a0006ff7123d1b126130cba3c Mon Sep 17 00:00:00 2001
From: Aaron Miller <apage43@ninjawhale.com>
Date: Thu, 12 Oct 2023 11:46:30 -0700
Subject: [PATCH] clean up vulkan/cpu switch

---
 llama.cpp | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index 0ff459ba5..3afbebe2a 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -3855,19 +3855,11 @@ static bool llama_eval_internal(
         ggml_graph_compute_helper(lctx.work_buffer, gf, n_threads);
     }
 #elif defined(GGML_USE_KOMPUTE)
-    if (lctx.ctx_kompute) { // && N == 1) {
+    if (lctx.ctx_kompute) {
         ggml_vk_graph_compute(lctx.ctx_kompute, gf);
         ggml_vk_d2h_tensor(lctx.ctx_kompute, res);
     } else {
-        if (lctx.ctx_kompute) {
-            ggml_vk_d2h_tensor(lctx.ctx_kompute, kv_self.k);
-            ggml_vk_d2h_tensor(lctx.ctx_kompute, kv_self.v);
-        }
         ggml_graph_compute_helper(lctx.work_buffer, gf, n_threads);
-        if (lctx.ctx_kompute) {
-            ggml_vk_h2d_tensor(lctx.ctx_kompute, kv_self.k);
-            ggml_vk_h2d_tensor(lctx.ctx_kompute, kv_self.v);
-        }
     }
 #else
     ggml_graph_compute_helper(lctx.work_buffer, gf, n_threads);