From 4ed25b2f88e49b48677c100c03cc3d7159782075 Mon Sep 17 00:00:00 2001 From: Adam Treat Date: Wed, 13 Sep 2023 20:47:40 -0400 Subject: [PATCH] Sync from device back to host at the beginning of a new prompt. --- llama.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llama.cpp b/llama.cpp index c835c6fd4..45db293be 100644 --- a/llama.cpp +++ b/llama.cpp @@ -3820,6 +3820,10 @@ static bool llama_eval_internal( ggml_vk_graph_compute(lctx.ctx_kompute, gf); ggml_vk_d2h_tensor(lctx.ctx_kompute, res); } else { + if (lctx.ctx_kompute) { + ggml_vk_d2h_tensor(lctx.ctx_kompute, kv_self.k); + ggml_vk_d2h_tensor(lctx.ctx_kompute, kv_self.v); + } ggml_graph_compute_helper(lctx.work_buffer, gf, n_threads); if (lctx.ctx_kompute) { ggml_vk_h2d_tensor(lctx.ctx_kompute, kv_self.k);