Revert prompt processing on the GPU for now.

Fixes issues #1580 and #1581
Adam Treat 2023-10-27 18:32:51 -04:00 committed by cebtenzzre
parent e006d377dd
commit a5eb001eab

@@ -3870,11 +3870,19 @@ static bool llama_eval_internal(
         ggml_graph_compute_helper(lctx.work_buffer, gf, n_threads);
     }
 #elif defined(GGML_USE_KOMPUTE)
-    if (lctx.ctx_kompute) {
+    if (lctx.ctx_kompute && N == 1) {
         ggml_vk_graph_compute(lctx.ctx_kompute, gf);
         ggml_vk_d2h_tensor(lctx.ctx_kompute, res);
     } else {
+        if (lctx.ctx_kompute) {
+            ggml_vk_d2h_tensor(lctx.ctx_kompute, kv_self.k);
+            ggml_vk_d2h_tensor(lctx.ctx_kompute, kv_self.v);
+        }
         ggml_graph_compute_helper(lctx.work_buffer, gf, n_threads);
+        if (lctx.ctx_kompute) {
+            ggml_vk_h2d_tensor(lctx.ctx_kompute, kv_self.k);
+            ggml_vk_h2d_tensor(lctx.ctx_kompute, kv_self.v);
+        }
     }
 #else
     ggml_graph_compute_helper(lctx.work_buffer, gf, n_threads);
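
For context, a commented sketch of the control flow this hunk produces, using only the names visible in the diff (N is the batch token count in llama_eval_internal; the semantics of the ggml_vk_* helpers are assumed from their names, not confirmed here). Single-token decode stays on the Kompute/Vulkan device; multi-token prompt batches fall back to the CPU, with the KV cache copied device-to-host before the CPU pass and host-to-device afterwards:

    // Sketch only, not the verbatim patch: how the Kompute branch
    // behaves after this commit.
    #elif defined(GGML_USE_KOMPUTE)
        if (lctx.ctx_kompute && N == 1) {
            // Single-token decode: run the whole graph on the Vulkan
            // device, then copy just the result tensor back to the host.
            ggml_vk_graph_compute(lctx.ctx_kompute, gf);
            ggml_vk_d2h_tensor(lctx.ctx_kompute, res);
        } else {
            // Prompt processing (N > 1) is reverted to the CPU path.
            // The KV cache lives in device memory, so download it first...
            if (lctx.ctx_kompute) {
                ggml_vk_d2h_tensor(lctx.ctx_kompute, kv_self.k);
                ggml_vk_d2h_tensor(lctx.ctx_kompute, kv_self.v);
            }
            ggml_graph_compute_helper(lctx.work_buffer, gf, n_threads);
            // ...then upload the K/V tensors the CPU pass just updated,
            // so later N == 1 decodes on the GPU see a consistent cache.
            if (lctx.ctx_kompute) {
                ggml_vk_h2d_tensor(lctx.ctx_kompute, kv_self.k);
                ggml_vk_h2d_tensor(lctx.ctx_kompute, kv_self.v);
            }
        }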