mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-05 00:04:36 +00:00
clean up vulkan/cpu switch
This commit is contained in:
parent
cd0257ed0d
commit
8dc79ac380
10
llama.cpp
10
llama.cpp
@@ -3855,19 +3855,11 @@ static bool llama_eval_internal(
             ggml_graph_compute_helper(lctx.work_buffer, gf, n_threads);
     }
 #elif defined(GGML_USE_KOMPUTE)
-    if (lctx.ctx_kompute) { // && N == 1) {
+    if (lctx.ctx_kompute) {
         ggml_vk_graph_compute(lctx.ctx_kompute, gf);
         ggml_vk_d2h_tensor(lctx.ctx_kompute, res);
     } else {
-        if (lctx.ctx_kompute) {
-            ggml_vk_d2h_tensor(lctx.ctx_kompute, kv_self.k);
-            ggml_vk_d2h_tensor(lctx.ctx_kompute, kv_self.v);
-        }
         ggml_graph_compute_helper(lctx.work_buffer, gf, n_threads);
-        if (lctx.ctx_kompute) {
-            ggml_vk_h2d_tensor(lctx.ctx_kompute, kv_self.k);
-            ggml_vk_h2d_tensor(lctx.ctx_kompute, kv_self.v);
-        }
     }
 #else
     ggml_graph_compute_helper(lctx.work_buffer, gf, n_threads);
Loading…
Reference in New Issue
Block a user