kompute : only try to use Vulkan for LLaMA itself
commit 24a4a5956a
parent bc4b5ed1cb
@@ -6492,7 +6492,9 @@ struct llama_context * llama_new_context_with_model(
 #undef LLAMA_METAL_CHECK_BUF
     }
 #elif defined(GGML_USE_KOMPUTE)
+    // TODO(cebtenzzre): we need to check the type of each tensor because Q8_0 is not currently supported
     if (ggml_vk_has_device() && params.n_gpu_layers > 0
+        && model->arch == LLM_ARCH_LLAMA
         && (model->ftype == LLAMA_FTYPE_ALL_F32
             || model->ftype == LLAMA_FTYPE_MOSTLY_F16
             || model->ftype == LLAMA_FTYPE_MOSTLY_Q4_0
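Read as a plain condition rather than a diff, the guard after this commit looks roughly like the sketch below. This is an illustrative reading, not the verbatim upstream code: llama_kompute_usable is a hypothetical helper name introduced here, and the ftype whitelist continues past Q4_0 in context that the hunk above truncates.

// Illustrative sketch only: llama_kompute_usable is a hypothetical helper,
// not a function in llama.cpp; it restates the guard from the hunk above.
static bool llama_kompute_usable(const llama_model * model, const llama_context_params & params) {
    return ggml_vk_has_device()                   // a Vulkan device was found
        && params.n_gpu_layers > 0                // the caller asked for GPU offload
        && model->arch == LLM_ARCH_LLAMA          // added by this commit: LLaMA-family models only
        && (model->ftype == LLAMA_FTYPE_ALL_F32   // ftype whitelist; per the TODO, Q8_0 is
            || model->ftype == LLAMA_FTYPE_MOSTLY_F16  // absent because it is not yet supported
            || model->ftype == LLAMA_FTYPE_MOSTLY_Q4_0
            /* ...further ftypes in the truncated diff context... */);
}

The effect, as the commit title says, is that the Kompute/Vulkan path is attempted only for LLaMA-architecture models in a supported file type; other models presumably fall back to the CPU path rather than failing once the backend tries to build their graphs.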