kompute : only try to use Vulkan for LLaMA itself

Cebtenzzre 2023-10-04 16:16:04 -04:00 committed by cebtenzzre
parent bc4b5ed1cb
commit 24a4a5956a


@@ -6492,7 +6492,9 @@ struct llama_context * llama_new_context_with_model(
#undef LLAMA_METAL_CHECK_BUF
    }
#elif defined(GGML_USE_KOMPUTE)
    // TODO(cebtenzzre): we need to check the type of each tensor because Q8_0 is not currently supported
    if (ggml_vk_has_device() && params.n_gpu_layers > 0
        && model->arch == LLM_ARCH_LLAMA
        && (model->ftype == LLAMA_FTYPE_ALL_F32
            || model->ftype == LLAMA_FTYPE_MOSTLY_F16
            || model->ftype == LLAMA_FTYPE_MOSTLY_Q4_0
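The change turns an unconditional device check into an allowlist: Vulkan is attempted only when a Kompute device exists, GPU offload was requested, the model architecture is LLaMA, and the file type is one the backend can run; anything else falls back to the default CPU path. Below is a minimal standalone sketch of that gating pattern; the enums and the backend_supported() helper are hypothetical stand-ins for llama.cpp's internals, for illustration only.

#include <stdbool.h>
#include <stdio.h>

// Hypothetical stand-ins for llama.cpp's internal enums (illustration only).
enum model_arch  { ARCH_LLAMA, ARCH_OTHER };
enum model_ftype { FTYPE_ALL_F32, FTYPE_MOSTLY_F16, FTYPE_MOSTLY_Q4_0, FTYPE_MOSTLY_Q8_0 };

// Allowlist check mirroring the guard in the diff: every condition must hold,
// otherwise the caller takes the CPU fallback path.
static bool backend_supported(bool has_device, int n_gpu_layers,
                              enum model_arch arch, enum model_ftype ftype) {
    return has_device && n_gpu_layers > 0
        && arch == ARCH_LLAMA
        && (ftype == FTYPE_ALL_F32
            || ftype == FTYPE_MOSTLY_F16
            || ftype == FTYPE_MOSTLY_Q4_0); // Q8_0 is not supported yet (see TODO above)
}

int main(void) {
    // Q8_0 fails the allowlist, so the context would be created on the CPU
    // instead of erroring out deep inside the Vulkan backend.
    bool use_vk = backend_supported(true, 32, ARCH_LLAMA, FTYPE_MOSTLY_Q8_0);
    printf("%s\n", use_vk ? "use Vulkan" : "fall back to CPU");
    return 0;
}

Checking the file type up front means unsupported quantizations degrade gracefully to CPU inference rather than failing at kernel dispatch time; the TODO in the diff notes that a per-tensor type check would be more precise, since ftype only describes the model's dominant quantization.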