diff --git a/llama.cpp b/llama.cpp index 6e7a53407..e79251194 100644 --- a/llama.cpp +++ b/llama.cpp @@ -6498,8 +6498,7 @@ struct llama_context * llama_new_context_with_model( && (model->ftype == LLAMA_FTYPE_ALL_F32 || model->ftype == LLAMA_FTYPE_MOSTLY_F16 || model->ftype == LLAMA_FTYPE_MOSTLY_Q4_0 - || model->ftype == LLAMA_FTYPE_MOSTLY_Q4_1 - || model->ftype == LLAMA_FTYPE_MOSTLY_Q6_K)) { + || model->ftype == LLAMA_FTYPE_MOSTLY_Q4_1)) { // this allocates all Vulkan resources and memory buffers ctx->ctx_kompute = ggml_vk_init();