From 3327d84a7fba14ad0b2778982013a88c808a1132 Mon Sep 17 00:00:00 2001
From: Aaron Miller
Date: Wed, 11 Oct 2023 16:02:53 -0700
Subject: [PATCH] perf: use bigger threadgroups in mm

---
 ggml-vulkan.cpp                  | 2 +-
 kompute/op_mul_mat_mat_q4_0.comp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/ggml-vulkan.cpp b/ggml-vulkan.cpp
index a0a2a9b0e..57813cb3d 100644
--- a/ggml-vulkan.cpp
+++ b/ggml-vulkan.cpp
@@ -1148,7 +1148,7 @@ void ggml_vk_mul_mat_mat_q4_x(const std::vector& spirv,
     } else {
         s_algo = komputeManager()->getAlgorithm(__func__);
         s_algo->setTensors({inA, inB, out});
-        s_algo->setWorkgroup({unsigned(ne01),
+        s_algo->setWorkgroup({unsigned(ne01)/32,
                               unsigned(ne11),
                               unsigned(std::max(ne12, ne02)),
                               });
diff --git a/kompute/op_mul_mat_mat_q4_0.comp b/kompute/op_mul_mat_mat_q4_0.comp
index 93dcfdaed..994aadc8a 100644
--- a/kompute/op_mul_mat_mat_q4_0.comp
+++ b/kompute/op_mul_mat_mat_q4_0.comp
@@ -14,7 +14,7 @@
 #extension GL_KHR_shader_subgroup_arithmetic : require
 #extension GL_EXT_debug_printf : enable
 
-// layout(local_size_x = 8) in;
+layout(local_size_x = 32) in;
 
 layout(binding = 0) readonly buffer tensorInA { uint8_t inA[]; };
 layout(binding = 1) readonly buffer tensorInB { float inB[]; };