From 3327d84a7fba14ad0b2778982013a88c808a1132 Mon Sep 17 00:00:00 2001
From: Aaron Miller <apage43@ninjawhale.com>
Date: Wed, 11 Oct 2023 16:02:53 -0700
Subject: [PATCH] perf: use bigger threadgroups (local_size_x = 32) in mul_mat_mat

---
 ggml-vulkan.cpp                  | 2 +-
 kompute/op_mul_mat_mat_q4_0.comp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/ggml-vulkan.cpp b/ggml-vulkan.cpp
index a0a2a9b0e..57813cb3d 100644
--- a/ggml-vulkan.cpp
+++ b/ggml-vulkan.cpp
@@ -1148,7 +1148,7 @@ void ggml_vk_mul_mat_mat_q4_x(const std::vector<uint32_t>& spirv,
     } else {
         s_algo = komputeManager()->getAlgorithm(__func__);
         s_algo->setTensors({inA, inB, out});
-        s_algo->setWorkgroup({unsigned(ne01),
+        s_algo->setWorkgroup({unsigned(ne01)/32,
                               unsigned(ne11),
                               unsigned(std::max(ne12, ne02)),
                               });
diff --git a/kompute/op_mul_mat_mat_q4_0.comp b/kompute/op_mul_mat_mat_q4_0.comp
index 93dcfdaed..994aadc8a 100644
--- a/kompute/op_mul_mat_mat_q4_0.comp
+++ b/kompute/op_mul_mat_mat_q4_0.comp
@@ -14,7 +14,7 @@
 #extension GL_KHR_shader_subgroup_arithmetic : require
 #extension GL_EXT_debug_printf : enable
 
-// layout(local_size_x = 8) in;
+layout(local_size_x = 32) in;
 
 layout(binding = 0) readonly buffer tensorInA { uint8_t inA[]; };
 layout(binding = 1) readonly buffer tensorInB { float inB[]; };