mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-30 13:24:35 +00:00
perf: use bigger threadgroups in mm
This commit is contained in:
parent
46385ee0d5
commit
3327d84a7f
@@ -1148,7 +1148,7 @@ void ggml_vk_mul_mat_mat_q4_x(const std::vector<uint32_t>& spirv,
|
||||
} else {
|
||||
s_algo = komputeManager()->getAlgorithm(__func__);
|
||||
s_algo->setTensors({inA, inB, out});
|
||||
-s_algo->setWorkgroup({unsigned(ne01),
|
||||
+s_algo->setWorkgroup({unsigned(ne01)/32,
|
||||
unsigned(ne11),
|
||||
unsigned(std::max(ne12, ne02)),
|
||||
});
|
||||
|
@@ -14,7 +14,7 @@
|
||||
#extension GL_KHR_shader_subgroup_arithmetic : require
|
||||
#extension GL_EXT_debug_printf : enable
|
||||
|
||||
-// layout(local_size_x = 8) in;
|
||||
+layout(local_size_x = 32) in;
|
||||
|
||||
layout(binding = 0) readonly buffer tensorInA { uint8_t inA[]; };
|
||||
layout(binding = 1) readonly buffer tensorInB { float inB[]; };
|
||||
|
Loading…
Reference in New Issue
Block a user