metal : minor fixup in FA kernel

ggml-ci
2025-01-07 17:21:46 +00:00 · 2024-11-02 20:41:42 +02:00 · 2024-11-02 20:41:42 +02:00 · 40e717263e
commit 40e717263e
parent 42cadc74bd
1 changed file with 3 additions and 3 deletions
--- a/ggml/src/ggml-metal.metal
+++ b/ggml/src/ggml-metal.metal
@@ -2776,11 +2776,11 @@ kernel void kernel_flash_attn_ext_vec_f16(
        const short iv3 = iq3 / rv3;
        // load the queries from shared memory into local memory
-        float4 mq[D4];
+        float4 mq[D4/NW];
        for (short ii = 0; ii < D4; ii += NW) {
            short i = ii + tiisg;
-            mq[i] = (float4) sq4[i];
+            mq[i/NW] = (float4) sq4[i];
        }
        // pointer to the mask
@@ -2812,7 +2812,7 @@ kernel void kernel_flash_attn_ext_vec_f16(
                        mk[2] = (float4) pk4[i + 2*(nb11/8)];
                        mk[3] = (float4) pk4[i + 3*(nb11/8)];
-                        mqk += (float4) (mq[i] * mk);
+                        mqk += (float4) (mq[i/NW] * mk);
                    }
                    // reduce the results from the threads in the simdgroup