Mirror of https://github.com/ggerganov/llama.cpp.git
vulkan: only use M-sized matmul on Apple GPUs (#5412)
* vulkan: refactor guess_matmul_pipeline for vendor

  Refactor ggml_vk_guess_matmul_pipeline to simplify adding per-vendor conditionals.

  Signed-off-by: Sergio Lopez <slp@redhat.com>

* vulkan: only use M-sized matmul on Apple GPUs

  L-sized and S-sized matmuls are broken on Apple GPUs, so force M-size with this vendor.

  Signed-off-by: Sergio Lopez <slp@redhat.com>

---------

Signed-off-by: Sergio Lopez <slp@redhat.com>
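For orientation, here is a minimal, self-contained sketch of the dispatch pattern the message describes: size selection is split into per-vendor helpers plus a top-level function that switches on the Vulkan vendor ID, with Apple forced onto the M-sized pipeline. The names below (MatmulSize, guess_matmul_size_*) are illustrative stand-ins, not the ggml-vulkan symbols; the real code in the diff further down returns vk_pipeline pointers taken from a ggml_backend_vk_context.

// Illustrative sketch only (hypothetical names) -- not the ggml-vulkan implementation.
#include <cstdint>
#include <iostream>

enum class MatmulSize { S, M, L };

// Apple GPUs: L- and S-sized matmuls are broken, so always pick M.
static MatmulSize guess_matmul_size_apple(int /*m*/, int /*n*/) {
    return MatmulSize::M;
}

// Generic fallback: pick the shader size from the matrix dimensions.
static MatmulSize guess_matmul_size_generic(int m, int n) {
    if (m <= 32 || n <= 32) return MatmulSize::S;
    if (m <= 64 || n <= 64) return MatmulSize::M;
    return MatmulSize::L;
}

// Top-level dispatch on the PCI vendor ID (0x106b is Apple, as in the diff below).
static MatmulSize guess_matmul_size(uint32_t vendor_id, int m, int n) {
    constexpr uint32_t kVendorApple = 0x106b;
    switch (vendor_id) {
    case kVendorApple:
        return guess_matmul_size_apple(m, n);
    default:
        return guess_matmul_size_generic(m, n);
    }
}

int main() {
    // A large matmul maps to L on a generic GPU but stays on M for Apple.
    std::cout << static_cast<int>(guess_matmul_size(0x10de, 4096, 4096)) << '\n'; // 2 (L)
    std::cout << static_cast<int>(guess_matmul_size(0x106b, 4096, 4096)) << '\n'; // 1 (M)
}

Under this structure, a new vendor quirk is one more helper and one more case, which is what the refactor in the first bullet is aiming at.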
parent a803333a4e
commit c88c74f967
ggml-vulkan.cpp: 103 changed lines
@@ -27,6 +27,7 @@
 #define CEIL_DIV(M, N) (((M) + (N)-1) / (N))
 
 #define VK_VENDOR_ID_AMD 0x1002
+#define VK_VENDOR_ID_APPLE 0x106b
 #define VK_VENDOR_ID_INTEL 0x8086
 #define VK_VENDOR_ID_NVIDIA 0x10de
 
@@ -2034,18 +2035,100 @@ static uint32_t ggml_vk_guess_matmul_pipeline_align(ggml_backend_vk_context * ct
     return ctx->pipeline_matmul_f32_aligned_l.align;
 }
 
-static vk_pipeline* ggml_vk_guess_matmul_pipeline(ggml_backend_vk_context * ctx, bool bit16_x, bool bit16_y, int m, int n, bool aligned) {
-#ifdef GGML_VULKAN_DEBUG
-    std::cerr << "ggml_vk_guess_matmul_pipeline(" << bit16_x << ", " << bit16_y << ", " << m << ", " << n << ", " << aligned << ")";
-#endif
+static vk_pipeline* ggml_vk_guess_matmul_pipeline_amd(ggml_backend_vk_context * ctx, bool bit16_x, bool bit16_y, int m, int n, bool aligned) {
     if (bit16_x && bit16_y) {
-        if (ctx->device.lock()->vendor_id == VK_VENDOR_ID_INTEL || m <= 32 || n <= 32) {
+        if (m <= 32 || n <= 32) {
 #ifdef GGML_VULKAN_DEBUG
             std::cerr << " S" << std::endl;
 #endif
             return aligned ? &ctx->pipeline_matmul_f16_aligned_s : &ctx->pipeline_matmul_f16_s;
         }
-        if (ctx->device.lock()->subgroup_size == 64 || m <= 64 || n <= 64) {
+#ifdef GGML_VULKAN_DEBUG
+        std::cerr << " M" << std::endl;
+#endif
+        return aligned ? &ctx->pipeline_matmul_f16_aligned_m : &ctx->pipeline_matmul_f16_m;
+    }
+    if (bit16_x && !bit16_y) {
+        if (m <= 32 || n <= 32) {
+#ifdef GGML_VULKAN_DEBUG
+            std::cerr << " S" << std::endl;
+#endif
+            return aligned ? &ctx->pipeline_matmul_f16_f32_aligned_s : &ctx->pipeline_matmul_f16_f32_s;
+        }
+#ifdef GGML_VULKAN_DEBUG
+        std::cerr << " M" << std::endl;
+#endif
+        return aligned ? &ctx->pipeline_matmul_f16_f32_aligned_m : &ctx->pipeline_matmul_f16_f32_m;
+    }
+    if (!bit16_x && bit16_y) {
+        GGML_ASSERT(false);
+    }
+
+    if (m <= 32 || n <= 32) {
+#ifdef GGML_VULKAN_DEBUG
+        std::cerr << " S" << std::endl;
+#endif
+        return aligned ? &ctx->pipeline_matmul_f32_aligned_s : &ctx->pipeline_matmul_f32_s;
+    }
+#ifdef GGML_VULKAN_DEBUG
+    std::cerr << " M" << std::endl;
+#endif
+    return aligned ? &ctx->pipeline_matmul_f32_aligned_m : &ctx->pipeline_matmul_f32_m;
+}
+
+static vk_pipeline* ggml_vk_guess_matmul_pipeline_apple(ggml_backend_vk_context * ctx, bool bit16_x, bool bit16_y, bool aligned) {
+#ifdef GGML_VULKAN_DEBUG
+    std::cerr << " M" << std::endl;
+#endif
+    if (bit16_x && bit16_y) {
+        return aligned ? &ctx->pipeline_matmul_f16_aligned_m : &ctx->pipeline_matmul_f16_m;
+    }
+    if (bit16_x && !bit16_y) {
+        return aligned ? &ctx->pipeline_matmul_f16_f32_aligned_m : &ctx->pipeline_matmul_f16_f32_m;
+    }
+    if (!bit16_x && bit16_y) {
+        GGML_ASSERT(false);
+    }
+    return aligned ? &ctx->pipeline_matmul_f32_aligned_m : &ctx->pipeline_matmul_f32_m;
+}
+
+static vk_pipeline* ggml_vk_guess_matmul_pipeline_intel(ggml_backend_vk_context * ctx, bool bit16_x, bool bit16_y, bool aligned) {
+#ifdef GGML_VULKAN_DEBUG
+    std::cerr << " S" << std::endl;
+#endif
+    if (bit16_x && bit16_y) {
+        return aligned ? &ctx->pipeline_matmul_f16_aligned_s : &ctx->pipeline_matmul_f16_s;
+    }
+    if (bit16_x && !bit16_y) {
+        return aligned ? &ctx->pipeline_matmul_f16_f32_aligned_s : &ctx->pipeline_matmul_f16_f32_s;
+    }
+    if (!bit16_x && bit16_y) {
+        GGML_ASSERT(false);
+    }
+    return aligned ? &ctx->pipeline_matmul_f32_aligned_s : &ctx->pipeline_matmul_f32_s;
+}
+
+static vk_pipeline* ggml_vk_guess_matmul_pipeline(ggml_backend_vk_context * ctx, bool bit16_x, bool bit16_y, int m, int n, bool aligned) {
+#ifdef GGML_VULKAN_DEBUG
+    std::cerr << "ggml_vk_guess_matmul_pipeline(" << bit16_x << ", " << bit16_y << ", " << m << ", " << n << ", " << aligned << ")";
+#endif
+    switch (ctx->device.lock()->vendor_id) {
+    case VK_VENDOR_ID_AMD:
+        return ggml_vk_guess_matmul_pipeline_amd(ctx, bit16_x, bit16_y, m, n, aligned);
+    case VK_VENDOR_ID_APPLE:
+        return ggml_vk_guess_matmul_pipeline_apple(ctx, bit16_x, bit16_y, aligned);
+    case VK_VENDOR_ID_INTEL:
+        return ggml_vk_guess_matmul_pipeline_intel(ctx, bit16_x, bit16_y, aligned);
+    }
+
+    if (bit16_x && bit16_y) {
+        if (m <= 32 || n <= 32) {
+#ifdef GGML_VULKAN_DEBUG
+            std::cerr << " S" << std::endl;
+#endif
+            return aligned ? &ctx->pipeline_matmul_f16_aligned_s : &ctx->pipeline_matmul_f16_s;
+        }
+        if (m <= 64 || n <= 64) {
 #ifdef GGML_VULKAN_DEBUG
             std::cerr << " M" << std::endl;
 #endif
@@ -2057,13 +2140,13 @@ static vk_pipeline* ggml_vk_guess_matmul_pipeline(ggml_backend_vk_context * ctx,
         return aligned ? &ctx->pipeline_matmul_f16_aligned_l : &ctx->pipeline_matmul_f16_l;
     }
     if (bit16_x && !bit16_y) {
-        if (ctx->device.lock()->vendor_id == VK_VENDOR_ID_INTEL || m <= 32 || n <= 32) {
+        if (m <= 32 || n <= 32) {
 #ifdef GGML_VULKAN_DEBUG
             std::cerr << " S" << std::endl;
 #endif
             return aligned ? &ctx->pipeline_matmul_f16_f32_aligned_s : &ctx->pipeline_matmul_f16_f32_s;
         }
-        if (ctx->device.lock()->subgroup_size == 64 || m <= 64 || n <= 64) {
+        if (m <= 64 || n <= 64) {
 #ifdef GGML_VULKAN_DEBUG
             std::cerr << " M" << std::endl;
 #endif
||||||
@ -2078,13 +2161,13 @@ static vk_pipeline* ggml_vk_guess_matmul_pipeline(ggml_backend_vk_context * ctx,
|
|||||||
GGML_ASSERT(false);
|
GGML_ASSERT(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ctx->device.lock()->vendor_id == VK_VENDOR_ID_INTEL || m <= 32 || n <= 32) {
|
if (m <= 32 || n <= 32) {
|
||||||
#ifdef GGML_VULKAN_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << " S" << std::endl;
|
std::cerr << " S" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
return aligned ? &ctx->pipeline_matmul_f32_aligned_s : &ctx->pipeline_matmul_f32_s;
|
return aligned ? &ctx->pipeline_matmul_f32_aligned_s : &ctx->pipeline_matmul_f32_s;
|
||||||
}
|
}
|
||||||
if (ctx->device.lock()->subgroup_size == 64 || m <= 64 || n <= 64) {
|
if (m <= 64 || n <= 64) {
|
||||||
#ifdef GGML_VULKAN_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
std::cerr << " M" << std::endl;
|
std::cerr << " M" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
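The switch in the new ggml_vk_guess_matmul_pipeline above is the extension point the first message bullet refers to. As a purely hypothetical illustration (not part of this commit), one more vendor override would be a single helper plus one extra case; the NVIDIA vendor ID is already defined in the file, but the helper named here does not exist:

    // Hypothetical fragment, not in this commit: assumes a ggml_vk_guess_matmul_pipeline_nvidia
    // helper written in the same style as the _amd/_apple/_intel helpers above.
    switch (ctx->device.lock()->vendor_id) {
    case VK_VENDOR_ID_AMD:
        return ggml_vk_guess_matmul_pipeline_amd(ctx, bit16_x, bit16_y, m, n, aligned);
    case VK_VENDOR_ID_APPLE:
        return ggml_vk_guess_matmul_pipeline_apple(ctx, bit16_x, bit16_y, aligned);
    case VK_VENDOR_ID_INTEL:
        return ggml_vk_guess_matmul_pipeline_intel(ctx, bit16_x, bit16_y, aligned);
    case VK_VENDOR_ID_NVIDIA:  // hypothetical addition
        return ggml_vk_guess_matmul_pipeline_nvidia(ctx, bit16_x, bit16_y, m, n, aligned);
    }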