From 05697f670b1ea28b80c39854832ea53527f75c55 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 4 Nov 2024 13:49:34 +0200
Subject: [PATCH] metal : simplify f16 and f32 dequant kernels (#0)

---
 ggml/src/ggml-metal.metal | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/ggml/src/ggml-metal.metal b/ggml/src/ggml-metal.metal
index 3eb976633..ff9d37490 100644
--- a/ggml/src/ggml-metal.metal
+++ b/ggml/src/ggml-metal.metal
@@ -19,18 +19,12 @@ constexpr constant static float kvalues_iq4nl_f[16] = {
 // NOTE: this is not dequantizing - we are simply fitting the template
 template <typename type4x4>
 void dequantize_f32(device const float4x4 * src, short il, thread type4x4 & reg) {
-    float4x4 temp = *(((device float4x4 *)src));
-    for (int i = 0; i < 16; i++){
-        reg[i/4][i%4] = temp[i/4][i%4];
-    }
+    reg = (type4x4)(*src);
 }
 
 template <typename type4x4>
 void dequantize_f16(device const half4x4 * src, short il, thread type4x4 & reg) {
-    half4x4 temp = *(((device half4x4 *)src));
-    for (int i = 0; i < 16; i++){
-        reg[i/4][i%4] = temp[i/4][i%4];
-    }
+    reg = (type4x4)(*src);
 }
 
 template <typename type4x4>