From 72af25998c65e2fc0affb57347323d58a9781a12 Mon Sep 17 00:00:00 2001 From: Iwan Kawrakow Date: Fri, 28 Jul 2023 17:12:27 +0300 Subject: [PATCH] Fix misaligned memory access in Q4_1 kernel --- ggml-cuda.cu | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 0638db693..cc874d6be 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -1368,7 +1368,9 @@ static __device__ __forceinline__ float vec_dot_q4_1_q8_1( #if __CUDA_ARCH__ >= MIN_CC_DP4A // lowest compute capability for integer intrinsics const block_q4_1 * bq4_1 = (const block_q4_1 *) vbq; - const int vi = *((int *) &bq4_1->qs[sizeof(int) * (iqs + 0)]); + int vi; + memcpy(&vi, &bq4_1->qs[sizeof(int) * (iqs + 0)], sizeof(vi)); + //const int vi = *((int *) &bq4_1->qs[sizeof(int) * (iqs + 0)]); const int ui0 = *((int *) &bq8_1->qs[sizeof(int) * (iqs + 0)]); const int ui1 = *((int *) &bq8_1->qs[sizeof(int) * (iqs + QI4_1)]);