cuda : less diff in the rope_neox kernel

2024-12-30 21:34:36 +00:00 · 2023-12-17 09:14:29 +02:00 · 2023-12-17 09:14:29 +02:00 · 42e9525884
commit 42e9525884
parent f703ca8a3c
1 changed files with 20 additions and 18 deletions
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -4998,7 +4998,15 @@ static __global__ void rope_neox(
    const int ib = col / n_dims;
    const int ic = col % n_dims;

-    if (ib == 0) {
+    if (ib > 0) {
+        const int i = row*ncols + ib*n_dims + ic;
+
+        dst[i + 0] = x[i + 0];
+        dst[i + 1] = x[i + 1];
+
+        return;
+    }
+
    const int i  = row*ncols + ib*n_dims + ic/2;
    const int i2 = row/p_delta_rows;

@@ -5015,12 +5023,6 @@ static __global__ void rope_neox(

    dst[i + 0]        = x0*cos_theta - x1*sin_theta;
    dst[i + n_dims/2] = x0*sin_theta + x1*cos_theta;
-    } else {
-        const int i = row*ncols + ib*n_dims + ic;
-
-        dst[i + 0] = x[i + 0];
-        dst[i + 1] = x[i + 1];
-    }
 }

 static __global__ void rope_glm_f32(