cuda : less diff in the rope_neox kernel

This commit is contained in:
Georgi Gerganov 2023-12-17 09:14:29 +02:00
parent f703ca8a3c
commit 42e9525884
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735

View File

@@ -4998,7 +4998,15 @@ static __global__ void rope_neox(
const int ib = col / n_dims;
const int ic = col % n_dims;
if (ib == 0) {
if (ib > 0) {
const int i = row*ncols + ib*n_dims + ic;
dst[i + 0] = x[i + 0];
dst[i + 1] = x[i + 1];
return;
}
const int i = row*ncols + ib*n_dims + ic/2;
const int i2 = row/p_delta_rows;
@@ -5015,12 +5023,6 @@ static __global__ void rope_neox(
dst[i + 0] = x0*cos_theta - x1*sin_theta;
dst[i + n_dims/2] = x0*sin_theta + x1*cos_theta;
} else {
const int i = row*ncols + ib*n_dims + ic;
dst[i + 0] = x[i + 0];
dst[i + 1] = x[i + 1];
}
}
static __global__ void rope_glm_f32(