mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 19:50:17 +00:00
Vectorize q load
This commit is contained in:
parent
604ef6bf15
commit
a235b7c532
@@ -4329,8 +4329,6 @@ static void dequantize_block_q4_K(const void * __restrict__ vx, dst_t * __restri
|
|||||||
scales_local[tid] = x[i].scales[tid];
|
scales_local[tid] = x[i].scales[tid];
|
||||||
item_ct1.barrier(sycl::access::fence_space::local_space);
|
item_ct1.barrier(sycl::access::fence_space::local_space);
|
||||||
|
|
||||||
const uint8_t * q = x[i].qs + 32*il + n*ir;
|
|
||||||
|
|
||||||
uint8_t sc, m;
|
uint8_t sc, m;
|
||||||
get_scale_min_k4(is + 0, scales_local, sc, m);
|
get_scale_min_k4(is + 0, scales_local, sc, m);
|
||||||
const float d1 = dall * sc;
|
const float d1 = dall * sc;
|
||||||
@@ -4338,9 +4336,11 @@ static void dequantize_block_q4_K(const void * __restrict__ vx, dst_t * __restri
|
|||||||
get_scale_min_k4(is + 1, scales_local, sc, m);
|
get_scale_min_k4(is + 1, scales_local, sc, m);
|
||||||
const float d2 = dall * sc;
|
const float d2 = dall * sc;
|
||||||
const float m2 = dmin * m;
|
const float m2 = dmin * m;
|
||||||
|
|
||||||
|
sycl::vec<uint8_t, n> q_vec = reinterpret_cast<const sycl::vec<uint8_t, n>*>(x[i].qs + 32*il + n*ir)[0];
|
||||||
for (int l = 0; l < n; ++l) {
|
for (int l = 0; l < n; ++l) {
|
||||||
y[l + 0] = d1 * (q[l] & 0xF) - m1;
|
y[l + 0] = d1 * (q_vec[l] & 0xF) - m1;
|
||||||
y[l +32] = d2 * (q[l] >> 4) - m2;
|
y[l +32] = d2 * (q_vec[l] >> 4) - m2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user