mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-11-11 13:30:35 +00:00
[SYCL] Disable iqx on windows as WA (#6435)
* disable iqx on windows as WA * array instead of global_memory
This commit is contained in:
parent
f87f7b8986
commit
52604860f9
@ -447,10 +447,11 @@ static_assert(sizeof(block_iq4_xs) == sizeof(ggml_half) + sizeof(uint16_t) + QK_
|
||||
|
||||
#define GGML_COMMON_IMPL
|
||||
#elif defined(GGML_COMMON_IMPL_SYCL)
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#define GGML_TABLE_BEGIN(type, name, size) static dpct::global_memory<const type, 1> name(sycl::range<1>(size), {
|
||||
#define GGML_TABLE_END() });
|
||||
#define GGML_TABLE_BEGIN(type, name, size) static const type name[size] = {
|
||||
#define GGML_TABLE_END() };
|
||||
|
||||
#define GGML_COMMON_IMPL
|
||||
#endif
|
||||
|
152
ggml-sycl.cpp
152
ggml-sycl.cpp
@ -8079,7 +8079,7 @@ template <bool need_check> static void
|
||||
template <int qk, int qi, typename block_q_t, int vdr, vec_dot_q_sycl_t vec_dot_q_sycl>
|
||||
static void mul_mat_vec_q(const void * __restrict__ vx, const void * __restrict__ vy, float * __restrict__ dst, const int ncols, const int nrows,
|
||||
const sycl::nd_item<3> &item_ct1,
|
||||
const uint32_t *iq3xxs_grid_ptr, const uint64_t *ksigns64_ptr) {
|
||||
const uint32_t *iq3xxs_grid_ptr=nullptr, const uint64_t *ksigns64_ptr=nullptr) {
|
||||
const int row = item_ct1.get_group(2) * item_ct1.get_local_range(1) +
|
||||
item_ct1.get_local_id(1);
|
||||
|
||||
@ -9956,17 +9956,14 @@ static void dequantize_row_iq2_xxs_sycl(const void *vx, dst_t *y, const int k,
|
||||
dpct::queue_ptr stream) {
|
||||
const int nb = k / QK_K;
|
||||
{
|
||||
iq2xxs_grid.init(*stream);
|
||||
ksigns_iq2xs.init(*stream);
|
||||
kmask_iq2xs.init(*stream);
|
||||
|
||||
dpct::has_capability_or_fail(stream->get_device(),
|
||||
{sycl::aspect::fp16});
|
||||
|
||||
stream->submit([&](sycl::handler &cgh) {
|
||||
auto iq2xxs_grid_ptr_ct1 = iq2xxs_grid.get_ptr();
|
||||
auto ksigns_iq2xs_ptr_ct1 = ksigns_iq2xs.get_ptr();
|
||||
auto kmask_iq2xs_ptr_ct1 = kmask_iq2xs.get_ptr();
|
||||
auto iq2xxs_grid_ptr_ct1 = &iq2xxs_grid[0];
|
||||
auto ksigns_iq2xs_ptr_ct1 = &ksigns_iq2xs[0];
|
||||
auto kmask_iq2xs_ptr_ct1 = &kmask_iq2xs[0];
|
||||
|
||||
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
||||
sycl::range<3>(1, 1, 32),
|
||||
@ -9985,17 +9982,14 @@ static void dequantize_row_iq2_xs_sycl(const void *vx, dst_t *y, const int k,
|
||||
dpct::queue_ptr stream) {
|
||||
const int nb = k / QK_K;
|
||||
{
|
||||
iq2xs_grid.init(*stream);
|
||||
ksigns_iq2xs.init(*stream);
|
||||
kmask_iq2xs.init(*stream);
|
||||
|
||||
dpct::has_capability_or_fail(stream->get_device(),
|
||||
{sycl::aspect::fp16});
|
||||
|
||||
stream->submit([&](sycl::handler &cgh) {
|
||||
auto iq2xs_grid_ptr_ct1 = iq2xs_grid.get_ptr();
|
||||
auto ksigns_iq2xs_ptr_ct1 = ksigns_iq2xs.get_ptr();
|
||||
auto kmask_iq2xs_ptr_ct1 = kmask_iq2xs.get_ptr();
|
||||
auto iq2xs_grid_ptr_ct1 = &iq2xs_grid[0];
|
||||
auto ksigns_iq2xs_ptr_ct1 = &ksigns_iq2xs[0];
|
||||
auto kmask_iq2xs_ptr_ct1 = &kmask_iq2xs[0];
|
||||
|
||||
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
||||
sycl::range<3>(1, 1, 32),
|
||||
@ -10014,17 +10008,14 @@ static void dequantize_row_iq3_xxs_sycl(const void *vx, dst_t *y, const int k,
|
||||
dpct::queue_ptr stream) {
|
||||
const int nb = k / QK_K;
|
||||
{
|
||||
iq3xxs_grid.init(*stream);
|
||||
ksigns_iq2xs.init(*stream);
|
||||
kmask_iq2xs.init(*stream);
|
||||
|
||||
dpct::has_capability_or_fail(stream->get_device(),
|
||||
{sycl::aspect::fp16});
|
||||
|
||||
stream->submit([&](sycl::handler &cgh) {
|
||||
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
|
||||
auto ksigns_iq2xs_ptr_ct1 = ksigns_iq2xs.get_ptr();
|
||||
auto kmask_iq2xs_ptr_ct1 = kmask_iq2xs.get_ptr();
|
||||
auto iq3xxs_grid_ptr_ct1 = &iq3xxs_grid[0];
|
||||
auto ksigns_iq2xs_ptr_ct1 = &ksigns_iq2xs[0];
|
||||
auto kmask_iq2xs_ptr_ct1 = &kmask_iq2xs[0];
|
||||
|
||||
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
||||
sycl::range<3>(1, 1, 32),
|
||||
@ -10043,17 +10034,14 @@ static void dequantize_row_iq3_s_sycl(const void *vx, dst_t *y, const int k,
|
||||
dpct::queue_ptr stream) {
|
||||
const int nb = k / QK_K;
|
||||
{
|
||||
iq3s_grid.init(*stream);
|
||||
ksigns_iq2xs.init(*stream);
|
||||
kmask_iq2xs.init(*stream);
|
||||
|
||||
dpct::has_capability_or_fail(stream->get_device(),
|
||||
{sycl::aspect::fp16});
|
||||
|
||||
stream->submit([&](sycl::handler &cgh) {
|
||||
auto iq3s_grid_ptr_ct1 = iq3s_grid.get_ptr();
|
||||
auto ksigns_iq2xs_ptr_ct1 = ksigns_iq2xs.get_ptr();
|
||||
auto kmask_iq2xs_ptr_ct1 = kmask_iq2xs.get_ptr();
|
||||
auto iq3s_grid_ptr_ct1 = &iq3s_grid[0];
|
||||
auto ksigns_iq2xs_ptr_ct1 = &ksigns_iq2xs[0];
|
||||
auto kmask_iq2xs_ptr_ct1 = &kmask_iq2xs[0];
|
||||
|
||||
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
||||
sycl::range<3>(1, 1, 32),
|
||||
@ -10072,17 +10060,14 @@ static void dequantize_row_iq1_s_sycl(const void *vx, dst_t *y, const int k,
|
||||
dpct::queue_ptr stream) {
|
||||
const int nb = k / QK_K;
|
||||
{
|
||||
iq1s_grid_gpu.init(*stream);
|
||||
ksigns_iq2xs.init(*stream);
|
||||
kmask_iq2xs.init(*stream);
|
||||
|
||||
dpct::has_capability_or_fail(stream->get_device(),
|
||||
{sycl::aspect::fp16});
|
||||
|
||||
stream->submit([&](sycl::handler &cgh) {
|
||||
auto iq1s_grid_ptr_ct1 = iq1s_grid_gpu.get_ptr();
|
||||
auto ksigns_iq2xs_ptr_ct1 = ksigns_iq2xs.get_ptr();
|
||||
auto kmask_iq2xs_ptr_ct1 = kmask_iq2xs.get_ptr();
|
||||
auto iq1s_grid_ptr_ct1 = &iq1s_grid_gpu[0];
|
||||
auto ksigns_iq2xs_ptr_ct1 = &ksigns_iq2xs[0];
|
||||
auto kmask_iq2xs_ptr_ct1 = &kmask_iq2xs[0];
|
||||
|
||||
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
||||
sycl::range<3>(1, 1, 32),
|
||||
@ -10415,12 +10400,8 @@ static void mul_mat_vec_q4_0_q8_1_sycl(const void *vx, const void *vy,
|
||||
const sycl::range<3> block_nums(1, 1, block_num_y);
|
||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
||||
{
|
||||
iq3xxs_grid.init(*stream);
|
||||
ksigns64.init(*stream);
|
||||
|
||||
stream->submit([&](sycl::handler &cgh) {
|
||||
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
|
||||
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
|
||||
|
||||
cgh.parallel_for(
|
||||
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||
@ -10428,8 +10409,7 @@ static void mul_mat_vec_q4_0_q8_1_sycl(const void *vx, const void *vy,
|
||||
[[intel::reqd_sub_group_size(32)]] {
|
||||
mul_mat_vec_q<QK4_0, QI4_0, block_q4_0,
|
||||
VDR_Q4_0_Q8_1_MMVQ, vec_dot_q4_0_q8_1>(
|
||||
vx, vy, dst, ncols, nrows, item_ct1,
|
||||
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
|
||||
vx, vy, dst, ncols, nrows, item_ct1);
|
||||
});
|
||||
});
|
||||
}
|
||||
@ -10444,12 +10424,8 @@ static void mul_mat_vec_q4_1_q8_1_sycl(const void *vx, const void *vy,
|
||||
const sycl::range<3> block_nums(1, 1, block_num_y);
|
||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
||||
{
|
||||
iq3xxs_grid.init(*stream);
|
||||
ksigns64.init(*stream);
|
||||
|
||||
stream->submit([&](sycl::handler &cgh) {
|
||||
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
|
||||
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
|
||||
|
||||
cgh.parallel_for(
|
||||
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||
@ -10457,8 +10433,7 @@ static void mul_mat_vec_q4_1_q8_1_sycl(const void *vx, const void *vy,
|
||||
[[intel::reqd_sub_group_size(32)]] {
|
||||
mul_mat_vec_q<QK4_0, QI4_1, block_q4_1,
|
||||
VDR_Q4_1_Q8_1_MMVQ, vec_dot_q4_1_q8_1>(
|
||||
vx, vy, dst, ncols, nrows, item_ct1,
|
||||
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
|
||||
vx, vy, dst, ncols, nrows, item_ct1);
|
||||
});
|
||||
});
|
||||
}
|
||||
@ -10473,12 +10448,8 @@ static void mul_mat_vec_q5_0_q8_1_sycl(const void *vx, const void *vy,
|
||||
const sycl::range<3> block_nums(1, 1, block_num_y);
|
||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
||||
{
|
||||
iq3xxs_grid.init(*stream);
|
||||
ksigns64.init(*stream);
|
||||
|
||||
stream->submit([&](sycl::handler &cgh) {
|
||||
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
|
||||
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
|
||||
|
||||
cgh.parallel_for(
|
||||
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||
@ -10486,8 +10457,7 @@ static void mul_mat_vec_q5_0_q8_1_sycl(const void *vx, const void *vy,
|
||||
[[intel::reqd_sub_group_size(32)]] {
|
||||
mul_mat_vec_q<QK5_0, QI5_0, block_q5_0,
|
||||
VDR_Q5_0_Q8_1_MMVQ, vec_dot_q5_0_q8_1>(
|
||||
vx, vy, dst, ncols, nrows, item_ct1,
|
||||
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
|
||||
vx, vy, dst, ncols, nrows, item_ct1);
|
||||
});
|
||||
});
|
||||
}
|
||||
@ -10502,12 +10472,8 @@ static void mul_mat_vec_q5_1_q8_1_sycl(const void *vx, const void *vy,
|
||||
const sycl::range<3> block_nums(1, 1, block_num_y);
|
||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
||||
{
|
||||
iq3xxs_grid.init(*stream);
|
||||
ksigns64.init(*stream);
|
||||
|
||||
stream->submit([&](sycl::handler &cgh) {
|
||||
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
|
||||
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
|
||||
|
||||
cgh.parallel_for(
|
||||
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||
@ -10515,8 +10481,7 @@ static void mul_mat_vec_q5_1_q8_1_sycl(const void *vx, const void *vy,
|
||||
[[intel::reqd_sub_group_size(32)]] {
|
||||
mul_mat_vec_q<QK5_1, QI5_1, block_q5_1,
|
||||
VDR_Q5_1_Q8_1_MMVQ, vec_dot_q5_1_q8_1>(
|
||||
vx, vy, dst, ncols, nrows, item_ct1,
|
||||
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
|
||||
vx, vy, dst, ncols, nrows, item_ct1);
|
||||
});
|
||||
});
|
||||
}
|
||||
@ -10531,12 +10496,8 @@ static void mul_mat_vec_q8_0_q8_1_sycl(const void *vx, const void *vy,
|
||||
const sycl::range<3> block_nums(1, 1, block_num_y);
|
||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
||||
{
|
||||
iq3xxs_grid.init(*stream);
|
||||
ksigns64.init(*stream);
|
||||
|
||||
stream->submit([&](sycl::handler &cgh) {
|
||||
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
|
||||
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
|
||||
|
||||
cgh.parallel_for(
|
||||
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||
@ -10544,8 +10505,7 @@ static void mul_mat_vec_q8_0_q8_1_sycl(const void *vx, const void *vy,
|
||||
[[intel::reqd_sub_group_size(32)]] {
|
||||
mul_mat_vec_q<QK8_0, QI8_0, block_q8_0,
|
||||
VDR_Q8_0_Q8_1_MMVQ, vec_dot_q8_0_q8_1>(
|
||||
vx, vy, dst, ncols, nrows, item_ct1,
|
||||
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
|
||||
vx, vy, dst, ncols, nrows, item_ct1);
|
||||
});
|
||||
});
|
||||
}
|
||||
@ -10560,12 +10520,8 @@ static void mul_mat_vec_q2_K_q8_1_sycl(const void *vx, const void *vy,
|
||||
const sycl::range<3> block_nums(1, 1, block_num_y);
|
||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
||||
{
|
||||
iq3xxs_grid.init(*stream);
|
||||
ksigns64.init(*stream);
|
||||
|
||||
stream->submit([&](sycl::handler &cgh) {
|
||||
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
|
||||
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
|
||||
|
||||
cgh.parallel_for(
|
||||
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||
@ -10573,8 +10529,7 @@ static void mul_mat_vec_q2_K_q8_1_sycl(const void *vx, const void *vy,
|
||||
[[intel::reqd_sub_group_size(32)]] {
|
||||
mul_mat_vec_q<QK_K, QI2_K, block_q2_K,
|
||||
VDR_Q2_K_Q8_1_MMVQ, vec_dot_q2_K_q8_1>(
|
||||
vx, vy, dst, ncols, nrows, item_ct1,
|
||||
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
|
||||
vx, vy, dst, ncols, nrows, item_ct1);
|
||||
});
|
||||
});
|
||||
}
|
||||
@ -10589,12 +10544,8 @@ static void mul_mat_vec_q3_K_q8_1_sycl(const void *vx, const void *vy,
|
||||
const sycl::range<3> block_nums(1, 1, block_num_y);
|
||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
||||
{
|
||||
iq3xxs_grid.init(*stream);
|
||||
ksigns64.init(*stream);
|
||||
|
||||
stream->submit([&](sycl::handler &cgh) {
|
||||
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
|
||||
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
|
||||
|
||||
cgh.parallel_for(
|
||||
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||
@ -10602,8 +10553,7 @@ static void mul_mat_vec_q3_K_q8_1_sycl(const void *vx, const void *vy,
|
||||
[[intel::reqd_sub_group_size(32)]] {
|
||||
mul_mat_vec_q<QK_K, QI3_K, block_q3_K,
|
||||
VDR_Q3_K_Q8_1_MMVQ, vec_dot_q3_K_q8_1>(
|
||||
vx, vy, dst, ncols, nrows, item_ct1,
|
||||
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
|
||||
vx, vy, dst, ncols, nrows, item_ct1);
|
||||
});
|
||||
});
|
||||
}
|
||||
@ -10618,12 +10568,8 @@ static void mul_mat_vec_q4_K_q8_1_sycl(const void *vx, const void *vy,
|
||||
const sycl::range<3> block_nums(1, 1, block_num_y);
|
||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
||||
{
|
||||
iq3xxs_grid.init(*stream);
|
||||
ksigns64.init(*stream);
|
||||
|
||||
stream->submit([&](sycl::handler &cgh) {
|
||||
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
|
||||
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
|
||||
|
||||
cgh.parallel_for(
|
||||
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||
@ -10631,8 +10577,7 @@ static void mul_mat_vec_q4_K_q8_1_sycl(const void *vx, const void *vy,
|
||||
[[intel::reqd_sub_group_size(32)]] {
|
||||
mul_mat_vec_q<QK_K, QI4_K, block_q4_K,
|
||||
VDR_Q4_K_Q8_1_MMVQ, vec_dot_q4_K_q8_1>(
|
||||
vx, vy, dst, ncols, nrows, item_ct1,
|
||||
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
|
||||
vx, vy, dst, ncols, nrows, item_ct1);
|
||||
});
|
||||
});
|
||||
}
|
||||
@ -10647,12 +10592,8 @@ static void mul_mat_vec_q5_K_q8_1_sycl(const void *vx, const void *vy,
|
||||
const sycl::range<3> block_nums(1, 1, block_num_y);
|
||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
||||
{
|
||||
iq3xxs_grid.init(*stream);
|
||||
ksigns64.init(*stream);
|
||||
|
||||
stream->submit([&](sycl::handler &cgh) {
|
||||
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
|
||||
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
|
||||
|
||||
cgh.parallel_for(
|
||||
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||
@ -10660,8 +10601,7 @@ static void mul_mat_vec_q5_K_q8_1_sycl(const void *vx, const void *vy,
|
||||
[[intel::reqd_sub_group_size(32)]] {
|
||||
mul_mat_vec_q<QK_K, QI5_K, block_q5_K,
|
||||
VDR_Q5_K_Q8_1_MMVQ, vec_dot_q5_K_q8_1>(
|
||||
vx, vy, dst, ncols, nrows, item_ct1,
|
||||
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
|
||||
vx, vy, dst, ncols, nrows, item_ct1);
|
||||
});
|
||||
});
|
||||
}
|
||||
@ -10676,12 +10616,8 @@ static void mul_mat_vec_q6_K_q8_1_sycl(const void *vx, const void *vy,
|
||||
const sycl::range<3> block_nums(1, 1, block_num_y);
|
||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
||||
{
|
||||
iq3xxs_grid.init(*stream);
|
||||
ksigns64.init(*stream);
|
||||
|
||||
stream->submit([&](sycl::handler &cgh) {
|
||||
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
|
||||
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
|
||||
|
||||
cgh.parallel_for(
|
||||
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||
@ -10689,13 +10625,13 @@ static void mul_mat_vec_q6_K_q8_1_sycl(const void *vx, const void *vy,
|
||||
[[intel::reqd_sub_group_size(32)]] {
|
||||
mul_mat_vec_q<QK_K, QI6_K, block_q6_K,
|
||||
VDR_Q6_K_Q8_1_MMVQ, vec_dot_q6_K_q8_1>(
|
||||
vx, vy, dst, ncols, nrows, item_ct1,
|
||||
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
|
||||
vx, vy, dst, ncols, nrows, item_ct1);
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void mul_mat_vec_iq2_xxs_q8_1_sycl(const void *vx, const void *vy,
|
||||
float *dst, const int ncols,
|
||||
const int nrows,
|
||||
@ -10705,15 +10641,11 @@ static void mul_mat_vec_iq2_xxs_q8_1_sycl(const void *vx, const void *vy,
|
||||
const sycl::range<3> block_nums(1, 1, block_num_y);
|
||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
||||
{
|
||||
iq2xxs_grid.init(*stream);
|
||||
ksigns_iq2xs.init(*stream);
|
||||
kmask_iq2xs.init(*stream);
|
||||
|
||||
|
||||
stream->submit([&](sycl::handler &cgh) {
|
||||
auto iq2xxs_grid_ptr_ct1 = iq2xxs_grid.get_ptr();
|
||||
auto ksigns_iq2xs_ptr_ct1 = ksigns_iq2xs.get_ptr();
|
||||
auto kmask_iq2xs_ptr_ct1 = kmask_iq2xs.get_ptr();
|
||||
auto iq2xxs_grid_ptr_ct1 = &iq2xxs_grid[0];
|
||||
auto ksigns_iq2xs_ptr_ct1 = &ksigns_iq2xs[0];
|
||||
auto kmask_iq2xs_ptr_ct1 = &kmask_iq2xs[0];
|
||||
|
||||
cgh.parallel_for(
|
||||
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||
@ -10736,12 +10668,10 @@ static void mul_mat_vec_iq2_xs_q8_1_sycl(const void *vx, const void *vy,
|
||||
const sycl::range<3> block_nums(1, 1, block_num_y);
|
||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
||||
{
|
||||
iq2xs_grid.init(*stream);
|
||||
ksigns64.init(*stream);
|
||||
|
||||
stream->submit([&](sycl::handler &cgh) {
|
||||
auto iq2xs_grid_ptr_ct1 = iq2xs_grid.get_ptr();
|
||||
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
|
||||
auto iq2xs_grid_ptr_ct1 = &iq2xs_grid[0];
|
||||
auto ksigns64_ptr_ct1 = &ksigns64[0];
|
||||
|
||||
cgh.parallel_for(
|
||||
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||
@ -10764,12 +10694,10 @@ static void mul_mat_vec_iq3_xxs_q8_1_sycl(const void *vx, const void *vy,
|
||||
const sycl::range<3> block_nums(1, 1, block_num_y);
|
||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
||||
{
|
||||
iq3xxs_grid.init(*stream);
|
||||
ksigns64.init(*stream);
|
||||
|
||||
stream->submit([&](sycl::handler &cgh) {
|
||||
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
|
||||
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
|
||||
auto iq3xxs_grid_ptr_ct1 = &iq3xxs_grid[0];
|
||||
auto ksigns64_ptr_ct1 = &ksigns64[0];
|
||||
|
||||
cgh.parallel_for(
|
||||
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||
@ -10792,12 +10720,10 @@ static void mul_mat_vec_iq3_s_q8_1_sycl(const void *vx, const void *vy,
|
||||
const sycl::range<3> block_nums(1, 1, block_num_y);
|
||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
||||
{
|
||||
iq3s_grid.init(*stream);
|
||||
ksigns64.init(*stream);
|
||||
|
||||
stream->submit([&](sycl::handler &cgh) {
|
||||
auto iq3s_grid_ptr_ct1 = iq3s_grid.get_ptr();
|
||||
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
|
||||
auto iq3s_grid_ptr_ct1 = &iq3s_grid[0];
|
||||
auto ksigns64_ptr_ct1 = &ksigns64[0];
|
||||
|
||||
cgh.parallel_for(
|
||||
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||
@ -10820,12 +10746,10 @@ static void mul_mat_vec_iq1_s_q8_1_sycl(const void *vx, const void *vy,
|
||||
const sycl::range<3> block_nums(1, 1, block_num_y);
|
||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
||||
{
|
||||
iq1s_grid_gpu.init(*stream);
|
||||
ksigns64.init(*stream);
|
||||
|
||||
stream->submit([&](sycl::handler &cgh) {
|
||||
auto iq1s_grid_ptr_ct1 = iq1s_grid_gpu.get_ptr();
|
||||
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
|
||||
auto iq1s_grid_ptr_ct1 = &iq1s_grid_gpu[0];
|
||||
auto ksigns64_ptr_ct1 = &ksigns64[0];
|
||||
|
||||
cgh.parallel_for(
|
||||
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||
|
Loading…
Reference in New Issue
Block a user