mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 19:50:17 +00:00
Tidy ggml-sycl (#5261)
* Tidy some code in ggml-sycl * Remove blank space * Remove std::printf comments --------- Co-authored-by: Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com>
This commit is contained in:
parent
6b91b1e0a9
commit
b05102fe8c
@ -1366,6 +1366,7 @@ namespace dpct
|
|||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
return q.memcpy(to_ptr, from_ptr, size, dep_events);
|
return q.memcpy(to_ptr, from_ptr, size, dep_events);
|
||||||
|
GGML_UNUSED(direction);
|
||||||
#endif // DPCT_USM_LEVEL_NONE
|
#endif // DPCT_USM_LEVEL_NONE
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1667,7 +1668,7 @@ namespace dpct
|
|||||||
using Ty = typename DataType<T>::T2;
|
using Ty = typename DataType<T>::T2;
|
||||||
Ty s_h;
|
Ty s_h;
|
||||||
if (get_pointer_attribute(q, s) == pointer_access_attribute::device_only)
|
if (get_pointer_attribute(q, s) == pointer_access_attribute::device_only)
|
||||||
detail::dpct_memcpy(q, (void *)&s_h, (void *)s, sizeof(T), device_to_host)
|
detail::dpct_memcpy(q, (void *)&s_h, (const void *)s, sizeof(T), device_to_host)
|
||||||
.wait();
|
.wait();
|
||||||
else
|
else
|
||||||
s_h = *reinterpret_cast<const Ty *>(s);
|
s_h = *reinterpret_cast<const Ty *>(s);
|
||||||
@ -1691,6 +1692,20 @@ namespace dpct
|
|||||||
int ldb, const void *beta, void *c, int ldc)
|
int ldb, const void *beta, void *c, int ldc)
|
||||||
{
|
{
|
||||||
#ifndef __INTEL_MKL__
|
#ifndef __INTEL_MKL__
|
||||||
|
GGML_UNUSED(q);
|
||||||
|
GGML_UNUSED(a_trans);
|
||||||
|
GGML_UNUSED(b_trans);
|
||||||
|
GGML_UNUSED(m);
|
||||||
|
GGML_UNUSED(n);
|
||||||
|
GGML_UNUSED(k);
|
||||||
|
GGML_UNUSED(alpha);
|
||||||
|
GGML_UNUSED(a);
|
||||||
|
GGML_UNUSED(lda);
|
||||||
|
GGML_UNUSED(b);
|
||||||
|
GGML_UNUSED(ldb);
|
||||||
|
GGML_UNUSED(beta);
|
||||||
|
GGML_UNUSED(c);
|
||||||
|
GGML_UNUSED(ldc);
|
||||||
throw std::runtime_error("The oneAPI Math Kernel Library (oneMKL) Interfaces "
|
throw std::runtime_error("The oneAPI Math Kernel Library (oneMKL) Interfaces "
|
||||||
"Project does not support this API.");
|
"Project does not support this API.");
|
||||||
#else
|
#else
|
||||||
@ -1830,7 +1845,7 @@ namespace dpct
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
T permute_sub_group_by_xor(sycl::sub_group g, T x, unsigned int mask,
|
T permute_sub_group_by_xor(sycl::sub_group g, T x, unsigned int mask,
|
||||||
int logical_sub_group_size = 32)
|
unsigned int logical_sub_group_size = 32)
|
||||||
{
|
{
|
||||||
unsigned int id = g.get_local_linear_id();
|
unsigned int id = g.get_local_linear_id();
|
||||||
unsigned int start_index =
|
unsigned int start_index =
|
||||||
@ -2160,6 +2175,7 @@ namespace dpct
|
|||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
return q.memcpy(to_ptr, from_ptr, size, dep_events);
|
return q.memcpy(to_ptr, from_ptr, size, dep_events);
|
||||||
|
GGML_UNUSED(direction);
|
||||||
#endif // DPCT_USM_LEVEL_NONE
|
#endif // DPCT_USM_LEVEL_NONE
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3302,7 +3318,7 @@ void log_ggml_var_device(const char*name, float *src, size_t total_elements, boo
|
|||||||
std::ofstream logfile;
|
std::ofstream logfile;
|
||||||
logfile.open(filename);
|
logfile.open(filename);
|
||||||
// printf("local buf element %d\n", total_elements);
|
// printf("local buf element %d\n", total_elements);
|
||||||
for(int i=0; i<total_elements; i++){
|
for(size_t i=0; i<total_elements; i++){
|
||||||
if((i+1)%20 ==0) logfile <<std::endl;
|
if((i+1)%20 ==0) logfile <<std::endl;
|
||||||
else logfile << local_buf[i] <<" ";
|
else logfile << local_buf[i] <<" ";
|
||||||
}
|
}
|
||||||
@ -3396,6 +3412,7 @@ static __dpct_inline__ float warp_reduce_max(float x,
|
|||||||
|
|
||||||
static __dpct_inline__ float op_repeat(const float a, const float b) {
|
static __dpct_inline__ float op_repeat(const float a, const float b) {
|
||||||
return b;
|
return b;
|
||||||
|
GGML_UNUSED(a);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __dpct_inline__ float op_add(const float a, const float b) {
|
static __dpct_inline__ float op_add(const float a, const float b) {
|
||||||
@ -11156,10 +11173,10 @@ DPCT1082:64: Migration of CUmemGenericAllocationHandle type is not supported.
|
|||||||
// g_sycl_pool_handles[GGML_SYCL_MAX_DEVICES];
|
// g_sycl_pool_handles[GGML_SYCL_MAX_DEVICES];
|
||||||
static dpct::device_ptr g_sycl_pool_addr[GGML_SYCL_MAX_DEVICES] = {0};
|
static dpct::device_ptr g_sycl_pool_addr[GGML_SYCL_MAX_DEVICES] = {0};
|
||||||
static size_t g_sycl_pool_used[GGML_SYCL_MAX_DEVICES] = {0};
|
static size_t g_sycl_pool_used[GGML_SYCL_MAX_DEVICES] = {0};
|
||||||
static const size_t SYCL_POOL_VMM_MAX_SIZE = 1ull << 36; // 64 GB
|
|
||||||
|
|
||||||
static void *ggml_sycl_pool_malloc_vmm(size_t size, size_t *actual_size) try {
|
static void *ggml_sycl_pool_malloc_vmm(size_t size, size_t *actual_size) try {
|
||||||
|
GGML_UNUSED(size);
|
||||||
|
GGML_UNUSED(actual_size);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
catch (sycl::exception const &exc) {
|
catch (sycl::exception const &exc) {
|
||||||
@ -11349,9 +11366,8 @@ void ggml_init_sycl() try {
|
|||||||
if(id!=user_device_id) continue;
|
if(id!=user_device_id) continue;
|
||||||
|
|
||||||
device_inx++;
|
device_inx++;
|
||||||
int device_vmm = 0;
|
|
||||||
|
|
||||||
g_device_caps[device_inx].vmm = !!device_vmm;
|
g_device_caps[device_inx].vmm = 0;
|
||||||
g_device_caps[device_inx].device_id = id;
|
g_device_caps[device_inx].device_id = id;
|
||||||
g_sycl_device_id2index[id].index = device_inx;
|
g_sycl_device_id2index[id].index = device_inx;
|
||||||
|
|
||||||
@ -11359,18 +11375,12 @@ void ggml_init_sycl() try {
|
|||||||
SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
|
SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
|
||||||
prop, dpct::dev_mgr::instance().get_device(id))));
|
prop, dpct::dev_mgr::instance().get_device(id))));
|
||||||
|
|
||||||
// fprintf(stderr,
|
|
||||||
// " Device %d: %s, compute capability %d.%d, VMM: %s\n", id,
|
|
||||||
// prop.get_name(), prop.get_major_version(),
|
|
||||||
// prop.get_minor_version(), device_vmm ? "yes" : "no");
|
|
||||||
|
|
||||||
g_tensor_split[device_inx] = total_vram;
|
g_tensor_split[device_inx] = total_vram;
|
||||||
total_vram += prop.get_global_mem_size();
|
total_vram += prop.get_global_mem_size();
|
||||||
|
|
||||||
g_device_caps[device_inx].cc =
|
g_device_caps[device_inx].cc =
|
||||||
100 * prop.get_major_version() + 10 * prop.get_minor_version();
|
100 * prop.get_major_version() + 10 * prop.get_minor_version();
|
||||||
|
|
||||||
// printf("g_device_caps[%d].cc=%d\n", device_inx, g_device_caps[device_inx].cc);
|
|
||||||
}
|
}
|
||||||
device_inx = -1;
|
device_inx = -1;
|
||||||
for (int id = 0; id < g_all_sycl_device_count; ++id) {
|
for (int id = 0; id < g_all_sycl_device_count; ++id) {
|
||||||
@ -12206,7 +12216,6 @@ inline void ggml_sycl_op_mul_mat_sycl(
|
|||||||
// ldc == nrows of the matrix that cuBLAS writes into
|
// ldc == nrows of the matrix that cuBLAS writes into
|
||||||
int ldc = dst->backend == GGML_BACKEND_GPU && device_id == g_main_device ? ne0 : row_diff;
|
int ldc = dst->backend == GGML_BACKEND_GPU && device_id == g_main_device ? ne0 : row_diff;
|
||||||
|
|
||||||
const int compute_capability = g_device_caps[id].cc;
|
|
||||||
#ifdef GGML_SYCL_F16
|
#ifdef GGML_SYCL_F16
|
||||||
bool use_fp16 = true; // TODO(Yu) SYCL capability check
|
bool use_fp16 = true; // TODO(Yu) SYCL capability check
|
||||||
#else
|
#else
|
||||||
@ -12691,7 +12700,7 @@ static void ggml_sycl_set_peer_access(const int n_tokens) {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
int can_access_peer;
|
// int can_access_peer;
|
||||||
// SYCL_CHECK(syclDeviceCanAccessPeer(&can_access_peer, id, id_other));
|
// SYCL_CHECK(syclDeviceCanAccessPeer(&can_access_peer, id, id_other));
|
||||||
// if (can_access_peer) {
|
// if (can_access_peer) {
|
||||||
// if (enable_peer_access) {
|
// if (enable_peer_access) {
|
||||||
@ -12716,7 +12725,6 @@ static void ggml_sycl_op_mul_mat(const ggml_tensor *src0,
|
|||||||
const int64_t ne01 = src0->ne[1];
|
const int64_t ne01 = src0->ne[1];
|
||||||
const int64_t ne02 = src0->ne[2];
|
const int64_t ne02 = src0->ne[2];
|
||||||
const int64_t ne03 = src0->ne[3];
|
const int64_t ne03 = src0->ne[3];
|
||||||
const int64_t nrows0 = ggml_nrows(src0);
|
|
||||||
|
|
||||||
const int64_t ne10 = src1->ne[0];
|
const int64_t ne10 = src1->ne[0];
|
||||||
const int64_t ne11 = src1->ne[1];
|
const int64_t ne11 = src1->ne[1];
|
||||||
@ -13812,13 +13820,6 @@ static void ggml_sycl_mul_mat_id(const ggml_tensor *src0,
|
|||||||
src1_row_extra.data_device[g_main_device_index] = src1_contiguous.get();
|
src1_row_extra.data_device[g_main_device_index] = src1_contiguous.get();
|
||||||
dst_row_extra.data_device[g_main_device_index] = dst_contiguous.get();
|
dst_row_extra.data_device[g_main_device_index] = dst_contiguous.get();
|
||||||
|
|
||||||
const dpct::memcpy_direction src1_kind =
|
|
||||||
src1->backend == GGML_BACKEND_CPU ? dpct::host_to_device
|
|
||||||
: dpct::device_to_device;
|
|
||||||
const dpct::memcpy_direction dst_kind = dst->backend == GGML_BACKEND_CPU
|
|
||||||
? dpct::device_to_host
|
|
||||||
: dpct::device_to_device;
|
|
||||||
|
|
||||||
for (int32_t row_id = 0; row_id < n_as; ++row_id) {
|
for (int32_t row_id = 0; row_id < n_as; ++row_id) {
|
||||||
const struct ggml_tensor * src0_row = dst->src[row_id + 2];
|
const struct ggml_tensor * src0_row = dst->src[row_id + 2];
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user