Support SYCL device register

This commit is contained in:
arthw 2024-10-20 10:06:51 +08:00
parent 92be9f1216
commit 8233009d4d
5 changed files with 380 additions and 68 deletions

View File

@ -34,6 +34,8 @@ GGML_API void ggml_sycl_get_device_description(int device, char *description, si
GGML_API int ggml_backend_sycl_get_device_count(); GGML_API int ggml_backend_sycl_get_device_count();
GGML_API void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total); GGML_API void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
GGML_API ggml_backend_reg_t ggml_backend_sycl_reg(void);
// SYCL doesn't support registering host memory, keep here for reference // SYCL doesn't support registering host memory, keep here for reference
// GGML_API bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size); // GGML_API bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size);
// GGML_API void ggml_backend_sycl_unregister_host_buffer(void * buffer); // GGML_API void ggml_backend_sycl_unregister_host_buffer(void * buffer);

View File

@ -546,6 +546,10 @@ void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * na
#include "ggml-rpc.h" #include "ggml-rpc.h"
#endif #endif
#ifdef GGML_USE_SYCL
#include "ggml-sycl.h"
#endif
struct ggml_backend_registry { struct ggml_backend_registry {
std::vector<ggml_backend_reg_t> backends; std::vector<ggml_backend_reg_t> backends;
std::vector<ggml_backend_dev_t> devices; std::vector<ggml_backend_dev_t> devices;
@ -563,10 +567,14 @@ struct ggml_backend_registry {
#ifdef GGML_USE_RPC #ifdef GGML_USE_RPC
register_backend(ggml_backend_rpc_reg()); register_backend(ggml_backend_rpc_reg());
#endif #endif
#ifdef GGML_USE_SYCL
register_backend(ggml_backend_sycl_reg());
// printf("zjy sycl ggml_backend_reg_count()=%d\n", ggml_backend_reg_count());
#endif
// TODO: sycl, vulkan, kompute, cann // TODO: sycl, vulkan, kompute, cann
register_backend(ggml_backend_cpu_reg()); register_backend(ggml_backend_cpu_reg());
// printf("zjy cpu ggml_backend_reg_count()=%d\n", ggml_backend_reg_count());
} }
void register_backend(ggml_backend_reg_t reg) { void register_backend(ggml_backend_reg_t reg) {

View File

@ -1736,6 +1736,24 @@ void print_device_detail(int id, sycl::device &device, std::string device_type)
global_mem_size, device.get_info<sycl::info::device::driver_version>().c_str()); global_mem_size, device.get_info<sycl::info::device::driver_version>().c_str());
} }
void print_cpu_detail() {
sycl::device device;
device = sycl::device(sycl::cpu_selector_v);
dpct::device_info prop;
SYCL_CHECK(CHECK_TRY_ERROR(
dpct::get_device_info(prop, device)));
std::string name = std::string(prop.get_name());
name = std::regex_replace(name, std::regex("\\(R\\)"), "");
name = std::regex_replace(name, std::regex("\\(TM\\)"), "");
auto global_mem_size = prop.get_global_mem_size()/1000000;
std::string res= "[SYCL] CPU: ["+name+"] Memory: ["+std::to_string(global_mem_size)+"M]\n";
fprintf(stderr, "%s", res.c_str());
}
void ggml_backend_sycl_print_sycl_devices() { void ggml_backend_sycl_print_sycl_devices() {
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_print_sycl_devices\n"); GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_print_sycl_devices\n");
int device_count = dpct::dev_mgr::instance().device_count(); int device_count = dpct::dev_mgr::instance().device_count();
@ -1756,6 +1774,7 @@ void ggml_backend_sycl_print_sycl_devices() {
} }
} }
static inline int get_sycl_env(const char *env_name, int default_val) { static inline int get_sycl_env(const char *env_name, int default_val) {
char *user_device_string = getenv(env_name); char *user_device_string = getenv(env_name);
int user_number = default_val; int user_number = default_val;
@ -1814,6 +1833,8 @@ catch (sycl::exception const &exc) {
static ggml_sycl_device_info ggml_sycl_init() { static ggml_sycl_device_info ggml_sycl_init() {
ggml_sycl_device_info info = {}; ggml_sycl_device_info info = {};
print_cpu_detail();
info.device_count = dpct::dev_mgr::instance().device_count(); info.device_count = dpct::dev_mgr::instance().device_count();
if (info.device_count == 0) { if (info.device_count == 0) {
fprintf(stderr, "%s: failed to initialize " GGML_SYCL_NAME ": %s\n", __func__); fprintf(stderr, "%s: failed to initialize " GGML_SYCL_NAME ": %s\n", __func__);
@ -1833,7 +1854,7 @@ static ggml_sycl_device_info ggml_sycl_init() {
#else #else
fprintf(stderr, "%s: SYCL_USE_XMX: no\n", __func__); fprintf(stderr, "%s: SYCL_USE_XMX: no\n", __func__);
#endif #endif
fprintf(stderr, "%s: found %d " GGML_SYCL_NAME " devices:\n", __func__, info.device_count); fprintf(stderr, "%s: found %d " GGML_SYCL_NAME " devices\n", __func__, info.device_count);
for (int i = 0; i < info.device_count; ++i) { for (int i = 0; i < info.device_count; ++i) {
info.devices[i].vmm = 0; info.devices[i].vmm = 0;
@ -4084,19 +4105,9 @@ catch (sycl::exception const &exc) {
void ggml_backend_sycl_get_device_memory(int device, size_t *free, void ggml_backend_sycl_get_device_memory(int device, size_t *free,
size_t *total) try { size_t *total) try {
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_device_memory\n"); GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_device_memory, device=%d\n", device);
ggml_sycl_set_device(device); ggml_sycl_set_device(device);
/*
DPCT1009:218: SYCL uses exceptions to report errors and does not use the
error codes. The original code was commented out and a warning string was
inserted. You need to rewrite this code.
*/
/*
DPCT1106:217: 'cudaMemGetInfo' was migrated with the Intel extensions for
device information which may not be supported by all compilers or runtimes.
You may need to adjust the code.
*/
SYCL_CHECK(CHECK_TRY_ERROR( SYCL_CHECK(CHECK_TRY_ERROR(
dpct::dev_mgr::instance().get_device(device).get_memory_info(*free, *total))); dpct::dev_mgr::instance().get_device(device).get_memory_info(*free, *total)));
} }
@ -4339,11 +4350,16 @@ struct ggml_backend_sycl_buffer_type_context {
queue_ptr stream = nullptr; queue_ptr stream = nullptr;
}; };
static const char * ggml_backend_sycl_buffer_type_name(ggml_backend_buffer_type_t buft) { static const char * ggml_backend_sycl_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
ggml_backend_sycl_buffer_type_context * ctx = (ggml_backend_sycl_buffer_type_context *)buft->context; ggml_backend_sycl_buffer_type_context * ctx = (ggml_backend_sycl_buffer_type_context *)buft->context;
return ctx->name.c_str(); return ctx->name.c_str();
} }
static bool ggml_backend_buft_is_sycl(ggml_backend_buffer_type_t buft) {
return buft->iface.get_name == ggml_backend_sycl_buffer_type_get_name;
}
static ggml_backend_buffer_t static ggml_backend_buffer_t
ggml_backend_sycl_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, ggml_backend_sycl_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft,
size_t size) try { size_t size) try {
@ -4395,7 +4411,7 @@ static size_t ggml_backend_sycl_buffer_type_get_alloc_size(ggml_backend_buffer_t
} }
static ggml_backend_buffer_type_i ggml_backend_sycl_buffer_type_interface = { static ggml_backend_buffer_type_i ggml_backend_sycl_buffer_type_interface = {
/* .get_name = */ ggml_backend_sycl_buffer_type_name, /* .get_name = */ ggml_backend_sycl_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_sycl_buffer_type_alloc_buffer, /* .alloc_buffer = */ ggml_backend_sycl_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_sycl_buffer_type_get_alignment, /* .get_alignment = */ ggml_backend_sycl_buffer_type_get_alignment,
/* .get_max_size = */ ggml_backend_sycl_buffer_type_get_max_size, /* .get_max_size = */ ggml_backend_sycl_buffer_type_get_max_size,
@ -4744,12 +4760,16 @@ static struct ggml_backend_buffer_i ggml_backend_sycl_split_buffer_interface = {
/* .reset = */ NULL, /* .reset = */ NULL,
}; };
static const char * ggml_backend_sycl_split_buffer_type_name(ggml_backend_buffer_type_t buft) { static const char * ggml_backend_sycl_split_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
return GGML_SYCL_NAME "_Split"; return GGML_SYCL_NAME "_Split";
UNUSED(buft); UNUSED(buft);
} }
static bool ggml_backend_buft_is_sycl_split(ggml_backend_buffer_type_t buft) {
return buft->iface.get_name == ggml_backend_sycl_split_buffer_type_get_name;
}
static ggml_backend_buffer_t ggml_backend_sycl_split_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) { static ggml_backend_buffer_t ggml_backend_sycl_split_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
// since we don't know the exact split after rounding, we cannot allocate the device buffers at this point // since we don't know the exact split after rounding, we cannot allocate the device buffers at this point
// instead, we allocate them for each tensor separately in init_tensor // instead, we allocate them for each tensor separately in init_tensor
@ -4799,7 +4819,7 @@ static bool ggml_backend_sycl_split_buffer_type_is_host(ggml_backend_buffer_type
} }
static ggml_backend_buffer_type_i ggml_backend_sycl_split_buffer_type_interface = { static ggml_backend_buffer_type_i ggml_backend_sycl_split_buffer_type_interface = {
/* .get_name = */ ggml_backend_sycl_split_buffer_type_name, /* .get_name = */ ggml_backend_sycl_split_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_sycl_split_buffer_type_alloc_buffer, /* .alloc_buffer = */ ggml_backend_sycl_split_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_sycl_split_buffer_type_get_alignment, /* .get_alignment = */ ggml_backend_sycl_split_buffer_type_get_alignment,
/* .get_max_size = */ NULL, // defaults to SIZE_MAX /* .get_max_size = */ NULL, // defaults to SIZE_MAX
@ -4849,7 +4869,7 @@ ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * ten
// host buffer type // host buffer type
static const char * ggml_backend_sycl_host_buffer_type_name(ggml_backend_buffer_type_t buft) { static const char * ggml_backend_sycl_host_buffer_get_name(ggml_backend_buffer_type_t buft) {
return GGML_SYCL_NAME "_Host"; return GGML_SYCL_NAME "_Host";
UNUSED(buft); UNUSED(buft);
@ -4886,7 +4906,7 @@ ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type() {
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_host_buffer_type\n"); GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_host_buffer_type\n");
static struct ggml_backend_buffer_type ggml_backend_sycl_buffer_type_host = { static struct ggml_backend_buffer_type ggml_backend_sycl_buffer_type_host = {
/* .iface = */ { /* .iface = */ {
/* .get_name = */ ggml_backend_sycl_host_buffer_type_name, /* .get_name = */ ggml_backend_sycl_host_buffer_get_name,
/* .alloc_buffer = */ ggml_backend_sycl_host_buffer_type_alloc_buffer, /* .alloc_buffer = */ ggml_backend_sycl_host_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment, /* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
/* .get_max_size = */ NULL, // TODO: return device.maxBufferLength /* .get_max_size = */ NULL, // TODO: return device.maxBufferLength
@ -4902,7 +4922,7 @@ ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type() {
// backend // backend
static const char * ggml_backend_sycl_name(ggml_backend_t backend) { static const char * ggml_backend_sycl_get_name(ggml_backend_t backend) {
ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context; ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
@ -4958,24 +4978,55 @@ catch (sycl::exception const &exc) {
std::exit(1); std::exit(1);
} }
static bool ggml_backend_sycl_cpy_tensor_async(ggml_backend_t backend, //TODO: need to verify
static bool ggml_backend_sycl_cpy_tensor_async(ggml_backend_t backend_src,
ggml_backend_t backend_dst,
const ggml_tensor *src, const ggml_tensor *src,
ggml_tensor *dst) try { ggml_tensor *dst) try {
ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context; ggml_backend_buffer_t buf_src = src->view_src ? src->view_src->buffer : src->buffer;
if (dst->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device) && ggml_backend_buffer_is_sycl(src->buffer)) { ggml_backend_buffer_t buf_dst = dst->view_src ? dst->view_src->buffer : dst->buffer;
/*
DPCT1009:215: SYCL uses exceptions to report errors and does not use the if (!ggml_backend_is_sycl(backend_src) || !ggml_backend_is_sycl(backend_dst)) {
error codes. The original code was commented out and a warning string return false;
was inserted. You need to rewrite this code.
*/
const queue_ptr stream = sycl_ctx->stream(sycl_ctx->device, 0);
SYCL_CHECK(CHECK_TRY_ERROR((stream)->memcpy(
dst->data, src->data, ggml_nbytes(dst)).wait()));
return true;
} }
if (!ggml_backend_buffer_is_sycl(src->buffer) || !ggml_backend_buffer_is_sycl(dst->buffer)) {
return false; return false;
} }
// device -> device copy
ggml_backend_sycl_context * sycl_ctx_src = (ggml_backend_sycl_context *)backend_src->context;
ggml_backend_sycl_context * sycl_ctx_dst = (ggml_backend_sycl_context *)backend_dst->context;
ggml_backend_sycl_buffer_context * buf_ctx_src = (ggml_backend_sycl_buffer_context *)buf_src->context;
ggml_backend_sycl_buffer_context * buf_ctx_dst = (ggml_backend_sycl_buffer_context *)buf_dst->context;
if (sycl_ctx_src->device != buf_ctx_src->device || sycl_ctx_dst->device != buf_ctx_dst->device) {
#ifndef NDEBUG
GGML_LOG_DEBUG("%s: backend and buffer devices do not match\n", __func__);
#endif
return false;
}
if (backend_src != backend_dst) {
// copy on src stream
if (sycl_ctx_src->device == sycl_ctx_dst->device) {
SYCL_CHECK(CHECK_TRY_ERROR(sycl_ctx_src->stream()->memcpy(
dst->data, src->data, ggml_nbytes(dst))));
} else {
SYCL_CHECK(CHECK_TRY_ERROR(dpct::async_dpct_memcpy(
dst->data, sycl_ctx_dst->device, src->data,
sycl_ctx_src->device, ggml_nbytes(dst),
*(sycl_ctx_src->stream()))));
}
} else {
// src and dst are on the same backend
SYCL_CHECK(CHECK_TRY_ERROR(sycl_ctx_src->stream()->memcpy(
dst->data, src->data, ggml_nbytes(dst)).wait()));
}
return true;
}
catch (sycl::exception const &exc) { catch (sycl::exception const &exc) {
std::cerr << exc.what() << "Exception caught at file:" << __FILE__ std::cerr << exc.what() << "Exception caught at file:" << __FILE__
<< ", line:" << __LINE__ << std::endl; << ", line:" << __LINE__ << std::endl;
@ -5023,7 +5074,7 @@ static ggml_status ggml_backend_sycl_graph_compute(ggml_backend_t backend, ggml_
return GGML_STATUS_SUCCESS; return GGML_STATUS_SUCCESS;
} }
static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, const ggml_tensor * op) { static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
switch (op->op) { switch (op->op) {
case GGML_OP_CONV_TRANSPOSE_1D: case GGML_OP_CONV_TRANSPOSE_1D:
{ {
@ -5167,40 +5218,25 @@ static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, const ggml_ten
return false; return false;
} }
UNUSED(backend); UNUSED(dev);
}
static bool ggml_backend_sycl_offload_op(ggml_backend_t backend, const ggml_tensor * op) {
const int min_batch_size = 32;
return op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS && op->op != GGML_OP_MUL_MAT_ID;
GGML_UNUSED(backend);
}
static bool ggml_backend_sycl_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft) {
if (buft->iface.get_name != ggml_backend_sycl_buffer_type_name) {
return false;
}
ggml_backend_sycl_buffer_type_context * buft_ctx = (ggml_backend_sycl_buffer_type_context *)buft->context;
ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
return buft_ctx->device == sycl_ctx->device;
} }
static ggml_backend_i ggml_backend_sycl_interface = { static ggml_backend_i ggml_backend_sycl_interface = {
/* .get_name = */ ggml_backend_sycl_name, /* .get_name = */ ggml_backend_sycl_get_name,
/* .free = */ ggml_backend_sycl_free, /* .free = */ ggml_backend_sycl_free,
/* .get_default_buffer_type = */ ggml_backend_sycl_get_default_buffer_type, /* .get_default_buffer_type = */ ggml_backend_sycl_get_default_buffer_type,
/* .set_tensor_async = */ ggml_backend_sycl_set_tensor_async, /* .set_tensor_async = */ ggml_backend_sycl_set_tensor_async,
/* .get_tensor_async = */ ggml_backend_sycl_get_tensor_async, /* .get_tensor_async = */ ggml_backend_sycl_get_tensor_async,
/* .cpy_tensor_async = */ NULL, //ggml_backend_sycl_cpy_tensor_async, // TODO: update for the new interface /* .cpy_tensor_async = */ NULL, //ggml_backend_sycl_cpy_tensor_async, TODO: need to verify
/* .synchronize = */ ggml_backend_sycl_synchronize, /* .synchronize = */ ggml_backend_sycl_synchronize,
/* .graph_plan_create = */ NULL, /* .graph_plan_create = */ NULL,
/* .graph_plan_free = */ NULL, /* .graph_plan_free = */ NULL,
/* .graph_plan_update = */ NULL, /* .graph_plan_update = */ NULL,
/* .graph_plan_compute = */ NULL, /* .graph_plan_compute = */ NULL,
/* .graph_compute = */ ggml_backend_sycl_graph_compute, /* .graph_compute = */ ggml_backend_sycl_graph_compute,
/* .supports_op = */ ggml_backend_sycl_supports_op, /* .supports_op = */ NULL,
/* .supports_buft = */ ggml_backend_sycl_supports_buft, /* .supports_buft = */ NULL,
/* .offload_op = */ ggml_backend_sycl_offload_op, /* .offload_op = */ NULL,
/* .event_record = */ NULL, /* .event_record = */ NULL,
/* .event_wait = */ NULL, /* .event_wait = */ NULL,
}; };
@ -5210,6 +5246,236 @@ static ggml_guid_t ggml_backend_sycl_guid() {
return &guid; return &guid;
} }
bool ggml_backend_is_sycl(ggml_backend_t backend) {
return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_sycl_guid());
}
int ggml_backend_sycl_get_device_count() {
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_device_count\n");
return ggml_sycl_info().device_count;
}
void ggml_backend_sycl_get_device_description(int device, char *description,
size_t description_size) try {
dpct::device_info prop;
SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device(device).get_device_info(prop)));
snprintf(description, description_size, "%s", prop.get_name());
}
catch (sycl::exception const &exc) {
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
<< ", line:" << __LINE__ << std::endl;
std::exit(1);
}
// backend device
struct ggml_backend_sycl_device_context {
int device;
std::string name;
std::string description;
};
static const char * ggml_backend_sycl_device_get_name(ggml_backend_dev_t dev) {
ggml_backend_sycl_device_context * ctx = (ggml_backend_sycl_device_context *)dev->context;
return ctx->name.c_str();
}
static const char * ggml_backend_sycl_device_get_description(ggml_backend_dev_t dev) {
ggml_backend_sycl_device_context * ctx = (ggml_backend_sycl_device_context *)dev->context;
return ctx->description.c_str();
}
static void ggml_backend_sycl_device_get_memory(ggml_backend_dev_t dev,
size_t *free,
size_t *total) try {
ggml_backend_sycl_device_context * ctx = (ggml_backend_sycl_device_context *)dev->context;
ggml_sycl_set_device(ctx->device);
SYCL_CHECK(CHECK_TRY_ERROR(
dpct::get_current_device().get_memory_info(*free, *total)));
}
catch (sycl::exception const &exc) {
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
<< ", line:" << __LINE__ << std::endl;
std::exit(1);
}
static enum ggml_backend_dev_type ggml_backend_sycl_device_get_type(ggml_backend_dev_t dev) {
GGML_UNUSED(dev);
return GGML_BACKEND_DEVICE_TYPE_GPU_FULL;
}
static void ggml_backend_sycl_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
props->name = ggml_backend_sycl_device_get_name(dev);
props->description = ggml_backend_sycl_device_get_description(dev);
props->type = ggml_backend_sycl_device_get_type(dev);
ggml_backend_sycl_device_get_memory(dev, &props->memory_free, &props->memory_total);
props->caps = {
/* .async = */ true,
/* .host_buffer = */ false,
/* .buffer_from_host_ptr = */ false,
/* .events = */ false,
};
}
static ggml_backend_t ggml_backend_sycl_device_init(ggml_backend_dev_t dev, const char * params) {
GGML_UNUSED(params);
ggml_backend_sycl_device_context * ctx = (ggml_backend_sycl_device_context *)dev->context;
return ggml_backend_sycl_init(ctx->device);
}
static ggml_backend_buffer_type_t ggml_backend_sycl_device_get_buffer_type(ggml_backend_dev_t dev) {
ggml_backend_sycl_device_context * ctx = (ggml_backend_sycl_device_context *)dev->context;
return ggml_backend_sycl_buffer_type(ctx->device);
}
static ggml_backend_buffer_type_t ggml_backend_sycl_device_get_host_buffer_type(ggml_backend_dev_t dev) {
GGML_UNUSED(dev);
return ggml_backend_sycl_host_buffer_type();
}
static ggml_backend_buffer_t ggml_backend_sycl_device_buffer_from_host_ptr(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) {
GGML_UNUSED(dev);
GGML_UNUSED(ptr);
GGML_UNUSED(size);
GGML_UNUSED(max_tensor_size);
return nullptr;
}
static bool ggml_backend_sycl_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
if (ggml_backend_buft_is_sycl_split(buft)) {
return true;
}
if (ggml_backend_buft_is_sycl(buft)) {
ggml_backend_sycl_device_context * dev_ctx = (ggml_backend_sycl_device_context *)dev->context;
ggml_backend_sycl_buffer_type_context * buft_ctx = (ggml_backend_sycl_buffer_type_context *)buft->context;
return buft_ctx->device == dev_ctx->device;
}
return false;
}
static bool ggml_backend_sycl_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
const int min_batch_size = 32;
return (op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS) ||
(op->ne[2] >= min_batch_size && op->op == GGML_OP_MUL_MAT_ID);
GGML_UNUSED(dev);
}
static const ggml_backend_device_i ggml_backend_sycl_device_interface = {
/* .get_name = */ ggml_backend_sycl_device_get_name,
/* .get_description = */ ggml_backend_sycl_device_get_description,
/* .get_memory = */ ggml_backend_sycl_device_get_memory,
/* .get_type = */ ggml_backend_sycl_device_get_type,
/* .get_props = */ ggml_backend_sycl_device_get_props,
/* .init_backend = */ ggml_backend_sycl_device_init,
/* .get_buffer_type = */ ggml_backend_sycl_device_get_buffer_type,
/* .get_host_buffer_type = */ ggml_backend_sycl_device_get_host_buffer_type,
/* .buffer_from_host_ptr = */ ggml_backend_sycl_device_buffer_from_host_ptr,
/* .supports_op = */ ggml_backend_sycl_device_supports_op,
/* .supports_buft = */ ggml_backend_sycl_device_supports_buft,
/* .offload_op = */ ggml_backend_sycl_device_offload_op,
/* .event_new = */ NULL,
/* .event_free = */ NULL,
/* .event_synchronize = */ NULL,
};
// backend reg
struct ggml_backend_sycl_reg_context {
std::vector<ggml_backend_dev_t> devices;
};
static const char * ggml_backend_sycl_reg_get_name(ggml_backend_reg_t reg) {
GGML_UNUSED(reg);
return GGML_SYCL_NAME;
}
static size_t ggml_backend_sycl_reg_get_device_count(ggml_backend_reg_t reg) {
ggml_backend_sycl_reg_context * ctx = (ggml_backend_sycl_reg_context *)reg->context;
return ctx->devices.size();
}
static ggml_backend_dev_t ggml_backend_sycl_reg_get_device(ggml_backend_reg_t reg, size_t index) {
ggml_backend_sycl_reg_context * ctx = (ggml_backend_sycl_reg_context *)reg->context;
GGML_ASSERT(index < ctx->devices.size());
return ctx->devices[index];
}
static void * ggml_backend_sycl_reg_get_proc_address(ggml_backend_reg_t reg, const char * name) {
GGML_UNUSED(reg);
if (strcmp(name, "ggml_backend_split_buffer_type") == 0) {
return (void *)ggml_backend_sycl_split_buffer_type;
}
if (strcmp(name, "ggml_backend_register_host_buffer") == 0) {
/* SYCL currently does not support registering of existing host memory for use by
device. Use USM to allocate memory for use by host and device. */
return nullptr;
}
if (strcmp(name, "ggml_backend_unregister_host_buffer") == 0) {
/* SYCL currently does not support registering of existing host memory for use by
device. Use USM to allocate memory for use by host and device. */
return nullptr;
}
return nullptr;
}
static const ggml_backend_reg_i ggml_backend_sycl_reg_interface = {
/* .get_name = */ ggml_backend_sycl_reg_get_name,
/* .get_device_count = */ ggml_backend_sycl_reg_get_device_count,
/* .get_device_get = */ ggml_backend_sycl_reg_get_device,
/* .get_proc_address = */ ggml_backend_sycl_reg_get_proc_address,
};
// backend registry
ggml_backend_reg_t ggml_backend_sycl_reg() {
static ggml_backend_reg reg;
static bool sycl_reg_initialized = false;
{
static std::mutex mutex;
std::lock_guard<std::mutex> lock(mutex);
if (!sycl_reg_initialized) {
ggml_backend_sycl_reg_context * ctx = new ggml_backend_sycl_reg_context;
for (int i = 0; i < ggml_sycl_info().device_count; i++) {
ggml_backend_sycl_device_context * dev_ctx = new ggml_backend_sycl_device_context;
dev_ctx->device = i;
dev_ctx->name = GGML_SYCL_NAME + std::to_string(i);
ggml_sycl_set_device(i);
sycl::device device = dpct::dev_mgr::instance().get_device(i);
dpct::device_info prop;
SYCL_CHECK(CHECK_TRY_ERROR(
dpct::get_device_info(prop, device)));
std::string name = std::string(prop.get_name());
dev_ctx->description = name;
ggml_backend_dev_t dev = new ggml_backend_device {
/* .interface = */ ggml_backend_sycl_device_interface,
/* .reg = */ &reg,
/* .context = */ dev_ctx
};
ctx->devices.push_back(dev);
}
reg = ggml_backend_reg {
/* .interface = */ ggml_backend_sycl_reg_interface,
/* .context = */ ctx
};
}
sycl_reg_initialized = true;
}
return &reg;
}
ggml_backend_t ggml_backend_sycl_init(int device) { ggml_backend_t ggml_backend_sycl_init(int device) {
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_init\n"); GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_init\n");
ggml_check_sycl(); ggml_check_sycl();
@ -5225,18 +5491,9 @@ ggml_backend_t ggml_backend_sycl_init(int device) {
ggml_backend_t sycl_backend = new ggml_backend { ggml_backend_t sycl_backend = new ggml_backend {
/* .guid = */ ggml_backend_sycl_guid(), /* .guid = */ ggml_backend_sycl_guid(),
/* .interface = */ ggml_backend_sycl_interface, /* .interface = */ ggml_backend_sycl_interface,
/* .device = */ nullptr, /* .device = */ ggml_backend_reg_dev_get(ggml_backend_sycl_reg(), device),
/* .context = */ ctx /* .context = */ ctx
}; };
return sycl_backend; return sycl_backend;
} }
bool ggml_backend_is_sycl(ggml_backend_t backend) {
return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_sycl_guid());
}
int ggml_backend_sycl_get_device_count() {
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_device_count\n");
return ggml_sycl_info().device_count;
}

View File

@ -1400,6 +1400,47 @@ namespace dpct
GGML_UNUSED(direction); GGML_UNUSED(direction);
} }
// RAII for host pointer
class host_buffer {
void *_buf;
size_t _size;
sycl::queue &_q;
const std::vector<sycl::event> &_deps; // free operation depends
public:
host_buffer(size_t size, sycl::queue &q, const std::vector<sycl::event> &deps)
: _buf(std::malloc(size)), _size(size), _q(q), _deps(deps) {}
void *get_ptr() const { return _buf; }
size_t get_size() const { return _size; }
~host_buffer() {
if (_buf) {
_q.submit([&](sycl::handler &cgh) {
cgh.depends_on(_deps);
cgh.host_task([buf = _buf] { std::free(buf); });
});
}
}
};
static sycl::event dpct_memcpy(sycl::queue &q, void *to_ptr, int to_dev_id,
const void *from_ptr, int from_dev_id,
size_t size) {
if (to_dev_id == from_dev_id)
return dpct_memcpy(q, to_ptr, from_ptr, size,
memcpy_direction::device_to_device);
// Now, different device have different context, and memcpy API cannot copy
// data between different context. So we need use host buffer to copy the data
// between devices.
std::vector<sycl::event> event_list;
host_buffer buf(size, q, event_list);
auto copy_events = dpct_memcpy(q, buf.get_ptr(), from_ptr, size,
memcpy_direction::device_to_host);
event_list.push_back(dpct::detail::dpct_memcpy(
q, to_ptr, buf.get_ptr(), size, memcpy_direction::host_to_device,
{copy_events}));
return event_list[0];
}
// Get actual copy range and make sure it will not exceed range. // Get actual copy range and make sure it will not exceed range.
static inline size_t get_copy_range(sycl::range<3> size, size_t slice, static inline size_t get_copy_range(sycl::range<3> size, size_t slice,
size_t pitch) size_t pitch)
@ -1810,6 +1851,12 @@ namespace dpct
detail::dpct_memcpy(q, to_ptr, from_ptr, size, direction); detail::dpct_memcpy(q, to_ptr, from_ptr, size, direction);
} }
static void async_dpct_memcpy(void *to_ptr, int to_dev_id, const void *from_ptr,
int from_dev_id, size_t size,
sycl::queue &q = get_default_queue()) {
detail::dpct_memcpy(q, to_ptr, to_dev_id, from_ptr, from_dev_id, size);
}
static inline unsigned int select_device(unsigned int id) static inline unsigned int select_device(unsigned int id)
{ {
dev_mgr::instance().select_device(id); dev_mgr::instance().select_device(id);

View File

@ -3416,9 +3416,7 @@ struct llama_lora_adapter {
static int llama_get_device_count(const llama_model & model) { static int llama_get_device_count(const llama_model & model) {
int count = (int) model.devices.size(); int count = (int) model.devices.size();
#if defined(GGML_USE_SYCL) #if defined(GGML_USE_VULKAN)
count += ggml_backend_sycl_get_device_count();
#elif defined(GGML_USE_VULKAN)
count += ggml_backend_vk_get_device_count(); count += ggml_backend_vk_get_device_count();
#elif defined(GGML_USE_CANN) #elif defined(GGML_USE_CANN)
count += ggml_backend_cann_get_device_count(); count += ggml_backend_cann_get_device_count();