mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-11 19:21:46 +00:00
[SYCL] offload op (#6217)
* remove no USM methods * leave the schedule to ggml_backend_sched entirely
This commit is contained in:
parent
d03224ac98
commit
ddf6568510
293
ggml-sycl.cpp
293
ggml-sycl.cpp
@ -740,11 +740,7 @@ namespace dpct
|
||||
|
||||
sycl::queue &default_queue()
|
||||
{
|
||||
#ifdef DPCT_USM_LEVEL_NONE
|
||||
return out_of_order_queue();
|
||||
#else
|
||||
return in_order_queue();
|
||||
#endif // DPCT_USM_LEVEL_NONE
|
||||
}
|
||||
|
||||
void queues_wait_and_throw()
|
||||
@ -763,11 +759,7 @@ namespace dpct
|
||||
|
||||
sycl::queue *create_queue(bool enable_exception_handler = false)
|
||||
{
|
||||
#ifdef DPCT_USM_LEVEL_NONE
|
||||
return create_out_of_order_queue(enable_exception_handler);
|
||||
#else
|
||||
return create_in_order_queue(enable_exception_handler);
|
||||
#endif // DPCT_USM_LEVEL_NONE
|
||||
}
|
||||
|
||||
sycl::queue *create_queue(sycl::context context, sycl::device device,
|
||||
@ -1075,11 +1067,6 @@ namespace dpct
|
||||
static pointer_access_attribute get_pointer_attribute(sycl::queue &q,
|
||||
const void *ptr)
|
||||
{
|
||||
#ifdef DPCT_USM_LEVEL_NONE
|
||||
return mem_mgr::instance().is_device_ptr(ptr)
|
||||
? pointer_access_attribute::device_only
|
||||
: pointer_access_attribute::host_only;
|
||||
#else
|
||||
switch (sycl::get_pointer_type(ptr, q.get_context()))
|
||||
{
|
||||
case sycl::usm::alloc::unknown:
|
||||
@ -1090,7 +1077,6 @@ namespace dpct
|
||||
case sycl::usm::alloc::host:
|
||||
return pointer_access_attribute::host_device;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
template <typename ArgT>
|
||||
@ -1273,11 +1259,7 @@ namespace dpct
|
||||
|
||||
static inline void *dpct_malloc(size_t size, sycl::queue &q)
|
||||
{
|
||||
#ifdef DPCT_USM_LEVEL_NONE
|
||||
return mem_mgr::instance().mem_alloc(size * sizeof(byte_t));
|
||||
#else
|
||||
return sycl::malloc_device(size, q.get_device(), q.get_context());
|
||||
#endif // DPCT_USM_LEVEL_NONE
|
||||
}
|
||||
|
||||
#define PITCH_DEFAULT_ALIGN(x) (((x) + 31) & ~(0x1F))
|
||||
@ -1301,25 +1283,7 @@ namespace dpct
|
||||
static inline sycl::event dpct_memset(sycl::queue &q, void *dev_ptr,
|
||||
valueT value, size_t size)
|
||||
{
|
||||
#ifdef DPCT_USM_LEVEL_NONE
|
||||
auto &mm = mem_mgr::instance();
|
||||
assert(mm.is_device_ptr(dev_ptr));
|
||||
auto alloc = mm.translate_ptr(dev_ptr);
|
||||
size_t offset = (valueT *)dev_ptr - (valueT *)alloc.alloc_ptr;
|
||||
|
||||
return q.submit([&](sycl::handler &cgh)
|
||||
{
|
||||
auto r = sycl::range<1>(size);
|
||||
auto o = sycl::id<1>(offset);
|
||||
auto new_buffer = alloc.buffer.reinterpret<valueT>(
|
||||
sycl::range<1>(alloc.size / sizeof(valueT)));
|
||||
sycl::accessor<valueT, 1, sycl::access_mode::write,
|
||||
sycl::access::target::device>
|
||||
acc(new_buffer, cgh, r, o);
|
||||
cgh.fill(acc, value); });
|
||||
#else
|
||||
return q.fill(dev_ptr, value, size);
|
||||
#endif // DPCT_USM_LEVEL_NONE
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1413,72 +1377,8 @@ namespace dpct
|
||||
{
|
||||
if (!size)
|
||||
return sycl::event{};
|
||||
#ifdef DPCT_USM_LEVEL_NONE
|
||||
auto &mm = mem_mgr::instance();
|
||||
auto real_direction = deduce_memcpy_direction(q, to_ptr, from_ptr, direction);
|
||||
|
||||
switch (real_direction)
|
||||
{
|
||||
case host_to_host:
|
||||
return q.submit([&](sycl::handler &cgh)
|
||||
{
|
||||
cgh.depends_on(dep_events);
|
||||
cgh.host_task([=] { std::memcpy(to_ptr, from_ptr, size); }); });
|
||||
case host_to_device:
|
||||
{
|
||||
auto alloc = mm.translate_ptr(to_ptr);
|
||||
size_t offset = (byte_t *)to_ptr - alloc.alloc_ptr;
|
||||
return q.submit([&](sycl::handler &cgh)
|
||||
{
|
||||
cgh.depends_on(dep_events);
|
||||
auto r = sycl::range<1>(size);
|
||||
auto o = sycl::id<1>(offset);
|
||||
sycl::accessor<byte_t, 1, sycl::access_mode::write,
|
||||
sycl::access::target::device>
|
||||
acc(alloc.buffer, cgh, r, o);
|
||||
cgh.copy(from_ptr, acc); });
|
||||
}
|
||||
case device_to_host:
|
||||
{
|
||||
auto alloc = mm.translate_ptr(from_ptr);
|
||||
size_t offset = (byte_t *)from_ptr - alloc.alloc_ptr;
|
||||
return q.submit([&](sycl::handler &cgh)
|
||||
{
|
||||
cgh.depends_on(dep_events);
|
||||
auto r = sycl::range<1>(size);
|
||||
auto o = sycl::id<1>(offset);
|
||||
sycl::accessor<byte_t, 1, sycl::access_mode::read,
|
||||
sycl::access::target::device>
|
||||
acc(alloc.buffer, cgh, r, o);
|
||||
cgh.copy(acc, to_ptr); });
|
||||
}
|
||||
case device_to_device:
|
||||
{
|
||||
auto to_alloc = mm.translate_ptr(to_ptr);
|
||||
auto from_alloc = mm.translate_ptr(from_ptr);
|
||||
size_t to_offset = (byte_t *)to_ptr - to_alloc.alloc_ptr;
|
||||
size_t from_offset = (byte_t *)from_ptr - from_alloc.alloc_ptr;
|
||||
return q.submit([&](sycl::handler &cgh)
|
||||
{
|
||||
cgh.depends_on(dep_events);
|
||||
auto r = sycl::range<1>(size);
|
||||
auto to_o = sycl::id<1>(to_offset);
|
||||
auto from_o = sycl::id<1>(from_offset);
|
||||
sycl::accessor<byte_t, 1, sycl::access_mode::write,
|
||||
sycl::access::target::device>
|
||||
to_acc(to_alloc.buffer, cgh, r, to_o);
|
||||
sycl::accessor<byte_t, 1, sycl::access_mode::read,
|
||||
sycl::access::target::device>
|
||||
from_acc(from_alloc.buffer, cgh, r, from_o);
|
||||
cgh.copy(from_acc, to_acc); });
|
||||
}
|
||||
default:
|
||||
throw std::runtime_error("dpct_memcpy: invalid direction value");
|
||||
}
|
||||
#else
|
||||
return q.memcpy(to_ptr, from_ptr, size, dep_events);
|
||||
GGML_UNUSED(direction);
|
||||
#endif // DPCT_USM_LEVEL_NONE
|
||||
}
|
||||
|
||||
// Get actual copy range and make sure it will not exceed range.
|
||||
@ -1618,45 +1518,15 @@ namespace dpct
|
||||
break;
|
||||
}
|
||||
case device_to_device:
|
||||
#ifdef DPCT_USM_LEVEL_NONE
|
||||
{
|
||||
auto &mm = mem_mgr::instance();
|
||||
auto to_alloc = mm.translate_ptr(to_surface);
|
||||
auto from_alloc = mm.translate_ptr(from_surface);
|
||||
size_t to_offset = (byte_t *)to_surface - to_alloc.alloc_ptr;
|
||||
size_t from_offset = (byte_t *)from_surface - from_alloc.alloc_ptr;
|
||||
event_list.push_back(q.submit([&](sycl::handler &cgh)
|
||||
{
|
||||
cgh.depends_on(dep_events);
|
||||
auto to_o = sycl::id<1>(to_offset);
|
||||
auto from_o = sycl::id<1>(from_offset);
|
||||
sycl::accessor<byte_t, 1, sycl::access_mode::write,
|
||||
sycl::access::target::device>
|
||||
to_acc(to_alloc.buffer, cgh,
|
||||
get_copy_range(size, to_slice, to_range.get(0)), to_o);
|
||||
sycl::accessor<byte_t, 1, sycl::access_mode::read,
|
||||
sycl::access::target::device>
|
||||
from_acc(from_alloc.buffer, cgh,
|
||||
get_copy_range(size, from_slice, from_range.get(0)), from_o);
|
||||
cgh.parallel_for<class dpct_memcpy_3d_detail_usmnone>(
|
||||
size,
|
||||
[=](sycl::id<3> id) {
|
||||
to_acc[get_offset(id, to_slice, to_range.get(0))] =
|
||||
from_acc[get_offset(id, from_slice, from_range.get(0))];
|
||||
}); }));
|
||||
}
|
||||
#else
|
||||
event_list.push_back(q.submit([&](sycl::handler &cgh)
|
||||
{
|
||||
cgh.depends_on(dep_events);
|
||||
cgh.parallel_for<class dpct_memcpy_3d_detail>(
|
||||
size,
|
||||
[=](sycl::id<3> id) {
|
||||
to_surface[get_offset(id, to_slice, to_range.get(0))] =
|
||||
from_surface[get_offset(id, from_slice, from_range.get(0))];
|
||||
}); }));
|
||||
#endif
|
||||
break;
|
||||
event_list.push_back(q.submit([&](sycl::handler &cgh){
|
||||
cgh.depends_on(dep_events);
|
||||
cgh.parallel_for<class dpct_memcpy_3d_detail>(
|
||||
size,
|
||||
[=](sycl::id<3> id) {
|
||||
to_surface[get_offset(id, to_slice, to_range.get(0))] =
|
||||
from_surface[get_offset(id, from_slice, from_range.get(0))];
|
||||
}); }));
|
||||
break;
|
||||
default:
|
||||
throw std::runtime_error("dpct_memcpy: invalid direction value");
|
||||
}
|
||||
@ -1754,11 +1624,7 @@ namespace dpct
|
||||
{
|
||||
if (ptr)
|
||||
{
|
||||
#ifdef DPCT_USM_LEVEL_NONE
|
||||
detail::mem_mgr::instance().mem_free(ptr);
|
||||
#else
|
||||
sycl::free(ptr, q.get_context());
|
||||
#endif // DPCT_USM_LEVEL_NONE
|
||||
}
|
||||
}
|
||||
|
||||
@ -1766,11 +1632,7 @@ namespace dpct
|
||||
inline auto get_memory(const void *x)
|
||||
{
|
||||
T *new_x = reinterpret_cast<T *>(const_cast<void *>(x));
|
||||
#ifdef DPCT_USM_LEVEL_NONE
|
||||
return dpct::get_buffer<std::remove_cv_t<T>>(new_x);
|
||||
#else
|
||||
return new_x;
|
||||
#endif
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
@ -2222,72 +2084,8 @@ namespace dpct
|
||||
{
|
||||
if (!size)
|
||||
return sycl::event{};
|
||||
#ifdef DPCT_USM_LEVEL_NONE
|
||||
auto &mm = mem_mgr::instance();
|
||||
auto real_direction = deduce_memcpy_direction(q, to_ptr, from_ptr, direction);
|
||||
|
||||
switch (real_direction)
|
||||
{
|
||||
case host_to_host:
|
||||
return q.submit([&](sycl::handler &cgh)
|
||||
{
|
||||
cgh.depends_on(dep_events);
|
||||
cgh.host_task([=] { std::memcpy(to_ptr, from_ptr, size); }); });
|
||||
case host_to_device:
|
||||
{
|
||||
auto alloc = mm.translate_ptr(to_ptr);
|
||||
size_t offset = (byte_t *)to_ptr - alloc.alloc_ptr;
|
||||
return q.submit([&](sycl::handler &cgh)
|
||||
{
|
||||
cgh.depends_on(dep_events);
|
||||
auto r = sycl::range<1>(size);
|
||||
auto o = sycl::id<1>(offset);
|
||||
sycl::accessor<byte_t, 1, sycl::access_mode::write,
|
||||
sycl::access::target::device>
|
||||
acc(alloc.buffer, cgh, r, o);
|
||||
cgh.copy(from_ptr, acc); });
|
||||
}
|
||||
case device_to_host:
|
||||
{
|
||||
auto alloc = mm.translate_ptr(from_ptr);
|
||||
size_t offset = (byte_t *)from_ptr - alloc.alloc_ptr;
|
||||
return q.submit([&](sycl::handler &cgh)
|
||||
{
|
||||
cgh.depends_on(dep_events);
|
||||
auto r = sycl::range<1>(size);
|
||||
auto o = sycl::id<1>(offset);
|
||||
sycl::accessor<byte_t, 1, sycl::access_mode::read,
|
||||
sycl::access::target::device>
|
||||
acc(alloc.buffer, cgh, r, o);
|
||||
cgh.copy(acc, to_ptr); });
|
||||
}
|
||||
case device_to_device:
|
||||
{
|
||||
auto to_alloc = mm.translate_ptr(to_ptr);
|
||||
auto from_alloc = mm.translate_ptr(from_ptr);
|
||||
size_t to_offset = (byte_t *)to_ptr - to_alloc.alloc_ptr;
|
||||
size_t from_offset = (byte_t *)from_ptr - from_alloc.alloc_ptr;
|
||||
return q.submit([&](sycl::handler &cgh)
|
||||
{
|
||||
cgh.depends_on(dep_events);
|
||||
auto r = sycl::range<1>(size);
|
||||
auto to_o = sycl::id<1>(to_offset);
|
||||
auto from_o = sycl::id<1>(from_offset);
|
||||
sycl::accessor<byte_t, 1, sycl::access_mode::write,
|
||||
sycl::access::target::device>
|
||||
to_acc(to_alloc.buffer, cgh, r, to_o);
|
||||
sycl::accessor<byte_t, 1, sycl::access_mode::read,
|
||||
sycl::access::target::device>
|
||||
from_acc(from_alloc.buffer, cgh, r, from_o);
|
||||
cgh.copy(from_acc, to_acc); });
|
||||
}
|
||||
default:
|
||||
throw std::runtime_error("dpct_memcpy: invalid direction value");
|
||||
}
|
||||
#else
|
||||
return q.memcpy(to_ptr, from_ptr, size, dep_events);
|
||||
GGML_UNUSED(direction);
|
||||
#endif // DPCT_USM_LEVEL_NONE
|
||||
}
|
||||
|
||||
// Get actual copy range and make sure it will not exceed range.
|
||||
@ -2427,34 +2225,6 @@ namespace dpct
|
||||
break;
|
||||
}
|
||||
case device_to_device:
|
||||
#ifdef DPCT_USM_LEVEL_NONE
|
||||
{
|
||||
auto &mm = mem_mgr::instance();
|
||||
auto to_alloc = mm.translate_ptr(to_surface);
|
||||
auto from_alloc = mm.translate_ptr(from_surface);
|
||||
size_t to_offset = (byte_t *)to_surface - to_alloc.alloc_ptr;
|
||||
size_t from_offset = (byte_t *)from_surface - from_alloc.alloc_ptr;
|
||||
event_list.push_back(q.submit([&](sycl::handler &cgh)
|
||||
{
|
||||
cgh.depends_on(dep_events);
|
||||
auto to_o = sycl::id<1>(to_offset);
|
||||
auto from_o = sycl::id<1>(from_offset);
|
||||
sycl::accessor<byte_t, 1, sycl::access_mode::write,
|
||||
sycl::access::target::device>
|
||||
to_acc(to_alloc.buffer, cgh,
|
||||
get_copy_range(size, to_slice, to_range.get(0)), to_o);
|
||||
sycl::accessor<byte_t, 1, sycl::access_mode::read,
|
||||
sycl::access::target::device>
|
||||
from_acc(from_alloc.buffer, cgh,
|
||||
get_copy_range(size, from_slice, from_range.get(0)), from_o);
|
||||
cgh.parallel_for<class dpct_memcpy_3d_detail_usmnone>(
|
||||
size,
|
||||
[=](sycl::id<3> id) {
|
||||
to_acc[get_offset(id, to_slice, to_range.get(0))] =
|
||||
from_acc[get_offset(id, from_slice, from_range.get(0))];
|
||||
}); }));
|
||||
}
|
||||
#else
|
||||
event_list.push_back(q.submit([&](sycl::handler &cgh)
|
||||
{
|
||||
cgh.depends_on(dep_events);
|
||||
@ -2464,7 +2234,6 @@ namespace dpct
|
||||
to_surface[get_offset(id, to_slice, to_range.get(0))] =
|
||||
from_surface[get_offset(id, from_slice, from_range.get(0))];
|
||||
}); }));
|
||||
#endif
|
||||
break;
|
||||
default:
|
||||
throw std::runtime_error("dpct_memcpy: invalid direction value");
|
||||
@ -2655,9 +2424,6 @@ namespace dpct
|
||||
void *c[], library_data_t c_type, int ldc,
|
||||
int batch_size, library_data_t scaling_type)
|
||||
{
|
||||
#ifdef DPCT_USM_LEVEL_NONE
|
||||
throw std::runtime_error("this API is unsupported when USM level is none");
|
||||
#else
|
||||
if (scaling_type == library_data_t::real_float &&
|
||||
c_type == library_data_t::complex_float)
|
||||
{
|
||||
@ -2792,7 +2558,6 @@ namespace dpct
|
||||
default:
|
||||
throw std::runtime_error("the combination of data type is unsupported");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Computes a batch of matrix-matrix product with general matrices.
|
||||
@ -3131,24 +2896,9 @@ namespace dpct
|
||||
template <size_t D = Dimension>
|
||||
typename std::enable_if<D == 1, T>::type &operator[](size_t index) {
|
||||
init();
|
||||
#ifdef DPCT_USM_LEVEL_NONE
|
||||
return dpct::get_buffer<typename std::enable_if<D == 1, T>::type>(
|
||||
_device_ptr)
|
||||
.template get_access<sycl::access_mode::read_write>()[index];
|
||||
#else
|
||||
return _device_ptr[index];
|
||||
#endif // DPCT_USM_LEVEL_NONE
|
||||
}
|
||||
|
||||
#ifdef DPCT_USM_LEVEL_NONE
|
||||
/// Get sycl::accessor for the device memory object when usm is not used.
|
||||
accessor_t get_access(sycl::handler &cgh) {
|
||||
return get_buffer(_device_ptr)
|
||||
.template reinterpret<T, Dimension>(_range)
|
||||
.template get_access<detail::memory_traits<Memory, T>::mode,
|
||||
detail::memory_traits<Memory, T>::target>(cgh);
|
||||
}
|
||||
#else
|
||||
/// Get dpct::accessor with dimension info for the device memory object
|
||||
/// when usm is used and dimension is greater than 1.
|
||||
template <size_t D = Dimension>
|
||||
@ -3156,7 +2906,6 @@ namespace dpct
|
||||
get_access(sycl::handler &cgh) {
|
||||
return dpct_accessor_t((T *)_device_ptr, _range);
|
||||
}
|
||||
#endif // DPCT_USM_LEVEL_NONE
|
||||
|
||||
private:
|
||||
device_memory(value_t *memory_ptr, size_t size)
|
||||
@ -3201,15 +2950,6 @@ namespace dpct
|
||||
|
||||
/// Default constructor
|
||||
device_memory() : base(1) {}
|
||||
|
||||
#ifdef DPCT_USM_LEVEL_NONE
|
||||
/// Get sycl::accessor for the device memory object when usm is not used.
|
||||
accessor_t get_access(sycl::handler &cgh) {
|
||||
auto buf = get_buffer(base::get_ptr())
|
||||
.template reinterpret<T, 1>(sycl::range<1>(1));
|
||||
return accessor_t(buf, cgh);
|
||||
}
|
||||
#endif // DPCT_USM_LEVEL_NONE
|
||||
};
|
||||
} // namespace detail
|
||||
|
||||
@ -13181,7 +12921,7 @@ int get_work_group_size(int user_device_id) {
|
||||
return prop.get_max_work_group_size();
|
||||
}
|
||||
|
||||
void ggml_init_sycl() try {
|
||||
static void ggml_init_sycl() try {
|
||||
static bool initialized = false;
|
||||
|
||||
if (!initialized) {
|
||||
@ -16677,6 +16417,7 @@ static ggml_backend_buffer_type_i ggml_backend_sycl_buffer_type_interface = {
|
||||
};
|
||||
|
||||
ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device_index) {
|
||||
ggml_init_sycl();
|
||||
if (device_index>=g_device_count or device_index<0) {
|
||||
printf("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
|
||||
device_index, g_device_count-1);
|
||||
@ -17046,6 +16787,7 @@ static ggml_backend_buffer_type_i ggml_backend_sycl_split_buffer_type_interface
|
||||
};
|
||||
|
||||
GGML_CALL ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split) {
|
||||
ggml_init_sycl();
|
||||
// FIXME: this is not thread safe
|
||||
static std::map<std::array<float, GGML_SYCL_MAX_DEVICES>, struct ggml_backend_buffer_type> buft_map;
|
||||
|
||||
@ -17379,6 +17121,13 @@ GGML_CALL static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, cons
|
||||
UNUSED(backend);
|
||||
}
|
||||
|
||||
GGML_CALL static bool ggml_backend_sycl_offload_op(ggml_backend_t backend, const ggml_tensor * op) {
|
||||
const int min_batch_size = 32;
|
||||
return op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS;
|
||||
GGML_UNUSED(backend);
|
||||
}
|
||||
|
||||
|
||||
static ggml_backend_i ggml_backend_sycl_interface = {
|
||||
/* .get_name = */ ggml_backend_sycl_name,
|
||||
/* .free = */ ggml_backend_sycl_free,
|
||||
@ -17392,7 +17141,7 @@ static ggml_backend_i ggml_backend_sycl_interface = {
|
||||
/* .graph_plan_compute = */ NULL,
|
||||
/* .graph_compute = */ ggml_backend_sycl_graph_compute,
|
||||
/* .supports_op = */ ggml_backend_sycl_supports_op,
|
||||
/* .offload_op = */ NULL,
|
||||
/* .offload_op = */ ggml_backend_sycl_offload_op,
|
||||
/* .event_new = */ NULL,
|
||||
/* .event_free = */ NULL,
|
||||
/* .event_record = */ NULL,
|
||||
@ -17406,7 +17155,7 @@ static ggml_guid_t ggml_backend_sycl_guid() {
|
||||
}
|
||||
|
||||
GGML_CALL ggml_backend_t ggml_backend_sycl_init(int device) {
|
||||
ggml_init_sycl(); // TODO: remove from ggml.c
|
||||
ggml_init_sycl();
|
||||
|
||||
check_allow_gpu_index(device);
|
||||
|
||||
|
16
ggml-sycl.h
16
ggml-sycl.h
@ -16,16 +16,22 @@ extern "C" {
|
||||
#define GGML_SYCL_MAX_DEVICES 48
|
||||
#define GGML_SYCL_NAME "SYCL"
|
||||
|
||||
GGML_API void ggml_init_sycl(void);
|
||||
GGML_API bool ggml_sycl_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
|
||||
// backend API
|
||||
GGML_API ggml_backend_t ggml_backend_sycl_init(int device);
|
||||
|
||||
// devide buffer
|
||||
GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device);
|
||||
|
||||
// split tensor buffer that splits matrices by rows across multiple devices
|
||||
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split);
|
||||
|
||||
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
|
||||
GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type(void);
|
||||
|
||||
GGML_API void ggml_backend_sycl_print_sycl_devices(void);
|
||||
GGML_API GGML_CALL void ggml_sycl_get_gpu_list(int *id_list, int max_len);
|
||||
GGML_API GGML_CALL void ggml_sycl_get_device_description(int device, char *description, size_t description_size);
|
||||
GGML_API GGML_CALL int ggml_backend_sycl_get_device_count();
|
||||
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split);
|
||||
GGML_API GGML_CALL void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
|
||||
GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id);
|
||||
|
||||
@ -34,6 +40,10 @@ GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id);
|
||||
GGML_API GGML_CALL int ggml_backend_sycl_get_device_id(int device_index);
|
||||
GGML_API GGML_CALL void ggml_backend_sycl_set_single_device_mode(int main_gpu_id);
|
||||
GGML_API GGML_CALL void ggml_backend_sycl_set_mul_device_mode();
|
||||
|
||||
// SYCL doesn't support registering host memory, keep here for reference
|
||||
// GGML_API GGML_CALL bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size);
|
||||
// GGML_API GGML_CALL void ggml_backend_sycl_unregister_host_buffer(void * buffer);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
10
ggml.c
10
ggml.c
@ -291,8 +291,6 @@ inline static void * ggml_calloc(size_t num, size_t size) {
|
||||
#include "ggml-opencl.h"
|
||||
#elif defined(GGML_USE_VULKAN)
|
||||
#include "ggml-vulkan.h"
|
||||
#elif defined(GGML_USE_SYCL)
|
||||
#include "ggml-sycl.h"
|
||||
#endif
|
||||
|
||||
// floating point type used to accumulate sums
|
||||
@ -2698,8 +2696,6 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
|
||||
ggml_cl_init();
|
||||
#elif defined(GGML_USE_VULKAN)
|
||||
ggml_vk_init_cpu_assist();
|
||||
#elif defined(GGML_USE_SYCL)
|
||||
ggml_init_sycl();
|
||||
#endif
|
||||
|
||||
ggml_setup_op_has_task_pass();
|
||||
@ -16115,12 +16111,6 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
||||
GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_TYPE_CPU);
|
||||
#endif // GGML_USE_VULKAN
|
||||
|
||||
#ifdef GGML_USE_SYCL
|
||||
bool skip_cpu = ggml_sycl_compute_forward(params, tensor);
|
||||
if (skip_cpu) {
|
||||
return;
|
||||
}
|
||||
#endif // GGML_USE_SYCL
|
||||
switch (tensor->op) {
|
||||
case GGML_OP_DUP:
|
||||
{
|
||||
|
36
llama.cpp
36
llama.cpp
@ -13632,30 +13632,28 @@ struct llama_context * llama_new_context_with_model(
|
||||
}
|
||||
}
|
||||
#elif defined(GGML_USE_SYCL)
|
||||
if (model->n_gpu_layers > 0) {
|
||||
// with split_mode LLAMA_SPLIT_MODE_NONE or LLAMA_SPLIT_MODE_ROW, only the main GPU backend is used
|
||||
if (model->split_mode == LLAMA_SPLIT_MODE_NONE || model->split_mode == LLAMA_SPLIT_MODE_ROW) {
|
||||
ggml_backend_t backend = ggml_backend_sycl_init(model->main_gpu);
|
||||
// with split_mode LLAMA_SPLIT_MODE_NONE or LLAMA_SPLIT_MODE_ROW, only the main GPU backend is used
|
||||
if (model->split_mode == LLAMA_SPLIT_MODE_NONE || model->split_mode == LLAMA_SPLIT_MODE_ROW) {
|
||||
ggml_backend_t backend = ggml_backend_sycl_init(model->main_gpu);
|
||||
if (backend == nullptr) {
|
||||
int main_gpu_id = ggml_backend_sycl_get_device_id(model->main_gpu);
|
||||
LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d (index %d) backend\n", __func__, main_gpu_id, model->main_gpu);
|
||||
llama_free(ctx);
|
||||
return nullptr;
|
||||
}
|
||||
ctx->backends.push_back(backend);
|
||||
} else {
|
||||
// LLAMA_SPLIT_LAYER requires a backend for each GPU
|
||||
for (int i = 0; i < ggml_backend_sycl_get_device_count(); ++i) {
|
||||
ggml_backend_t backend = ggml_backend_sycl_init(i);
|
||||
if (backend == nullptr) {
|
||||
int main_gpu_id = ggml_backend_sycl_get_device_id(model->main_gpu);
|
||||
LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d (index %d) backend\n", __func__, main_gpu_id, model->main_gpu);
|
||||
int id_list[GGML_SYCL_MAX_DEVICES];
|
||||
ggml_sycl_get_gpu_list(id_list, GGML_SYCL_MAX_DEVICES);
|
||||
LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d (index %d) backend\n", __func__, id_list[i], i);
|
||||
llama_free(ctx);
|
||||
return nullptr;
|
||||
}
|
||||
ctx->backends.push_back(backend);
|
||||
} else {
|
||||
// LLAMA_SPLIT_LAYER requires a backend for each GPU
|
||||
for (int i = 0; i < ggml_backend_sycl_get_device_count(); ++i) {
|
||||
ggml_backend_t backend = ggml_backend_sycl_init(i);
|
||||
if (backend == nullptr) {
|
||||
int id_list[GGML_SYCL_MAX_DEVICES];
|
||||
ggml_sycl_get_gpu_list(id_list, GGML_SYCL_MAX_DEVICES);
|
||||
LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d (index %d) backend\n", __func__, id_list[i], i);
|
||||
llama_free(ctx);
|
||||
return nullptr;
|
||||
}
|
||||
ctx->backends.push_back(backend);
|
||||
}
|
||||
}
|
||||
}
|
||||
#elif defined(GGML_USE_KOMPUTE)
|
||||
|
Loading…
Reference in New Issue
Block a user