mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 03:31:46 +00:00
[SYCL] offload op (#6217)
* remove no USM methods * leave the schedule to ggml_backend_sched entirely
This commit is contained in:
parent
d03224ac98
commit
ddf6568510
277
ggml-sycl.cpp
277
ggml-sycl.cpp
@ -740,11 +740,7 @@ namespace dpct
|
|||||||
|
|
||||||
sycl::queue &default_queue()
|
sycl::queue &default_queue()
|
||||||
{
|
{
|
||||||
#ifdef DPCT_USM_LEVEL_NONE
|
|
||||||
return out_of_order_queue();
|
|
||||||
#else
|
|
||||||
return in_order_queue();
|
return in_order_queue();
|
||||||
#endif // DPCT_USM_LEVEL_NONE
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void queues_wait_and_throw()
|
void queues_wait_and_throw()
|
||||||
@ -763,11 +759,7 @@ namespace dpct
|
|||||||
|
|
||||||
sycl::queue *create_queue(bool enable_exception_handler = false)
|
sycl::queue *create_queue(bool enable_exception_handler = false)
|
||||||
{
|
{
|
||||||
#ifdef DPCT_USM_LEVEL_NONE
|
|
||||||
return create_out_of_order_queue(enable_exception_handler);
|
|
||||||
#else
|
|
||||||
return create_in_order_queue(enable_exception_handler);
|
return create_in_order_queue(enable_exception_handler);
|
||||||
#endif // DPCT_USM_LEVEL_NONE
|
|
||||||
}
|
}
|
||||||
|
|
||||||
sycl::queue *create_queue(sycl::context context, sycl::device device,
|
sycl::queue *create_queue(sycl::context context, sycl::device device,
|
||||||
@ -1075,11 +1067,6 @@ namespace dpct
|
|||||||
static pointer_access_attribute get_pointer_attribute(sycl::queue &q,
|
static pointer_access_attribute get_pointer_attribute(sycl::queue &q,
|
||||||
const void *ptr)
|
const void *ptr)
|
||||||
{
|
{
|
||||||
#ifdef DPCT_USM_LEVEL_NONE
|
|
||||||
return mem_mgr::instance().is_device_ptr(ptr)
|
|
||||||
? pointer_access_attribute::device_only
|
|
||||||
: pointer_access_attribute::host_only;
|
|
||||||
#else
|
|
||||||
switch (sycl::get_pointer_type(ptr, q.get_context()))
|
switch (sycl::get_pointer_type(ptr, q.get_context()))
|
||||||
{
|
{
|
||||||
case sycl::usm::alloc::unknown:
|
case sycl::usm::alloc::unknown:
|
||||||
@ -1090,7 +1077,6 @@ namespace dpct
|
|||||||
case sycl::usm::alloc::host:
|
case sycl::usm::alloc::host:
|
||||||
return pointer_access_attribute::host_device;
|
return pointer_access_attribute::host_device;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename ArgT>
|
template <typename ArgT>
|
||||||
@ -1273,11 +1259,7 @@ namespace dpct
|
|||||||
|
|
||||||
static inline void *dpct_malloc(size_t size, sycl::queue &q)
|
static inline void *dpct_malloc(size_t size, sycl::queue &q)
|
||||||
{
|
{
|
||||||
#ifdef DPCT_USM_LEVEL_NONE
|
|
||||||
return mem_mgr::instance().mem_alloc(size * sizeof(byte_t));
|
|
||||||
#else
|
|
||||||
return sycl::malloc_device(size, q.get_device(), q.get_context());
|
return sycl::malloc_device(size, q.get_device(), q.get_context());
|
||||||
#endif // DPCT_USM_LEVEL_NONE
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#define PITCH_DEFAULT_ALIGN(x) (((x) + 31) & ~(0x1F))
|
#define PITCH_DEFAULT_ALIGN(x) (((x) + 31) & ~(0x1F))
|
||||||
@ -1301,25 +1283,7 @@ namespace dpct
|
|||||||
static inline sycl::event dpct_memset(sycl::queue &q, void *dev_ptr,
|
static inline sycl::event dpct_memset(sycl::queue &q, void *dev_ptr,
|
||||||
valueT value, size_t size)
|
valueT value, size_t size)
|
||||||
{
|
{
|
||||||
#ifdef DPCT_USM_LEVEL_NONE
|
|
||||||
auto &mm = mem_mgr::instance();
|
|
||||||
assert(mm.is_device_ptr(dev_ptr));
|
|
||||||
auto alloc = mm.translate_ptr(dev_ptr);
|
|
||||||
size_t offset = (valueT *)dev_ptr - (valueT *)alloc.alloc_ptr;
|
|
||||||
|
|
||||||
return q.submit([&](sycl::handler &cgh)
|
|
||||||
{
|
|
||||||
auto r = sycl::range<1>(size);
|
|
||||||
auto o = sycl::id<1>(offset);
|
|
||||||
auto new_buffer = alloc.buffer.reinterpret<valueT>(
|
|
||||||
sycl::range<1>(alloc.size / sizeof(valueT)));
|
|
||||||
sycl::accessor<valueT, 1, sycl::access_mode::write,
|
|
||||||
sycl::access::target::device>
|
|
||||||
acc(new_buffer, cgh, r, o);
|
|
||||||
cgh.fill(acc, value); });
|
|
||||||
#else
|
|
||||||
return q.fill(dev_ptr, value, size);
|
return q.fill(dev_ptr, value, size);
|
||||||
#endif // DPCT_USM_LEVEL_NONE
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -1413,72 +1377,8 @@ namespace dpct
|
|||||||
{
|
{
|
||||||
if (!size)
|
if (!size)
|
||||||
return sycl::event{};
|
return sycl::event{};
|
||||||
#ifdef DPCT_USM_LEVEL_NONE
|
|
||||||
auto &mm = mem_mgr::instance();
|
|
||||||
auto real_direction = deduce_memcpy_direction(q, to_ptr, from_ptr, direction);
|
|
||||||
|
|
||||||
switch (real_direction)
|
|
||||||
{
|
|
||||||
case host_to_host:
|
|
||||||
return q.submit([&](sycl::handler &cgh)
|
|
||||||
{
|
|
||||||
cgh.depends_on(dep_events);
|
|
||||||
cgh.host_task([=] { std::memcpy(to_ptr, from_ptr, size); }); });
|
|
||||||
case host_to_device:
|
|
||||||
{
|
|
||||||
auto alloc = mm.translate_ptr(to_ptr);
|
|
||||||
size_t offset = (byte_t *)to_ptr - alloc.alloc_ptr;
|
|
||||||
return q.submit([&](sycl::handler &cgh)
|
|
||||||
{
|
|
||||||
cgh.depends_on(dep_events);
|
|
||||||
auto r = sycl::range<1>(size);
|
|
||||||
auto o = sycl::id<1>(offset);
|
|
||||||
sycl::accessor<byte_t, 1, sycl::access_mode::write,
|
|
||||||
sycl::access::target::device>
|
|
||||||
acc(alloc.buffer, cgh, r, o);
|
|
||||||
cgh.copy(from_ptr, acc); });
|
|
||||||
}
|
|
||||||
case device_to_host:
|
|
||||||
{
|
|
||||||
auto alloc = mm.translate_ptr(from_ptr);
|
|
||||||
size_t offset = (byte_t *)from_ptr - alloc.alloc_ptr;
|
|
||||||
return q.submit([&](sycl::handler &cgh)
|
|
||||||
{
|
|
||||||
cgh.depends_on(dep_events);
|
|
||||||
auto r = sycl::range<1>(size);
|
|
||||||
auto o = sycl::id<1>(offset);
|
|
||||||
sycl::accessor<byte_t, 1, sycl::access_mode::read,
|
|
||||||
sycl::access::target::device>
|
|
||||||
acc(alloc.buffer, cgh, r, o);
|
|
||||||
cgh.copy(acc, to_ptr); });
|
|
||||||
}
|
|
||||||
case device_to_device:
|
|
||||||
{
|
|
||||||
auto to_alloc = mm.translate_ptr(to_ptr);
|
|
||||||
auto from_alloc = mm.translate_ptr(from_ptr);
|
|
||||||
size_t to_offset = (byte_t *)to_ptr - to_alloc.alloc_ptr;
|
|
||||||
size_t from_offset = (byte_t *)from_ptr - from_alloc.alloc_ptr;
|
|
||||||
return q.submit([&](sycl::handler &cgh)
|
|
||||||
{
|
|
||||||
cgh.depends_on(dep_events);
|
|
||||||
auto r = sycl::range<1>(size);
|
|
||||||
auto to_o = sycl::id<1>(to_offset);
|
|
||||||
auto from_o = sycl::id<1>(from_offset);
|
|
||||||
sycl::accessor<byte_t, 1, sycl::access_mode::write,
|
|
||||||
sycl::access::target::device>
|
|
||||||
to_acc(to_alloc.buffer, cgh, r, to_o);
|
|
||||||
sycl::accessor<byte_t, 1, sycl::access_mode::read,
|
|
||||||
sycl::access::target::device>
|
|
||||||
from_acc(from_alloc.buffer, cgh, r, from_o);
|
|
||||||
cgh.copy(from_acc, to_acc); });
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
throw std::runtime_error("dpct_memcpy: invalid direction value");
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
return q.memcpy(to_ptr, from_ptr, size, dep_events);
|
return q.memcpy(to_ptr, from_ptr, size, dep_events);
|
||||||
GGML_UNUSED(direction);
|
GGML_UNUSED(direction);
|
||||||
#endif // DPCT_USM_LEVEL_NONE
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get actual copy range and make sure it will not exceed range.
|
// Get actual copy range and make sure it will not exceed range.
|
||||||
@ -1618,36 +1518,7 @@ namespace dpct
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case device_to_device:
|
case device_to_device:
|
||||||
#ifdef DPCT_USM_LEVEL_NONE
|
event_list.push_back(q.submit([&](sycl::handler &cgh){
|
||||||
{
|
|
||||||
auto &mm = mem_mgr::instance();
|
|
||||||
auto to_alloc = mm.translate_ptr(to_surface);
|
|
||||||
auto from_alloc = mm.translate_ptr(from_surface);
|
|
||||||
size_t to_offset = (byte_t *)to_surface - to_alloc.alloc_ptr;
|
|
||||||
size_t from_offset = (byte_t *)from_surface - from_alloc.alloc_ptr;
|
|
||||||
event_list.push_back(q.submit([&](sycl::handler &cgh)
|
|
||||||
{
|
|
||||||
cgh.depends_on(dep_events);
|
|
||||||
auto to_o = sycl::id<1>(to_offset);
|
|
||||||
auto from_o = sycl::id<1>(from_offset);
|
|
||||||
sycl::accessor<byte_t, 1, sycl::access_mode::write,
|
|
||||||
sycl::access::target::device>
|
|
||||||
to_acc(to_alloc.buffer, cgh,
|
|
||||||
get_copy_range(size, to_slice, to_range.get(0)), to_o);
|
|
||||||
sycl::accessor<byte_t, 1, sycl::access_mode::read,
|
|
||||||
sycl::access::target::device>
|
|
||||||
from_acc(from_alloc.buffer, cgh,
|
|
||||||
get_copy_range(size, from_slice, from_range.get(0)), from_o);
|
|
||||||
cgh.parallel_for<class dpct_memcpy_3d_detail_usmnone>(
|
|
||||||
size,
|
|
||||||
[=](sycl::id<3> id) {
|
|
||||||
to_acc[get_offset(id, to_slice, to_range.get(0))] =
|
|
||||||
from_acc[get_offset(id, from_slice, from_range.get(0))];
|
|
||||||
}); }));
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
event_list.push_back(q.submit([&](sycl::handler &cgh)
|
|
||||||
{
|
|
||||||
cgh.depends_on(dep_events);
|
cgh.depends_on(dep_events);
|
||||||
cgh.parallel_for<class dpct_memcpy_3d_detail>(
|
cgh.parallel_for<class dpct_memcpy_3d_detail>(
|
||||||
size,
|
size,
|
||||||
@ -1655,7 +1526,6 @@ namespace dpct
|
|||||||
to_surface[get_offset(id, to_slice, to_range.get(0))] =
|
to_surface[get_offset(id, to_slice, to_range.get(0))] =
|
||||||
from_surface[get_offset(id, from_slice, from_range.get(0))];
|
from_surface[get_offset(id, from_slice, from_range.get(0))];
|
||||||
}); }));
|
}); }));
|
||||||
#endif
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
throw std::runtime_error("dpct_memcpy: invalid direction value");
|
throw std::runtime_error("dpct_memcpy: invalid direction value");
|
||||||
@ -1754,11 +1624,7 @@ namespace dpct
|
|||||||
{
|
{
|
||||||
if (ptr)
|
if (ptr)
|
||||||
{
|
{
|
||||||
#ifdef DPCT_USM_LEVEL_NONE
|
|
||||||
detail::mem_mgr::instance().mem_free(ptr);
|
|
||||||
#else
|
|
||||||
sycl::free(ptr, q.get_context());
|
sycl::free(ptr, q.get_context());
|
||||||
#endif // DPCT_USM_LEVEL_NONE
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1766,11 +1632,7 @@ namespace dpct
|
|||||||
inline auto get_memory(const void *x)
|
inline auto get_memory(const void *x)
|
||||||
{
|
{
|
||||||
T *new_x = reinterpret_cast<T *>(const_cast<void *>(x));
|
T *new_x = reinterpret_cast<T *>(const_cast<void *>(x));
|
||||||
#ifdef DPCT_USM_LEVEL_NONE
|
|
||||||
return dpct::get_buffer<std::remove_cv_t<T>>(new_x);
|
|
||||||
#else
|
|
||||||
return new_x;
|
return new_x;
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
@ -2222,72 +2084,8 @@ namespace dpct
|
|||||||
{
|
{
|
||||||
if (!size)
|
if (!size)
|
||||||
return sycl::event{};
|
return sycl::event{};
|
||||||
#ifdef DPCT_USM_LEVEL_NONE
|
|
||||||
auto &mm = mem_mgr::instance();
|
|
||||||
auto real_direction = deduce_memcpy_direction(q, to_ptr, from_ptr, direction);
|
|
||||||
|
|
||||||
switch (real_direction)
|
|
||||||
{
|
|
||||||
case host_to_host:
|
|
||||||
return q.submit([&](sycl::handler &cgh)
|
|
||||||
{
|
|
||||||
cgh.depends_on(dep_events);
|
|
||||||
cgh.host_task([=] { std::memcpy(to_ptr, from_ptr, size); }); });
|
|
||||||
case host_to_device:
|
|
||||||
{
|
|
||||||
auto alloc = mm.translate_ptr(to_ptr);
|
|
||||||
size_t offset = (byte_t *)to_ptr - alloc.alloc_ptr;
|
|
||||||
return q.submit([&](sycl::handler &cgh)
|
|
||||||
{
|
|
||||||
cgh.depends_on(dep_events);
|
|
||||||
auto r = sycl::range<1>(size);
|
|
||||||
auto o = sycl::id<1>(offset);
|
|
||||||
sycl::accessor<byte_t, 1, sycl::access_mode::write,
|
|
||||||
sycl::access::target::device>
|
|
||||||
acc(alloc.buffer, cgh, r, o);
|
|
||||||
cgh.copy(from_ptr, acc); });
|
|
||||||
}
|
|
||||||
case device_to_host:
|
|
||||||
{
|
|
||||||
auto alloc = mm.translate_ptr(from_ptr);
|
|
||||||
size_t offset = (byte_t *)from_ptr - alloc.alloc_ptr;
|
|
||||||
return q.submit([&](sycl::handler &cgh)
|
|
||||||
{
|
|
||||||
cgh.depends_on(dep_events);
|
|
||||||
auto r = sycl::range<1>(size);
|
|
||||||
auto o = sycl::id<1>(offset);
|
|
||||||
sycl::accessor<byte_t, 1, sycl::access_mode::read,
|
|
||||||
sycl::access::target::device>
|
|
||||||
acc(alloc.buffer, cgh, r, o);
|
|
||||||
cgh.copy(acc, to_ptr); });
|
|
||||||
}
|
|
||||||
case device_to_device:
|
|
||||||
{
|
|
||||||
auto to_alloc = mm.translate_ptr(to_ptr);
|
|
||||||
auto from_alloc = mm.translate_ptr(from_ptr);
|
|
||||||
size_t to_offset = (byte_t *)to_ptr - to_alloc.alloc_ptr;
|
|
||||||
size_t from_offset = (byte_t *)from_ptr - from_alloc.alloc_ptr;
|
|
||||||
return q.submit([&](sycl::handler &cgh)
|
|
||||||
{
|
|
||||||
cgh.depends_on(dep_events);
|
|
||||||
auto r = sycl::range<1>(size);
|
|
||||||
auto to_o = sycl::id<1>(to_offset);
|
|
||||||
auto from_o = sycl::id<1>(from_offset);
|
|
||||||
sycl::accessor<byte_t, 1, sycl::access_mode::write,
|
|
||||||
sycl::access::target::device>
|
|
||||||
to_acc(to_alloc.buffer, cgh, r, to_o);
|
|
||||||
sycl::accessor<byte_t, 1, sycl::access_mode::read,
|
|
||||||
sycl::access::target::device>
|
|
||||||
from_acc(from_alloc.buffer, cgh, r, from_o);
|
|
||||||
cgh.copy(from_acc, to_acc); });
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
throw std::runtime_error("dpct_memcpy: invalid direction value");
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
return q.memcpy(to_ptr, from_ptr, size, dep_events);
|
return q.memcpy(to_ptr, from_ptr, size, dep_events);
|
||||||
GGML_UNUSED(direction);
|
GGML_UNUSED(direction);
|
||||||
#endif // DPCT_USM_LEVEL_NONE
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get actual copy range and make sure it will not exceed range.
|
// Get actual copy range and make sure it will not exceed range.
|
||||||
@ -2427,34 +2225,6 @@ namespace dpct
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case device_to_device:
|
case device_to_device:
|
||||||
#ifdef DPCT_USM_LEVEL_NONE
|
|
||||||
{
|
|
||||||
auto &mm = mem_mgr::instance();
|
|
||||||
auto to_alloc = mm.translate_ptr(to_surface);
|
|
||||||
auto from_alloc = mm.translate_ptr(from_surface);
|
|
||||||
size_t to_offset = (byte_t *)to_surface - to_alloc.alloc_ptr;
|
|
||||||
size_t from_offset = (byte_t *)from_surface - from_alloc.alloc_ptr;
|
|
||||||
event_list.push_back(q.submit([&](sycl::handler &cgh)
|
|
||||||
{
|
|
||||||
cgh.depends_on(dep_events);
|
|
||||||
auto to_o = sycl::id<1>(to_offset);
|
|
||||||
auto from_o = sycl::id<1>(from_offset);
|
|
||||||
sycl::accessor<byte_t, 1, sycl::access_mode::write,
|
|
||||||
sycl::access::target::device>
|
|
||||||
to_acc(to_alloc.buffer, cgh,
|
|
||||||
get_copy_range(size, to_slice, to_range.get(0)), to_o);
|
|
||||||
sycl::accessor<byte_t, 1, sycl::access_mode::read,
|
|
||||||
sycl::access::target::device>
|
|
||||||
from_acc(from_alloc.buffer, cgh,
|
|
||||||
get_copy_range(size, from_slice, from_range.get(0)), from_o);
|
|
||||||
cgh.parallel_for<class dpct_memcpy_3d_detail_usmnone>(
|
|
||||||
size,
|
|
||||||
[=](sycl::id<3> id) {
|
|
||||||
to_acc[get_offset(id, to_slice, to_range.get(0))] =
|
|
||||||
from_acc[get_offset(id, from_slice, from_range.get(0))];
|
|
||||||
}); }));
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
event_list.push_back(q.submit([&](sycl::handler &cgh)
|
event_list.push_back(q.submit([&](sycl::handler &cgh)
|
||||||
{
|
{
|
||||||
cgh.depends_on(dep_events);
|
cgh.depends_on(dep_events);
|
||||||
@ -2464,7 +2234,6 @@ namespace dpct
|
|||||||
to_surface[get_offset(id, to_slice, to_range.get(0))] =
|
to_surface[get_offset(id, to_slice, to_range.get(0))] =
|
||||||
from_surface[get_offset(id, from_slice, from_range.get(0))];
|
from_surface[get_offset(id, from_slice, from_range.get(0))];
|
||||||
}); }));
|
}); }));
|
||||||
#endif
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
throw std::runtime_error("dpct_memcpy: invalid direction value");
|
throw std::runtime_error("dpct_memcpy: invalid direction value");
|
||||||
@ -2655,9 +2424,6 @@ namespace dpct
|
|||||||
void *c[], library_data_t c_type, int ldc,
|
void *c[], library_data_t c_type, int ldc,
|
||||||
int batch_size, library_data_t scaling_type)
|
int batch_size, library_data_t scaling_type)
|
||||||
{
|
{
|
||||||
#ifdef DPCT_USM_LEVEL_NONE
|
|
||||||
throw std::runtime_error("this API is unsupported when USM level is none");
|
|
||||||
#else
|
|
||||||
if (scaling_type == library_data_t::real_float &&
|
if (scaling_type == library_data_t::real_float &&
|
||||||
c_type == library_data_t::complex_float)
|
c_type == library_data_t::complex_float)
|
||||||
{
|
{
|
||||||
@ -2792,7 +2558,6 @@ namespace dpct
|
|||||||
default:
|
default:
|
||||||
throw std::runtime_error("the combination of data type is unsupported");
|
throw std::runtime_error("the combination of data type is unsupported");
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Computes a batch of matrix-matrix product with general matrices.
|
/// Computes a batch of matrix-matrix product with general matrices.
|
||||||
@ -3131,24 +2896,9 @@ namespace dpct
|
|||||||
template <size_t D = Dimension>
|
template <size_t D = Dimension>
|
||||||
typename std::enable_if<D == 1, T>::type &operator[](size_t index) {
|
typename std::enable_if<D == 1, T>::type &operator[](size_t index) {
|
||||||
init();
|
init();
|
||||||
#ifdef DPCT_USM_LEVEL_NONE
|
|
||||||
return dpct::get_buffer<typename std::enable_if<D == 1, T>::type>(
|
|
||||||
_device_ptr)
|
|
||||||
.template get_access<sycl::access_mode::read_write>()[index];
|
|
||||||
#else
|
|
||||||
return _device_ptr[index];
|
return _device_ptr[index];
|
||||||
#endif // DPCT_USM_LEVEL_NONE
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef DPCT_USM_LEVEL_NONE
|
|
||||||
/// Get sycl::accessor for the device memory object when usm is not used.
|
|
||||||
accessor_t get_access(sycl::handler &cgh) {
|
|
||||||
return get_buffer(_device_ptr)
|
|
||||||
.template reinterpret<T, Dimension>(_range)
|
|
||||||
.template get_access<detail::memory_traits<Memory, T>::mode,
|
|
||||||
detail::memory_traits<Memory, T>::target>(cgh);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
/// Get dpct::accessor with dimension info for the device memory object
|
/// Get dpct::accessor with dimension info for the device memory object
|
||||||
/// when usm is used and dimension is greater than 1.
|
/// when usm is used and dimension is greater than 1.
|
||||||
template <size_t D = Dimension>
|
template <size_t D = Dimension>
|
||||||
@ -3156,7 +2906,6 @@ namespace dpct
|
|||||||
get_access(sycl::handler &cgh) {
|
get_access(sycl::handler &cgh) {
|
||||||
return dpct_accessor_t((T *)_device_ptr, _range);
|
return dpct_accessor_t((T *)_device_ptr, _range);
|
||||||
}
|
}
|
||||||
#endif // DPCT_USM_LEVEL_NONE
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
device_memory(value_t *memory_ptr, size_t size)
|
device_memory(value_t *memory_ptr, size_t size)
|
||||||
@ -3201,15 +2950,6 @@ namespace dpct
|
|||||||
|
|
||||||
/// Default constructor
|
/// Default constructor
|
||||||
device_memory() : base(1) {}
|
device_memory() : base(1) {}
|
||||||
|
|
||||||
#ifdef DPCT_USM_LEVEL_NONE
|
|
||||||
/// Get sycl::accessor for the device memory object when usm is not used.
|
|
||||||
accessor_t get_access(sycl::handler &cgh) {
|
|
||||||
auto buf = get_buffer(base::get_ptr())
|
|
||||||
.template reinterpret<T, 1>(sycl::range<1>(1));
|
|
||||||
return accessor_t(buf, cgh);
|
|
||||||
}
|
|
||||||
#endif // DPCT_USM_LEVEL_NONE
|
|
||||||
};
|
};
|
||||||
} // namespace detail
|
} // namespace detail
|
||||||
|
|
||||||
@ -13181,7 +12921,7 @@ int get_work_group_size(int user_device_id) {
|
|||||||
return prop.get_max_work_group_size();
|
return prop.get_max_work_group_size();
|
||||||
}
|
}
|
||||||
|
|
||||||
void ggml_init_sycl() try {
|
static void ggml_init_sycl() try {
|
||||||
static bool initialized = false;
|
static bool initialized = false;
|
||||||
|
|
||||||
if (!initialized) {
|
if (!initialized) {
|
||||||
@ -16677,6 +16417,7 @@ static ggml_backend_buffer_type_i ggml_backend_sycl_buffer_type_interface = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device_index) {
|
ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device_index) {
|
||||||
|
ggml_init_sycl();
|
||||||
if (device_index>=g_device_count or device_index<0) {
|
if (device_index>=g_device_count or device_index<0) {
|
||||||
printf("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
|
printf("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
|
||||||
device_index, g_device_count-1);
|
device_index, g_device_count-1);
|
||||||
@ -17046,6 +16787,7 @@ static ggml_backend_buffer_type_i ggml_backend_sycl_split_buffer_type_interface
|
|||||||
};
|
};
|
||||||
|
|
||||||
GGML_CALL ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split) {
|
GGML_CALL ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split) {
|
||||||
|
ggml_init_sycl();
|
||||||
// FIXME: this is not thread safe
|
// FIXME: this is not thread safe
|
||||||
static std::map<std::array<float, GGML_SYCL_MAX_DEVICES>, struct ggml_backend_buffer_type> buft_map;
|
static std::map<std::array<float, GGML_SYCL_MAX_DEVICES>, struct ggml_backend_buffer_type> buft_map;
|
||||||
|
|
||||||
@ -17379,6 +17121,13 @@ GGML_CALL static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, cons
|
|||||||
UNUSED(backend);
|
UNUSED(backend);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GGML_CALL static bool ggml_backend_sycl_offload_op(ggml_backend_t backend, const ggml_tensor * op) {
|
||||||
|
const int min_batch_size = 32;
|
||||||
|
return op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS;
|
||||||
|
GGML_UNUSED(backend);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static ggml_backend_i ggml_backend_sycl_interface = {
|
static ggml_backend_i ggml_backend_sycl_interface = {
|
||||||
/* .get_name = */ ggml_backend_sycl_name,
|
/* .get_name = */ ggml_backend_sycl_name,
|
||||||
/* .free = */ ggml_backend_sycl_free,
|
/* .free = */ ggml_backend_sycl_free,
|
||||||
@ -17392,7 +17141,7 @@ static ggml_backend_i ggml_backend_sycl_interface = {
|
|||||||
/* .graph_plan_compute = */ NULL,
|
/* .graph_plan_compute = */ NULL,
|
||||||
/* .graph_compute = */ ggml_backend_sycl_graph_compute,
|
/* .graph_compute = */ ggml_backend_sycl_graph_compute,
|
||||||
/* .supports_op = */ ggml_backend_sycl_supports_op,
|
/* .supports_op = */ ggml_backend_sycl_supports_op,
|
||||||
/* .offload_op = */ NULL,
|
/* .offload_op = */ ggml_backend_sycl_offload_op,
|
||||||
/* .event_new = */ NULL,
|
/* .event_new = */ NULL,
|
||||||
/* .event_free = */ NULL,
|
/* .event_free = */ NULL,
|
||||||
/* .event_record = */ NULL,
|
/* .event_record = */ NULL,
|
||||||
@ -17406,7 +17155,7 @@ static ggml_guid_t ggml_backend_sycl_guid() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
GGML_CALL ggml_backend_t ggml_backend_sycl_init(int device) {
|
GGML_CALL ggml_backend_t ggml_backend_sycl_init(int device) {
|
||||||
ggml_init_sycl(); // TODO: remove from ggml.c
|
ggml_init_sycl();
|
||||||
|
|
||||||
check_allow_gpu_index(device);
|
check_allow_gpu_index(device);
|
||||||
|
|
||||||
|
16
ggml-sycl.h
16
ggml-sycl.h
@ -16,16 +16,22 @@ extern "C" {
|
|||||||
#define GGML_SYCL_MAX_DEVICES 48
|
#define GGML_SYCL_MAX_DEVICES 48
|
||||||
#define GGML_SYCL_NAME "SYCL"
|
#define GGML_SYCL_NAME "SYCL"
|
||||||
|
|
||||||
GGML_API void ggml_init_sycl(void);
|
// backend API
|
||||||
GGML_API bool ggml_sycl_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
|
|
||||||
GGML_API ggml_backend_t ggml_backend_sycl_init(int device);
|
GGML_API ggml_backend_t ggml_backend_sycl_init(int device);
|
||||||
|
|
||||||
|
// devide buffer
|
||||||
GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device);
|
GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device);
|
||||||
|
|
||||||
|
// split tensor buffer that splits matrices by rows across multiple devices
|
||||||
|
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split);
|
||||||
|
|
||||||
|
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
|
||||||
GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type(void);
|
GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type(void);
|
||||||
|
|
||||||
GGML_API void ggml_backend_sycl_print_sycl_devices(void);
|
GGML_API void ggml_backend_sycl_print_sycl_devices(void);
|
||||||
GGML_API GGML_CALL void ggml_sycl_get_gpu_list(int *id_list, int max_len);
|
GGML_API GGML_CALL void ggml_sycl_get_gpu_list(int *id_list, int max_len);
|
||||||
GGML_API GGML_CALL void ggml_sycl_get_device_description(int device, char *description, size_t description_size);
|
GGML_API GGML_CALL void ggml_sycl_get_device_description(int device, char *description, size_t description_size);
|
||||||
GGML_API GGML_CALL int ggml_backend_sycl_get_device_count();
|
GGML_API GGML_CALL int ggml_backend_sycl_get_device_count();
|
||||||
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split);
|
|
||||||
GGML_API GGML_CALL void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
|
GGML_API GGML_CALL void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
|
||||||
GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id);
|
GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id);
|
||||||
|
|
||||||
@ -34,6 +40,10 @@ GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id);
|
|||||||
GGML_API GGML_CALL int ggml_backend_sycl_get_device_id(int device_index);
|
GGML_API GGML_CALL int ggml_backend_sycl_get_device_id(int device_index);
|
||||||
GGML_API GGML_CALL void ggml_backend_sycl_set_single_device_mode(int main_gpu_id);
|
GGML_API GGML_CALL void ggml_backend_sycl_set_single_device_mode(int main_gpu_id);
|
||||||
GGML_API GGML_CALL void ggml_backend_sycl_set_mul_device_mode();
|
GGML_API GGML_CALL void ggml_backend_sycl_set_mul_device_mode();
|
||||||
|
|
||||||
|
// SYCL doesn't support registering host memory, keep here for reference
|
||||||
|
// GGML_API GGML_CALL bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size);
|
||||||
|
// GGML_API GGML_CALL void ggml_backend_sycl_unregister_host_buffer(void * buffer);
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
10
ggml.c
10
ggml.c
@ -291,8 +291,6 @@ inline static void * ggml_calloc(size_t num, size_t size) {
|
|||||||
#include "ggml-opencl.h"
|
#include "ggml-opencl.h"
|
||||||
#elif defined(GGML_USE_VULKAN)
|
#elif defined(GGML_USE_VULKAN)
|
||||||
#include "ggml-vulkan.h"
|
#include "ggml-vulkan.h"
|
||||||
#elif defined(GGML_USE_SYCL)
|
|
||||||
#include "ggml-sycl.h"
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// floating point type used to accumulate sums
|
// floating point type used to accumulate sums
|
||||||
@ -2698,8 +2696,6 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
|
|||||||
ggml_cl_init();
|
ggml_cl_init();
|
||||||
#elif defined(GGML_USE_VULKAN)
|
#elif defined(GGML_USE_VULKAN)
|
||||||
ggml_vk_init_cpu_assist();
|
ggml_vk_init_cpu_assist();
|
||||||
#elif defined(GGML_USE_SYCL)
|
|
||||||
ggml_init_sycl();
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
ggml_setup_op_has_task_pass();
|
ggml_setup_op_has_task_pass();
|
||||||
@ -16115,12 +16111,6 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|||||||
GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_TYPE_CPU);
|
GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_TYPE_CPU);
|
||||||
#endif // GGML_USE_VULKAN
|
#endif // GGML_USE_VULKAN
|
||||||
|
|
||||||
#ifdef GGML_USE_SYCL
|
|
||||||
bool skip_cpu = ggml_sycl_compute_forward(params, tensor);
|
|
||||||
if (skip_cpu) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
#endif // GGML_USE_SYCL
|
|
||||||
switch (tensor->op) {
|
switch (tensor->op) {
|
||||||
case GGML_OP_DUP:
|
case GGML_OP_DUP:
|
||||||
{
|
{
|
||||||
|
@ -13632,7 +13632,6 @@ struct llama_context * llama_new_context_with_model(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#elif defined(GGML_USE_SYCL)
|
#elif defined(GGML_USE_SYCL)
|
||||||
if (model->n_gpu_layers > 0) {
|
|
||||||
// with split_mode LLAMA_SPLIT_MODE_NONE or LLAMA_SPLIT_MODE_ROW, only the main GPU backend is used
|
// with split_mode LLAMA_SPLIT_MODE_NONE or LLAMA_SPLIT_MODE_ROW, only the main GPU backend is used
|
||||||
if (model->split_mode == LLAMA_SPLIT_MODE_NONE || model->split_mode == LLAMA_SPLIT_MODE_ROW) {
|
if (model->split_mode == LLAMA_SPLIT_MODE_NONE || model->split_mode == LLAMA_SPLIT_MODE_ROW) {
|
||||||
ggml_backend_t backend = ggml_backend_sycl_init(model->main_gpu);
|
ggml_backend_t backend = ggml_backend_sycl_init(model->main_gpu);
|
||||||
@ -13657,7 +13656,6 @@ struct llama_context * llama_new_context_with_model(
|
|||||||
ctx->backends.push_back(backend);
|
ctx->backends.push_back(backend);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
#elif defined(GGML_USE_KOMPUTE)
|
#elif defined(GGML_USE_KOMPUTE)
|
||||||
if (model->n_gpu_layers > 0) {
|
if (model->n_gpu_layers > 0) {
|
||||||
auto * backend = ggml_backend_kompute_init(model->main_gpu);
|
auto * backend = ggml_backend_kompute_init(model->main_gpu);
|
||||||
|
Loading…
Reference in New Issue
Block a user