[SYCL] Fix windows build and inference (#8003)

* add sycl preset

* fix debug link error. fix windows crash

* update README
This commit is contained in:
luoyu-intel 2024-06-20 13:19:05 +00:00 committed by GitHub
parent d50f8897a7
commit de391e4c80
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 230 additions and 244 deletions

View File

@ -665,6 +665,7 @@ if (LLAMA_SYCL)
#todo: AOT #todo: AOT
find_package(IntelSYCL REQUIRED) find_package(IntelSYCL REQUIRED)
find_package(MKL REQUIRED)
message(STATUS "SYCL found") message(STATUS "SYCL found")
@ -679,11 +680,9 @@ if (LLAMA_SYCL)
endif() endif()
add_compile_options(-I./) #include DPCT add_compile_options(-I./) #include DPCT
add_compile_options(-I/${SYCL_INCLUDE_DIR})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
if (LLAMA_SYCL_TARGET STREQUAL "NVIDIA") if (LLAMA_SYCL_TARGET STREQUAL "NVIDIA")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
endif() endif()
@ -693,8 +692,10 @@ if (LLAMA_SYCL)
list(APPEND GGML_SOURCES_SYCL "ggml-sycl.cpp") list(APPEND GGML_SOURCES_SYCL "ggml-sycl.cpp")
if (WIN32) if (WIN32)
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl sycl7 OpenCL mkl_sycl_blas_dll.lib mkl_intel_ilp64_dll.lib mkl_sequential_dll.lib mkl_core_dll.lib) set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL)
else() else()
add_compile_options(-I/${SYCL_INCLUDE_DIR})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
if (LLAMA_SYCL_TARGET STREQUAL "INTEL") if (LLAMA_SYCL_TARGET STREQUAL "INTEL")
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread) set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
elseif (LLAMA_SYCL_TARGET STREQUAL "NVIDIA") elseif (LLAMA_SYCL_TARGET STREQUAL "NVIDIA")

View File

@ -11,9 +11,21 @@
"CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.." "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
} }
}, },
{
"name": "sycl-base",
"hidden": true,
"generator": "Ninja",
"binaryDir": "${sourceDir}/build-${presetName}",
"cacheVariables": {
"CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
"CMAKE_CXX_COMPILER": "icx",
"LLAMA_SYCL": "ON",
"CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
}
},
{ "name": "debug", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug" } }, { "name": "debug", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug" } },
{ "name": "release", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } }, { "name": "release", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Release" } },
{ "name": "reldbg", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } },
{ "name": "static", "hidden": true, "cacheVariables": { "LLAMA_STATIC": "ON" } }, { "name": "static", "hidden": true, "cacheVariables": { "LLAMA_STATIC": "ON" } },
{ {
@ -35,15 +47,18 @@
}, },
{ "name": "arm64-windows-llvm-debug" , "inherits": [ "base", "arm64-windows-llvm", "debug" ] }, { "name": "arm64-windows-llvm-debug" , "inherits": [ "base", "arm64-windows-llvm", "debug" ] },
{ "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm", "release" ] }, { "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg" ] },
{ "name": "arm64-windows-llvm+static-release", "inherits": [ "base", "arm64-windows-llvm", "release", "static" ] }, { "name": "arm64-windows-llvm+static-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg", "static" ] },
{ "name": "arm64-windows-msvc-debug" , "inherits": [ "base", "arm64-windows-msvc", "debug" ] }, { "name": "arm64-windows-msvc-debug" , "inherits": [ "base", "arm64-windows-msvc", "debug" ] },
{ "name": "arm64-windows-msvc-release", "inherits": [ "base", "arm64-windows-msvc", "release" ] }, { "name": "arm64-windows-msvc-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg" ] },
{ "name": "arm64-windows-msvc+static-release", "inherits": [ "base", "arm64-windows-msvc", "release", "static" ] }, { "name": "arm64-windows-msvc+static-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg", "static" ] },
{ "name": "x64-windows-msvc-debug" , "inherits": [ "base", "debug" ] }, { "name": "x64-windows-msvc-debug" , "inherits": [ "base", "debug" ] },
{ "name": "x64-windows-msvc-release", "inherits": [ "base", "release" ] }, { "name": "x64-windows-msvc-release", "inherits": [ "base", "reldbg" ] },
{ "name": "x64-windows-msvc+static-release", "inherits": [ "base", "release", "static" ] } { "name": "x64-windows-msvc+static-release", "inherits": [ "base", "reldbg", "static" ] },
{ "name": "x64-windows-sycl-debug" , "inherits": [ "sycl-base", "debug" ] },
{ "name": "x64-windows-sycl-release", "inherits": [ "sycl-base", "release" ] }
] ]
} }

View File

@ -410,15 +410,9 @@ Output (example):
4. Install build tools 4. Install build tools
a. Download & install cmake for Windows: https://cmake.org/download/ a. Download & install cmake for Windows: https://cmake.org/download/ (CMake can also be installed from Visual Studio Installer)
b. The new Visual Studio will install Ninja as default. (If not, please install it manually: https://ninja-build.org/)
b. Download & install mingw-w64 make for Windows provided by w64devkit
- Download the 1.19.0 version of [w64devkit](https://github.com/skeeto/w64devkit/releases/download/v1.19.0/w64devkit-1.19.0.zip).
- Extract `w64devkit` on your pc.
- Add the **bin** folder path in the Windows system PATH environment (for e.g. `C:\xxx\w64devkit\bin\`).
### II. Build llama.cpp ### II. Build llama.cpp
@ -428,10 +422,10 @@ On the oneAPI command line window, step into the llama.cpp main directory and ru
@call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force @call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force
# Option 1: Use FP32 (recommended for better performance in most cases) # Option 1: Use FP32 (recommended for better performance in most cases)
cmake -B build -G "MinGW Makefiles" -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release cmake -B build -G "Ninja" -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release
# Option 2: Or FP16 # Option 2: Or FP16
cmake -B build -G "MinGW Makefiles" -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON cmake -B build -G "Ninja" -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON
cmake --build build --config Release -j cmake --build build --config Release -j
``` ```
@ -441,9 +435,23 @@ Otherwise, run the `win-build-sycl.bat` wrapper which encapsulates the former in
.\examples\sycl\win-build-sycl.bat .\examples\sycl\win-build-sycl.bat
``` ```
Or, use CMake presets to build:
```sh
cmake --preset x64-windows-sycl-release
cmake --build build-x64-windows-sycl-release -j --target llama-cli
cmake -DLLAMA_SYCL_F16=ON --preset x64-windows-sycl-release
cmake --build build-x64-windows-sycl-release -j --target llama-cli
cmake --preset x64-windows-sycl-debug
cmake --build build-x64-windows-sycl-debug -j --target llama-cli
```
Or, you can use Visual Studio to open llama.cpp folder as a CMake project. Choose the sycl CMake presets (`x64-windows-sycl-release` or `x64-windows-sycl-debug`) before you compile the project.
*Notes:* *Notes:*
- By default, calling `make` will build all target binary files. In case of a minimal experimental setup, the user can build the inference executable only through `make llama-cli`. - In case of a minimal experimental setup, the user can build the inference executable only through `cmake --build build --config Release -j --target llama-cli`.
### III. Run the inference ### III. Run the inference

View File

@ -13,16 +13,16 @@ if %errorlevel% neq 0 goto ERROR
:: for FP16 :: for FP16
:: faster for long-prompt inference :: faster for long-prompt inference
:: cmake -G "MinGW Makefiles" .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON :: cmake -G "MinGW Makefiles" .. -DLLAMA_SYCL=ON -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON
:: for FP32 :: for FP32
cmake -G "MinGW Makefiles" .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release cmake -G "Ninja" .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release
if %errorlevel% neq 0 goto ERROR if %errorlevel% neq 0 goto ERROR
:: build example/main only :: build example/main only
:: make main :: make main
:: build all binary :: build all binary
make -j cmake --build . -j
if %errorlevel% neq 0 goto ERROR if %errorlevel% neq 0 goto ERROR
cd .. cd ..

View File

@ -4911,7 +4911,7 @@ static void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor *sr
GGML_ASSERT(ggml_nbytes(src0) <= INT_MAX); GGML_ASSERT(ggml_nbytes(src0) <= INT_MAX);
GGML_ASSERT(ggml_nbytes(src1) <= INT_MAX); GGML_ASSERT(ggml_nbytes(src1) <= INT_MAX);
GGML_TENSOR_BINARY_OP_LOCALS; GGML_TENSOR_BINARY_OP_LOCALS01;
SYCL_CHECK(ggml_sycl_set_device(ctx.device)); SYCL_CHECK(ggml_sycl_set_device(ctx.device));
queue_ptr main_stream = ctx.stream(); queue_ptr main_stream = ctx.stream();

View File

@ -589,94 +589,75 @@ namespace dpct
} }
/// dpct device extension /// dpct device extension
class device_ext : public sycl::device class device_ext : public sycl::device {
{
typedef std::mutex mutex_type; typedef std::mutex mutex_type;
public: public:
device_ext() : sycl::device(), _ctx(*this) {} device_ext() : sycl::device() {}
~device_ext() ~device_ext() {
{
std::lock_guard<mutex_type> lock(m_mutex); std::lock_guard<mutex_type> lock(m_mutex);
clear_queues(); clear_queues();
} }
device_ext(const sycl::device &base) : sycl::device(base), _ctx(*this) device_ext(const sycl::device &base) : sycl::device(base) {
{
std::lock_guard<mutex_type> lock(m_mutex); std::lock_guard<mutex_type> lock(m_mutex);
init_queues(); init_queues();
} }
int is_native_atomic_supported() { return 0; } int is_native_atomic_supported() { return 0; }
int get_major_version() const int get_major_version() const { return dpct::get_major_version(*this); }
{
return dpct::get_major_version(*this);
}
int get_minor_version() const int get_minor_version() const { return dpct::get_minor_version(*this); }
{
return dpct::get_minor_version(*this);
}
int get_max_compute_units() const int get_max_compute_units() const {
{
return get_device_info().get_max_compute_units(); return get_device_info().get_max_compute_units();
} }
/// Return the maximum clock frequency of this device in KHz. /// Return the maximum clock frequency of this device in KHz.
int get_max_clock_frequency() const int get_max_clock_frequency() const {
{
return get_device_info().get_max_clock_frequency(); return get_device_info().get_max_clock_frequency();
} }
int get_integrated() const { return get_device_info().get_integrated(); } int get_integrated() const { return get_device_info().get_integrated(); }
int get_max_sub_group_size() const int get_max_sub_group_size() const {
{
return get_device_info().get_max_sub_group_size(); return get_device_info().get_max_sub_group_size();
} }
int get_max_register_size_per_work_group() const int get_max_register_size_per_work_group() const {
{
return get_device_info().get_max_register_size_per_work_group(); return get_device_info().get_max_register_size_per_work_group();
} }
int get_max_work_group_size() const int get_max_work_group_size() const {
{
return get_device_info().get_max_work_group_size(); return get_device_info().get_max_work_group_size();
} }
int get_mem_base_addr_align() const int get_mem_base_addr_align() const {
{
return get_info<sycl::info::device::mem_base_addr_align>(); return get_info<sycl::info::device::mem_base_addr_align>();
} }
size_t get_global_mem_size() const size_t get_global_mem_size() const {
{
return get_device_info().get_global_mem_size(); return get_device_info().get_global_mem_size();
} }
size_t get_max_mem_alloc_size() const size_t get_max_mem_alloc_size() const {
{
return get_device_info().get_max_mem_alloc_size(); return get_device_info().get_max_mem_alloc_size();
} }
/// Get the number of bytes of free and total memory on the SYCL device. /// Get the number of bytes of free and total memory on the SYCL device.
/// \param [out] free_memory The number of bytes of free memory on the SYCL device. /// \param [out] free_memory The number of bytes of free memory on the
/// \param [out] total_memory The number of bytes of total memory on the SYCL device. /// SYCL device. \param [out] total_memory The number of bytes of total
void get_memory_info(size_t &free_memory, size_t &total_memory) /// memory on the SYCL device.
{ void get_memory_info(size_t &free_memory, size_t &total_memory) {
total_memory = get_device_info().get_global_mem_size(); total_memory = get_device_info().get_global_mem_size();
const char *warning_info = "get_memory_info: [warning] ext_intel_free_memory is not " const char *warning_info =
"get_memory_info: [warning] ext_intel_free_memory is not "
"supported (export/set ZES_ENABLE_SYSMAN=1 to support), " "supported (export/set ZES_ENABLE_SYSMAN=1 to support), "
"use total memory as free memory"; "use total memory as free memory";
#if (defined(__SYCL_COMPILER_VERSION) && __SYCL_COMPILER_VERSION >= 20221105) #if (defined(__SYCL_COMPILER_VERSION) && __SYCL_COMPILER_VERSION >= 20221105)
if (!has(sycl::aspect::ext_intel_free_memory)) if (!has(sycl::aspect::ext_intel_free_memory)) {
{
std::cerr << warning_info << std::endl; std::cerr << warning_info << std::endl;
free_memory = total_memory; free_memory = total_memory;
} } else {
else
{
free_memory = get_info<sycl::ext::intel::info::device::free_memory>(); free_memory = get_info<sycl::ext::intel::info::device::free_memory>();
} }
#else #else
@ -690,164 +671,139 @@ namespace dpct
#endif #endif
} }
void get_device_info(device_info &out) const void get_device_info(device_info &out) const {
{
dpct::get_device_info(out, *this); dpct::get_device_info(out, *this);
} }
device_info get_device_info() const device_info get_device_info() const {
{
device_info prop; device_info prop;
dpct::get_device_info(prop, *this); dpct::get_device_info(prop, *this);
return prop; return prop;
} }
void reset() void reset() {
{
std::lock_guard<mutex_type> lock(m_mutex); std::lock_guard<mutex_type> lock(m_mutex);
clear_queues(); clear_queues();
init_queues(); init_queues();
} }
sycl::queue &in_order_queue() { return *_q_in_order; } sycl::queue &in_order_queue() { return _q_in_order; }
sycl::queue &out_of_order_queue() { return *_q_out_of_order; } sycl::queue &out_of_order_queue() { return _q_out_of_order; }
sycl::queue &default_queue() sycl::queue &default_queue() { return in_order_queue(); }
{
return in_order_queue();
}
void queues_wait_and_throw() void queues_wait_and_throw() {
{
std::unique_lock<mutex_type> lock(m_mutex); std::unique_lock<mutex_type> lock(m_mutex);
std::vector<std::shared_ptr<sycl::queue>> current_queues(
_queues);
lock.unlock(); lock.unlock();
for (const auto &q : current_queues) for (auto &q : _queues) {
{ q.wait_and_throw();
q->wait_and_throw();
} }
// Guard the destruct of current_queues to make sure the ref count is safe. // Guard the destruct of current_queues to make sure the ref count is
// safe.
lock.lock(); lock.lock();
} }
sycl::queue *create_queue(bool enable_exception_handler = false) sycl::queue create_queue(bool enable_exception_handler = false) {
{
return create_in_order_queue(enable_exception_handler); return create_in_order_queue(enable_exception_handler);
} }
sycl::queue *create_queue(sycl::context context, sycl::device device, sycl::queue create_queue(sycl::device device,
bool enable_exception_handler = false) { bool enable_exception_handler = false) {
return create_in_order_queue(context, device, enable_exception_handler); return create_in_order_queue(device, enable_exception_handler);
} }
sycl::queue *create_in_order_queue(bool enable_exception_handler = false) { sycl::queue create_in_order_queue(bool enable_exception_handler = false) {
std::lock_guard<mutex_type> lock(m_mutex); std::lock_guard<mutex_type> lock(m_mutex);
return create_queue_impl(enable_exception_handler, return create_queue_impl(enable_exception_handler,
sycl::property::queue::in_order()); sycl::property::queue::in_order());
} }
sycl::queue *create_in_order_queue(sycl::context context, sycl::device device, sycl::queue create_in_order_queue(sycl::device device,
bool enable_exception_handler = false) { bool enable_exception_handler = false) {
std::lock_guard<mutex_type> lock(m_mutex); std::lock_guard<mutex_type> lock(m_mutex);
return create_queue_impl(context, device, enable_exception_handler, return create_queue_impl(device, enable_exception_handler,
sycl::property::queue::in_order()); sycl::property::queue::in_order());
} }
sycl::queue *create_out_of_order_queue(bool enable_exception_handler = false) { sycl::queue create_out_of_order_queue(
bool enable_exception_handler = false) {
std::lock_guard<mutex_type> lock(m_mutex); std::lock_guard<mutex_type> lock(m_mutex);
return create_queue_impl(enable_exception_handler); return create_queue_impl(enable_exception_handler);
} }
void destroy_queue(sycl::queue *&queue) void destroy_queue(sycl::queue queue) {
{
std::lock_guard<mutex_type> lock(m_mutex); std::lock_guard<mutex_type> lock(m_mutex);
_queues.erase(std::remove_if(_queues.begin(), _queues.end(), _queues.clear();
[=](const std::shared_ptr<sycl::queue> &q) -> bool
{
return q.get() == queue;
}),
_queues.end());
queue = nullptr;
} }
void set_saved_queue(sycl::queue *q) void set_saved_queue(sycl::queue q) {
{
std::lock_guard<mutex_type> lock(m_mutex); std::lock_guard<mutex_type> lock(m_mutex);
_saved_queue = q; _saved_queue = q;
} }
sycl::queue *get_saved_queue() const sycl::queue get_saved_queue() const {
{
std::lock_guard<mutex_type> lock(m_mutex); std::lock_guard<mutex_type> lock(m_mutex);
return _saved_queue; return _saved_queue;
} }
sycl::context get_context() const { return _ctx; }
private: private:
void clear_queues() void clear_queues() { _queues.clear(); }
{
_queues.clear();
_q_in_order = _q_out_of_order = _saved_queue = nullptr;
}
void init_queues() void init_queues() {
{ _q_in_order =
_q_in_order = create_queue_impl(true, sycl::property::queue::in_order()); create_queue_impl(true, sycl::property::queue::in_order());
_q_out_of_order = create_queue_impl(true); _q_out_of_order = create_queue_impl(true);
_saved_queue = &default_queue(); _saved_queue = default_queue();
} }
/// Caller should acquire resource \p m_mutex before calling this function. /// Caller should acquire resource \p m_mutex before calling this
/// function.
template <class... Properties> template <class... Properties>
sycl::queue *create_queue_impl(bool enable_exception_handler, sycl::queue create_queue_impl(bool enable_exception_handler,
Properties... properties) Properties... properties) {
{
sycl::async_handler eh = {}; sycl::async_handler eh = {};
if (enable_exception_handler) if (enable_exception_handler) {
{
eh = exception_handler; eh = exception_handler;
} }
_queues.push_back(std::make_shared<sycl::queue>( auto q = sycl::queue(*this, eh,
_ctx, *this, eh,
sycl::property_list( sycl::property_list(
#ifdef DPCT_PROFILING_ENABLED #ifdef DPCT_PROFILING_ENABLED
sycl::property::queue::enable_profiling(), sycl::property::queue::enable_profiling(),
#endif #endif
properties...))); properties...));
_queues.push_back(q);
return _queues.back().get(); return _queues.back();
} }
template <class... Properties> template <class... Properties>
sycl::queue *create_queue_impl(sycl::context context, sycl::device device, sycl::queue create_queue_impl(sycl::device device,
bool enable_exception_handler, bool enable_exception_handler,
Properties... properties) { Properties... properties) {
sycl::async_handler eh = {}; sycl::async_handler eh = {};
if (enable_exception_handler) { if (enable_exception_handler) {
eh = exception_handler; eh = exception_handler;
} }
_queues.push_back(std::make_shared<sycl::queue>( _queues.push_back(
context, device, eh, sycl::queue(device, eh,
sycl::property_list( sycl::property_list(
#ifdef DPCT_PROFILING_ENABLED #ifdef DPCT_PROFILING_ENABLED
sycl::property::queue::enable_profiling(), sycl::property::queue::enable_profiling(),
#endif #endif
properties...))); properties...)));
return _queues.back().get(); return _queues.back();
} }
void get_version(int &major, int &minor) const void get_version(int &major, int &minor) const {
{
detail::get_version(*this, major, minor); detail::get_version(*this, major, minor);
} }
sycl::queue *_q_in_order, *_q_out_of_order; sycl::queue _q_in_order, _q_out_of_order;
sycl::queue *_saved_queue; sycl::queue _saved_queue;
sycl::context _ctx; std::vector<sycl::queue> _queues;
std::vector<std::shared_ptr<sycl::queue>> _queues;
mutable mutex_type m_mutex; mutable mutex_type m_mutex;
}; };
/// device manager /// device manager
class dev_mgr class dev_mgr
{ {

6
ggml.h
View File

@ -312,6 +312,12 @@
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
GGML_TENSOR_LOCALS(size_t, nb, dst, nb) GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
#define GGML_TENSOR_BINARY_OP_LOCALS01 \
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
GGML_TENSOR_LOCALS(size_t, nb1, src1, nb)
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif