diff --git a/ggml/src/ggml-sycl.cpp b/ggml/src/ggml-sycl.cpp
index 0d884f89a..d55673b58 100644
--- a/ggml/src/ggml-sycl.cpp
+++ b/ggml/src/ggml-sycl.cpp
@@ -38,7 +38,9 @@
 #include "ggml-sycl/backend.hpp"
 #include "ggml-sycl/presets.hpp"
-#include "ggml-sycl/gemm.hpp"
+#if GGML_SYCL_DNNL
+#include "ggml-sycl/onednn/gemm.hpp"
+#endif

 bool ggml_sycl_loaded(void);
 void ggml_sycl_free_data(struct ggml_tensor * tensor);
@@ -3892,6 +3894,9 @@ bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct ggml_tens
     ggml_sycl_func_t func;

     switch (tensor->op) {
+        case GGML_OP_CONV_TRANSPOSE_2D:
+            func = ggml_sycl_op_conv_2d;
+            break;
         case GGML_OP_CONV_TRANSPOSE_1D:
             func = ggml_sycl_op_conv_transpose_1d;
             break;
@@ -5007,6 +5012,10 @@ GGML_CALL static ggml_status ggml_backend_sycl_graph_compute(ggml_backend_t back

 GGML_CALL static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, const ggml_tensor * op) {
     switch (op->op) {
+        case GGML_OP_CONV_TRANSPOSE_2D:
+            {
+                return true;
+            } break;
         case GGML_OP_CONV_TRANSPOSE_1D:
             {
                 ggml_type src0_type = op->src[0]->type;
diff --git a/ggml/src/ggml-sycl/conv.cpp b/ggml/src/ggml-sycl/conv.cpp
index bc4ab1ddb..6b328e1a8 100644
--- a/ggml/src/ggml-sycl/conv.cpp
+++ b/ggml/src/ggml-sycl/conv.cpp
@@ -11,6 +11,7 @@
 //

 #include "conv.hpp"
+#include "onednn/convolution.hpp"

 static void conv_transpose_1d_kernel(
         const int s0, const int output_size,
@@ -97,3 +98,30 @@ void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, const ggml_
         src0_d, src1_d, dst_d, stream);
 }
+
+void ggml_sycl_op_conv_2d(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
+                          const ggml_tensor *src1, ggml_tensor *dst) {
+    const void * src0_d = (const void *)src0->data;
+    const void * src1_d = (const void *)src1->data;
+
+    void * dst_d = (void *)dst->data;
+    auto dnnl_stream = ctx.stream_dnnl(ctx.stream());
+
+    GGML_ASSERT(ggml_is_contiguous(src0));
+    GGML_ASSERT(ggml_is_contiguous(src1));
+
+    const int32_t * opts = (const int32_t *)dst->op_params;
+
+    DnnlConvWrapper::conv_params params = {
+        opts[0], opts[1], opts[2], opts[3], opts[4], opts[5], true
+    };
+
+    DnnlConvWrapper::forward(dnnl_stream,
+        src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3],
+        src1->ne[1], src1->ne[2], src1->ne[3],
+        params,
+        src0_d, DnnlConvWrapper::to_dt<float>(),
+        src1_d, DnnlConvWrapper::to_dt<float>(),
+        dst_d,  DnnlConvWrapper::to_dt<float>());
+}
diff --git a/ggml/src/ggml-sycl/conv.hpp b/ggml/src/ggml-sycl/conv.hpp
index eb20730f9..0dc8f9906 100644
--- a/ggml/src/ggml-sycl/conv.hpp
+++ b/ggml/src/ggml-sycl/conv.hpp
@@ -18,4 +18,7 @@
 void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
     const ggml_tensor *src1, ggml_tensor *dst);

+void ggml_sycl_op_conv_2d(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
+    const ggml_tensor *src1, ggml_tensor *dst);
+
 #endif // GGML_SYCL_CONV_HPP
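Note on the op_params contract used by ggml_sycl_op_conv_2d above: the six int32 values are read back in the order in which ggml_conv_2d packs them in the ggml.c hunk further down. The struct below is only an illustration of that assumed layout and is not part of the patch:

    // Hypothetical view of dst->op_params as packed by ggml_conv_2d and
    // consumed by ggml_sycl_op_conv_2d: { s0, s1, p0, p1, d0, d1 }.
    struct conv2d_op_params {
        int32_t s0, s1;   // stride   (width, height)
        int32_t p0, p1;   // padding  (width, height)
        int32_t d0, d1;   // dilation (width, height)
    };
    static_assert(sizeof(conv2d_op_params) == 6 * sizeof(int32_t), "no padding expected");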
diff --git a/ggml/src/ggml-sycl/onednn/convolution.hpp b/ggml/src/ggml-sycl/onednn/convolution.hpp
new file mode 100644
index 000000000..2215e8246
--- /dev/null
+++ b/ggml/src/ggml-sycl/onednn/convolution.hpp
@@ -0,0 +1,104 @@
+//
+// MIT license
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: MIT
+//
+
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+
+#ifndef GGML_SYCL_ONEDNN_CONV_HPP
+#define GGML_SYCL_ONEDNN_CONV_HPP
+
+#include <type_traits>
+#include <unordered_map>
+
+#include "ggml-sycl.h"
+
+#if GGML_SYCL_DNNL
+
+#include "dnnl.hpp"
+#include "dnnl_sycl.hpp"
+
+class DnnlConvWrapper {
+public:
+    using dt  = dnnl::memory::data_type;
+    using tag = dnnl::memory::format_tag;
+
+    struct conv_params {
+        int s0;
+        int s1;
+        int p0;
+        int p1;
+        int d0;
+        int d1;
+        bool is_2d;
+    };
+
+    template<typename T>
+    static constexpr dt to_dt() {
+        if constexpr (std::is_same_v<T, float>) return dt::f32;
+        else if constexpr (std::is_same_v<T, sycl::half>) return dt::f16;
+        else static_assert(0);
+    }
+
+    static inline void forward(const dnnl::stream& stream,
+                               int n, int h, int w, int ic, int oc, int kh, int kw,
+                               conv_params& params,
+                               const void* a, dt at, const void* b, dt bt, void* c, dt ct)
+    {
+        auto const eng = stream.get_engine();
+        dnnl::memory::dims a_dims, b_dims, c_dims;
+        dnnl::memory::desc a_md, b_md, c_md, bias_md;
+        dnnl::primitive_attr pattr;
+
+        if (params.is_2d) {
+            a_dims = { n, ic, h, w };
+            b_dims = { oc, ic, kh, kw };
+            c_dims = { n, oc, h, w };
+            a_md = dnnl::memory::desc(a_dims, at, tag::nchw);
+            b_md = dnnl::memory::desc(b_dims, bt, tag::oihw);
+            c_md = dnnl::memory::desc(c_dims, ct, tag::nchw);
+        } else {
+            a_dims = { n, ic, h };
+            b_dims = { oc, ic, kh };
+            c_dims = { n, oc, h };
+            a_md = dnnl::memory::desc(a_dims, at, tag::ncw);
+            b_md = dnnl::memory::desc(b_dims, bt, tag::oiw);
+            c_md = dnnl::memory::desc(c_dims, ct, tag::ncw);
+        }
+
+        auto a_mem = dnnl::memory(a_md, eng, (void*)a);
+        auto b_mem = dnnl::memory(b_md, eng, (void*)b);
+
+        // Create the primitive.
+        auto conv_fwd_pd = dnnl::convolution_forward::primitive_desc(
+            eng,
+            dnnl::prop_kind::forward,
+            dnnl::algorithm::convolution_direct,
+            a_md,
+            b_md,
+            bias_md,
+            c_md,
+            {params.s0, params.s1},
+            {params.d0, params.d1},
+            {params.p0, params.p1},
+            {params.p0, params.p1},
+            pattr);
+        auto conv_fwd = dnnl::convolution_forward(conv_fwd_pd);
+        auto c_mem = dnnl::memory(conv_fwd_pd.dst_desc(), eng, c);
+
+        // Primitive arguments.
+        std::unordered_map<int, dnnl::memory> conv_args;
+        conv_args.insert({ DNNL_ARG_SRC, a_mem });
+        conv_args.insert({ DNNL_ARG_WEIGHTS, b_mem });
+        conv_args.insert({ DNNL_ARG_DST, c_mem });
+
+        conv_fwd.execute(stream, conv_args);
+    }
+};
+
+#endif
+
+#endif // GGML_SYCL_ONEDNN_CONV_HPP
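For reference, a minimal usage sketch of the new wrapper outside the ggml dispatch path. It assumes an existing ggml_backend_sycl_context named ctx and device buffers src_dev, weights_dev and dst_dev (illustrative names, not from the patch). A 3x3 kernel with stride 1 and padding 1 is used because forward() describes the destination with the same spatial extents as the source, and dilation is passed as 0 since oneDNN treats 0 as "no dilation":

    // Illustrative only: F32 NCHW convolution through DnnlConvWrapper.
    DnnlConvWrapper::conv_params p = { /*s0=*/1, /*s1=*/1, /*p0=*/1, /*p1=*/1,
                                       /*d0=*/0, /*d1=*/0, /*is_2d=*/true };
    auto dnnl_stream = ctx.stream_dnnl(ctx.stream());
    DnnlConvWrapper::forward(dnnl_stream,
        /*n=*/1, /*h=*/64, /*w=*/64, /*ic=*/3, /*oc=*/16, /*kh=*/3, /*kw=*/3,
        p,
        src_dev,     DnnlConvWrapper::to_dt<float>(),   // DNNL_ARG_SRC
        weights_dev, DnnlConvWrapper::to_dt<float>(),   // DNNL_ARG_WEIGHTS
        dst_dev,     DnnlConvWrapper::to_dt<float>());  // DNNL_ARG_DST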
diff --git a/ggml/src/ggml-sycl/gemm.hpp b/ggml/src/ggml-sycl/onednn/gemm.hpp
similarity index 97%
rename from ggml/src/ggml-sycl/gemm.hpp
rename to ggml/src/ggml-sycl/onednn/gemm.hpp
index 2ad9b36f4..4fbd09e4f 100644
--- a/ggml/src/ggml-sycl/gemm.hpp
+++ b/ggml/src/ggml-sycl/onednn/gemm.hpp
@@ -10,8 +10,8 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //

-#ifndef GGML_SYCL_GEMM_HPP
-#define GGML_SYCL_GEMM_HPP
+#ifndef GGML_SYCL_ONEDNN_GEMM_HPP
+#define GGML_SYCL_ONEDNN_GEMM_HPP

 #include <fstream>
 #include <iostream>
@@ -98,4 +98,4 @@ public:

 #endif

-#endif // GGML_SYCL_GEMM_HPP
+#endif // GGML_SYCL_ONEDNN_GEMM_HPP
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index d63c917a5..526e0fe1a 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -6770,6 +6770,35 @@ struct ggml_tensor * ggml_conv_2d(
         int p1,
         int d0,
         int d1) {
+#ifdef GGML_SYCL_DNNL
+    bool is_node = false;
+
+    if (a->grad || b->grad) {
+        GGML_ABORT("fatal error"); // TODO: implement backward
+        is_node = true;
+    }
+
+    const int64_t OH = ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1);
+    const int64_t OW = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
+
+    const int64_t ne[4] = {
+        OW,
+        OH,
+        a->ne[3], // OC
+        b->ne[3], // N
+    };
+
+    struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne);
+
+    int32_t params[] = { s0, s1, p0, p1, d0, d1 };
+    ggml_set_op_params(result, params, sizeof(params));
+
+    result->op = GGML_OP_CONV_TRANSPOSE_2D;
+    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+    result->src[0] = a;
+    result->src[1] = b;
+
+    return result;
+#else
     struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N, OH, OW, IC * KH * KW]

     struct ggml_tensor * result =
@@ -6782,6 +6811,7 @@ struct ggml_tensor * ggml_conv_2d(

     return result;
+#endif
 }

 // ggml_conv_2d_sk_p0
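The OH/OW computation above uses ggml_calc_conv_output_size, which applies the usual convolution output-size formula; for the width dimension:

    OW = (IW + 2*p0 - d0*(KW - 1) - 1) / s0 + 1

With the default shapes of the new test case below (input width 197, kernel width 16, s0 = 1, p0 = 0, d0 = 1) this gives OW = 197 - 16 + 1 = 182, and OH = 1 since the kernel height matches the input height (32).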
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index e2b720844..7bc3d3bbc 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -1308,6 +1308,35 @@ struct test_conv_transpose_1d : public test_case {
     }
 };

+struct test_conv_2d : public test_case {
+    const std::array<int64_t, 4> ne_input;
+    const std::array<int64_t, 4> ne_kernel;
+
+    const int s0; // stride
+    const int p0; // padding
+    const int d0; // dilation
+    const int s1; // stride
+    const int p1; // padding
+    const int d1; // dilation
+
+    std::string vars() override {
+        return VARS_TO_STR5(ne_input, ne_kernel, s0, p0, d0);
+    }
+
+    test_conv_2d(std::array<int64_t, 4> ne_input = {197, 32, 1, 1},   // [input_width, input_height, input_channels, batch]
+                 std::array<int64_t, 4> ne_kernel = {16, 32, 1, 32},  // [kernel_width, kernel_height, input_channels, output_channels]
+                 int s0 = 1, int p0 = 0, int d0 = 1,
+                 int s1 = 1, int p1 = 0, int d1 = 1)
+        : ne_input(ne_input), ne_kernel(ne_kernel), s0(s0), p0(p0), d0(d0), s1(s1), p1(p1), d1(d1) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * input  = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne_input.data());
+        ggml_tensor * kernel = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne_kernel.data());
+        ggml_tensor * out = ggml_conv_2d(ctx, kernel, input, s0, s1, p0, p1, d0, d1);
+        return out;
+    }
+};
+
 // GGML_OP_IM2COL
 struct test_im2col : public test_case {
     const ggml_type type_input;
@@ -2160,6 +2189,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
     test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {3,2,2,1}, 1, 0, 1));
     test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {3,1,2,1}, 1, 0, 1));
     test_cases.emplace_back(new test_conv_transpose_1d({2,1,1,1}, {3,1,1,1}, 1, 0, 1));
+    test_cases.emplace_back(new test_conv_2d());

     test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 10, 10, 10}, {1, 1, 1, 1}));
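For completeness, a minimal sketch (not part of the patch) of what the new test exercises when ggml_conv_2d is called from user code, using the same default shapes as test_conv_2d; buffer allocation on a backend and graph execution are omitted for brevity:

    #include "ggml.h"

    static void conv2d_example(void) {
        struct ggml_init_params ip = {
            /*.mem_size   =*/ 16*1024*1024,
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(ip);

        // ggml tensor extents are ordered [width, height, channels, batch]
        struct ggml_tensor * input  = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 197, 32, 1, 1);
        struct ggml_tensor * kernel = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 16, 32, 1, 32);

        // a = kernel, b = input; then s0, s1, p0, p1, d0, d1
        struct ggml_tensor * out = ggml_conv_2d(ctx, kernel, input, 1, 1, 0, 0, 1, 1);
        // out->ne == { OW, OH, OC, N } == { 182, 1, 32, 1 } for these shapes
        (void) out;

        ggml_free(ctx);
    }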