backup
Meng, Hengyu 2024-08-20 08:23:41 +00:00
parent a07c32ea54
commit b180cb352b
7 changed files with 208 additions and 4 deletions

View File

@@ -38,7 +38,9 @@
#include "ggml-sycl/backend.hpp"
#include "ggml-sycl/presets.hpp"
#include "ggml-sycl/gemm.hpp"
#if GGML_SYCL_DNNL
#include "ggml-sycl/onednn/gemm.hpp"
#endif
bool ggml_sycl_loaded(void);
void ggml_sycl_free_data(struct ggml_tensor * tensor);
@@ -3892,6 +3894,9 @@ bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct ggml_tens
    ggml_sycl_func_t func;

    switch (tensor->op) {
        case GGML_OP_CONV_TRANSPOSE_2D:
            func = ggml_sycl_op_conv_2d;
            break;
        case GGML_OP_CONV_TRANSPOSE_1D:
            func = ggml_sycl_op_conv_transpose_1d;
            break;
@@ -5007,6 +5012,10 @@ GGML_CALL static ggml_status ggml_backend_sycl_graph_compute(ggml_backend_t back
GGML_CALL static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, const ggml_tensor * op) {
    switch (op->op) {
        case GGML_OP_CONV_TRANSPOSE_2D:
            {
                return true;
            } break;
        case GGML_OP_CONV_TRANSPOSE_1D:
            {
                ggml_type src0_type = op->src[0]->type;

View File

@@ -11,6 +11,7 @@
//
#include "conv.hpp"
#include "onednn/convolution.hpp"
static void conv_transpose_1d_kernel(
const int s0, const int output_size,
@@ -97,3 +98,30 @@ void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, const ggml_
        src0_d, src1_d, dst_d, stream);
}
void ggml_sycl_op_conv_2d(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
                          const ggml_tensor *src1, ggml_tensor *dst) {
    const void * src0_d = (const void *)src0->data;
    const void * src1_d = (const void *)src1->data;
    void * dst_d = (void *)dst->data;
    auto dnnl_stream = ctx.stream_dnnl(ctx.stream());

    GGML_ASSERT(ggml_is_contiguous(src0));
    GGML_ASSERT(ggml_is_contiguous(src1));

    const int32_t * opts = (const int32_t *)dst->op_params;
    DnnlConvWrapper::conv_params params = {
        opts[0], opts[1], opts[2], opts[3], opts[4], opts[5], true
    };

    DnnlConvWrapper::forward(dnnl_stream,
                             src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3],
                             src1->ne[1], src1->ne[2], src1->ne[3],
                             params,
                             src0_d, DnnlConvWrapper::to_dt<float>(),
                             src1_d, DnnlConvWrapper::to_dt<float>(),
                             dst_d, DnnlConvWrapper::to_dt<float>());
}

View File

@@ -18,4 +18,7 @@
void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
                                    const ggml_tensor *src1, ggml_tensor *dst);

void ggml_sycl_op_conv_2d(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
                          const ggml_tensor *src1, ggml_tensor *dst);
#endif // GGML_SYCL_CONV_HPP

View File

@@ -0,0 +1,104 @@
//
// MIT license
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: MIT
//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
#ifndef GGML_SYCL_ONEDNN_CONV_HPP
#define GGML_SYCL_ONEDNN_CONV_HPP
#include <fstream>
#include <iostream>
#include "ggml-sycl.h"
#if GGML_SYCL_DNNL
#include "dnnl.hpp"
#include "dnnl_sycl.hpp"
class DnnlConvWrapper {
public:
    using dt  = dnnl::memory::data_type;
    using tag = dnnl::memory::format_tag;

    struct conv_params {
        int s0;
        int s1;
        int p0;
        int p1;
        int d0;
        int d1;
        bool is_2d;
    };

    template<typename T>
    static constexpr dt to_dt() {
        if constexpr (std::is_same_v<T, float>) return dt::f32;
        else if constexpr (std::is_same_v<T, sycl::half>) return dt::f16;
        else static_assert(0);
    }

    static inline void forward(const dnnl::stream& stream,
                               int n, int h, int w, int ic, int oc, int kh, int kw,
                               conv_params& params,
                               const void* a, dt at, const void* b, dt bt, void* c, dt ct)
    {
        auto const eng = stream.get_engine();
        dnnl::memory::dims a_dims, b_dims, c_dims;
        dnnl::memory::desc a_md, b_md, c_md, bias_md;
        dnnl::primitive_attr pattr;

        if (params.is_2d) {
            a_dims = { n, ic, h, w };
            b_dims = { oc, ic, kh, kw };
            c_dims = { n, oc, h, w };
            a_md = dnnl::memory::desc(a_dims, at, tag::nchw);
            b_md = dnnl::memory::desc(b_dims, bt, tag::oihw);
            c_md = dnnl::memory::desc(c_dims, ct, tag::nchw);
        } else {
            a_dims = { n, ic, h };
            b_dims = { oc, ic, kh };
            c_dims = { n, oc, h };
            a_md = dnnl::memory::desc(a_dims, at, tag::ncw);
            b_md = dnnl::memory::desc(b_dims, bt, tag::oiw);
            c_md = dnnl::memory::desc(c_dims, ct, tag::ncw);
        }

        auto a_mem = dnnl::memory(a_md, eng, (void*)a);
        auto b_mem = dnnl::memory(b_md, eng, (void*)b);

        // Create the primitive.
        auto conv_fwd_pd = dnnl::convolution_forward::primitive_desc(
            eng,
            dnnl::prop_kind::forward,
            dnnl::algorithm::convolution_direct,
            a_md,
            b_md,
            bias_md,
            c_md,
            {params.s0, params.s1},
            {params.d0, params.d1},
            {params.p0, params.p1},
            {params.p0, params.p1},
            pattr);
        auto conv_fwd = dnnl::convolution_forward(conv_fwd_pd);
        auto c_mem = dnnl::memory(conv_fwd_pd.dst_desc(), eng, c);

        // Primitive arguments.
        std::unordered_map<int, dnnl::memory> conv_args;
        conv_args.insert({ DNNL_ARG_SRC, a_mem });
        conv_args.insert({ DNNL_ARG_WEIGHTS, b_mem });
        conv_args.insert({ DNNL_ARG_DST, c_mem });

        conv_fwd.execute(stream, conv_args);
    }
};
#endif
#endif // GGML_SYCL_ONEDNN_CONV_HPP
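
Usage note (not part of this commit): a minimal sketch of driving DnnlConvWrapper::forward directly, assuming oneDNN is built with the SYCL runtime and GGML_SYCL_DNNL is defined when this header is compiled. The queue, buffer names, shapes, and include path below are illustrative assumptions, not code from the diff.

// Hypothetical standalone driver for DnnlConvWrapper::forward (illustrative only).
// Assumes oneDNN with the SYCL runtime; the include path mirrors the one used in conv.cpp above.
#include <sycl/sycl.hpp>
#include "dnnl.hpp"
#include "dnnl_sycl.hpp"
#include "onednn/convolution.hpp"

int main() {
    sycl::queue q{sycl::default_selector_v};

    // Wrap the SYCL device/queue in oneDNN engine and stream objects.
    auto eng    = dnnl::sycl_interop::make_engine(q.get_device(), q.get_context());
    auto stream = dnnl::sycl_interop::make_stream(eng, q);

    // NCHW source, OIHW weights; 3x3 kernel, stride 1, pad 1 keeps H and W unchanged.
    // Note: an oneDNN dilation value of 0 means "no dilation".
    const int n = 1, ic = 3, h = 32, w = 32, oc = 8, kh = 3, kw = 3;

    float * src = sycl::malloc_shared<float>(size_t(n) * ic * h * w, q);
    float * wei = sycl::malloc_shared<float>(size_t(oc) * ic * kh * kw, q);
    float * dst = sycl::malloc_shared<float>(size_t(n) * oc * h * w, q);

    // Fill inputs with a constant so the run is deterministic.
    q.fill(src, 1.0f, size_t(n) * ic * h * w).wait();
    q.fill(wei, 0.5f, size_t(oc) * ic * kh * kw).wait();

    DnnlConvWrapper::conv_params params = { /*s0*/1, /*s1*/1, /*p0*/1, /*p1*/1,
                                            /*d0*/0, /*d1*/0, /*is_2d*/true };

    DnnlConvWrapper::forward(stream, n, h, w, ic, oc, kh, kw, params,
                             src, DnnlConvWrapper::to_dt<float>(),
                             wei, DnnlConvWrapper::to_dt<float>(),
                             dst, DnnlConvWrapper::to_dt<float>());
    stream.wait();

    sycl::free(src, q);
    sycl::free(wei, q);
    sycl::free(dst, q);
    return 0;
}

With a 3x3 kernel, stride 1 and padding 1 the output keeps the input height/width, which matches the c_dims = { n, oc, h, w } shape the wrapper assumes for its destination descriptor.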

View File

@@ -10,8 +10,8 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
#ifndef GGML_SYCL_GEMM_HPP
#define GGML_SYCL_GEMM_HPP
#ifndef GGML_SYCL_ONEDNN_GEMM_HPP
#define GGML_SYCL_ONEDNN_GEMM_HPP
#include <fstream>
#include <iostream>
@@ -98,4 +98,4 @@ public:
#endif
#endif // GGML_SYCL_GEMM_HPP
#endif // GGML_SYCL_ONEDNN_GEMM_HPP

View File

@@ -6770,6 +6770,35 @@ struct ggml_tensor * ggml_conv_2d(
        int p1,
        int d0,
        int d1) {
#ifdef GGML_SYCL_DNNL
    bool is_node = false;

    if (a->grad || b->grad) {
        GGML_ABORT("fatal error"); // TODO: implement backward
        is_node = true;
    }

    const int64_t OH = ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1);
    const int64_t OW = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);

    const int64_t ne[4] = {
        OW,
        OH,
        a->ne[3], // OC
        b->ne[3], // N
    };
    struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne);

    int32_t params[] = { s0, s1, p0, p1, d0, d1 };
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_CONV_TRANSPOSE_2D;
    result->grad   = is_node ? ggml_dup_tensor(ctx, result) : NULL;
    result->src[0] = a;
    result->src[1] = b;

    return result;
#else
    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N, OH, OW, IC * KH * KW]

    struct ggml_tensor * result =
@@ -6782,6 +6811,7 @@ struct ggml_tensor * ggml_conv_2d(
    return result;
#endif
}
// ggml_conv_2d_sk_p0
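
Aside (not from the diff): the OW/OH values in ggml_conv_2d above come from ggml_calc_conv_output_size, which applies the usual per-dimension convolution output-size arithmetic. A small sketch, with conv_out_size as a hypothetical stand-in name:

// Standard convolution output-extent arithmetic (per spatial dim), as used for OW/OH above.
static int64_t conv_out_size(int64_t ins, int64_t ks, int s, int p, int d) {
    return (ins + 2 * p - d * (ks - 1) - 1) / s + 1;
}
// e.g. the default test_conv_2d shapes below: input width 197, kernel width 16,
// s0 = 1, p0 = 0, d0 = 1  ->  (197 + 0 - 15 - 1) / 1 + 1 = 182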

View File

@@ -1308,6 +1308,35 @@ struct test_conv_transpose_1d : public test_case {
}
};
struct test_conv_2d : public test_case {
    const std::array<int64_t, 4> ne_input;
    const std::array<int64_t, 4> ne_kernel;
    const int s0; // stride   (width)
    const int p0; // padding  (width)
    const int d0; // dilation (width)
    const int s1; // stride   (height)
    const int p1; // padding  (height)
    const int d1; // dilation (height)

    std::string vars() override {
        return VARS_TO_STR5(ne_input, ne_kernel, s0, p0, d0);
    }

    test_conv_2d(std::array<int64_t, 4> ne_input = {197, 32, 1, 1},  // [input_width, input_height, input_channels, 1]
                 std::array<int64_t, 4> ne_kernel = {16, 32, 1, 32}, // [kernel_width, kernel_height, input_channels, output_channels]
                 int s0 = 1, int p0 = 0, int d0 = 1,
                 int s1 = 1, int p1 = 0, int d1 = 1)
        : ne_input(ne_input), ne_kernel(ne_kernel), s0(s0), p0(p0), d0(d0), s1(s1), p1(p1), d1(d1) {}

    ggml_tensor * build_graph(ggml_context * ctx) override {
        ggml_tensor * input  = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne_input.data());
        ggml_tensor * kernel = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne_kernel.data());
        ggml_tensor * out    = ggml_conv_2d(ctx, kernel, input, s0, s1, p0, p1, d0, d1);
        return out;
    }
};
// GGML_OP_IM2COL
struct test_im2col : public test_case {
const ggml_type type_input;
@@ -2160,6 +2189,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {3,2,2,1}, 1, 0, 1));
test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {3,1,2,1}, 1, 0, 1));
test_cases.emplace_back(new test_conv_transpose_1d({2,1,1,1}, {3,1,1,1}, 1, 0, 1));
test_cases.emplace_back(new test_conv_2d());
test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 10, 10, 10}, {1, 1, 1, 1}));