From 2d5dd7bb3fa382806cd3e0bfc7a1d92349bc0ccf Mon Sep 17 00:00:00 2001
From: Molly Sophia
Date: Tue, 6 Aug 2024 15:26:46 +0800
Subject: [PATCH] ggml : add epsilon as a parameter for group_norm (#8818)

Signed-off-by: Molly Sophia
---
 ggml/include/ggml.h              |  7 ++++---
 ggml/src/ggml-cann/aclnn_ops.cpp |  4 +++-
 ggml/src/ggml-cuda/norm.cu       |  9 ++++++---
 ggml/src/ggml-metal.m            |  6 ++----
 ggml/src/ggml-sycl/norm.cpp      |  9 ++++++---
 ggml/src/ggml.c                  | 19 ++++++++++++-------
 tests/test-backend-ops.cpp       |  8 +++++---
 7 files changed, 38 insertions(+), 24 deletions(-)

diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h
index a9e88e592..15602a96d 100644
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -1140,16 +1140,17 @@ extern "C" {
 
     // group normalize along ne0*ne1*n_groups
     // used in stable-diffusion
-    // TODO: eps is hardcoded to 1e-6 for now
     GGML_API struct ggml_tensor * ggml_group_norm(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
-            int                   n_groups);
+            int                   n_groups,
+            float                 eps);
 
     GGML_API struct ggml_tensor * ggml_group_norm_inplace(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
-            int                   n_groups);
+            int                   n_groups,
+            float                 eps);
 
     // a - x
     // b - dy
diff --git a/ggml/src/ggml-cann/aclnn_ops.cpp b/ggml/src/ggml-cann/aclnn_ops.cpp
index 171439132..8c4132f5b 100644
--- a/ggml/src/ggml-cann/aclnn_ops.cpp
+++ b/ggml/src/ggml-cann/aclnn_ops.cpp
@@ -464,9 +464,11 @@ void ggml_cann_group_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     aclTensor* acl_src = ggml_cann_create_tensor(src);
     aclTensor* acl_dst = ggml_cann_create_tensor(dst);
 
-    const float eps = 1e-6f; // TODO: make this a parameter
     int n_groups = dst->op_params[0];
 
+    float eps;
+    memcpy(&eps, dst->op_params + 1, sizeof(float));
+
     uint64_t workspaceSize = 0;
     aclOpExecutor* executor;
     void* workspaceAddr = nullptr;
diff --git a/ggml/src/ggml-cuda/norm.cu b/ggml/src/ggml-cuda/norm.cu
index 30866d512..133e219f0 100644
--- a/ggml/src/ggml-cuda/norm.cu
+++ b/ggml/src/ggml-cuda/norm.cu
@@ -142,8 +142,7 @@ static void norm_f32_cuda(const float * x, float * dst, const int ncols, const i
     }
 }
 
-static void group_norm_f32_cuda(const float * x, float * dst, const int num_groups, const int group_size, const int ne_elements, cudaStream_t stream) {
-    static const float eps = 1e-6f;
+static void group_norm_f32_cuda(const float * x, float * dst, const int num_groups, const float eps, const int group_size, const int ne_elements, cudaStream_t stream) {
     if (group_size < 1024) {
         const dim3 block_dims(WARP_SIZE, 1, 1);
         group_norm_f32<WARP_SIZE><<<num_groups, block_dims, 0, stream>>>(x, dst, group_size, ne_elements, eps);
@@ -196,8 +195,12 @@ void ggml_cuda_op_group_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst)
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
     int num_groups = dst->op_params[0];
+
+    float eps;
+    memcpy(&eps, dst->op_params + 1, sizeof(float));
+
     int group_size = src0->ne[0] * src0->ne[1] * ((src0->ne[2] + num_groups - 1) / num_groups);
-    group_norm_f32_cuda(src0_d, dst_d, num_groups * src0->ne[3], group_size, ggml_nelements(src0), stream);
+    group_norm_f32_cuda(src0_d, dst_d, num_groups * src0->ne[3], eps, group_size, ggml_nelements(src0), stream);
 }
 
 void ggml_cuda_op_rms_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
diff --git a/ggml/src/ggml-metal.m b/ggml/src/ggml-metal.m
index 48b813131..b512eb0be 100644
--- a/ggml/src/ggml-metal.m
+++ b/ggml/src/ggml-metal.m
@@ -2229,10 +2229,8 @@ static enum ggml_status ggml_metal_graph_compute(
                 GGML_ASSERT(ne00 % 4 == 0);
                 GGML_ASSERT(ggml_is_contiguous(src0));
 
-                //float eps;
-                //memcpy(&eps, dst->op_params, sizeof(float));
-
-                const float eps = 1e-6f; // TODO: temporarily hardcoded
+                float eps;
+                memcpy(&eps, dst->op_params + 1, sizeof(float));
 
                 const int32_t n_groups = ((int32_t *) dst->op_params)[0];
 
diff --git a/ggml/src/ggml-sycl/norm.cpp b/ggml/src/ggml-sycl/norm.cpp
index cccf87d06..b3159b9d1 100644
--- a/ggml/src/ggml-sycl/norm.cpp
+++ b/ggml/src/ggml-sycl/norm.cpp
@@ -225,9 +225,8 @@ static void norm_f32_sycl(const float* x, float* dst, const int ncols,
 }
 
 static void group_norm_f32_sycl(const float* x, float* dst,
-                                const int num_groups, const int group_size,
+                                const int num_groups, const float eps, const int group_size,
                                 const int ne_elements, queue_ptr stream, int device) {
-    static const float eps = 1e-6f;
     if (group_size < 1024) {
         const sycl::range<3> block_dims(1, 1, WARP_SIZE);
         stream->submit([&](sycl::handler& cgh) {
@@ -343,8 +342,12 @@ void ggml_sycl_op_group_norm(ggml_backend_sycl_context& ctx, const ggml_tensor*
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
 
     int num_groups = dst->op_params[0];
+
+    float eps;
+    memcpy(&eps, dst->op_params + 1, sizeof(float));
+
     int group_size = src0->ne[0] * src0->ne[1] * ((src0->ne[2] + num_groups - 1) / num_groups);
-    group_norm_f32_sycl(src0_dd, dst_dd, num_groups, group_size, src0->ne[0] * src0->ne[1] * src0->ne[2], main_stream, ctx.device);
+    group_norm_f32_sycl(src0_dd, dst_dd, num_groups, eps, group_size, src0->ne[0] * src0->ne[1] * src0->ne[2], main_stream, ctx.device);
 
     (void)src1;
     (void)dst;
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index 910981e4a..daceec414 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -5374,6 +5374,7 @@ static struct ggml_tensor * ggml_group_norm_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         int n_groups,
+        float eps,
         bool inplace) {
 
     bool is_node = false;
@@ -5384,7 +5385,8 @@ static struct ggml_tensor * ggml_group_norm_impl(
 
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
-    result->op_params[0] = n_groups;
+    ggml_set_op_params_i32(result, 0, n_groups);
+    ggml_set_op_params_f32(result, 1, eps);
 
     result->op = GGML_OP_GROUP_NORM;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -5396,15 +5398,17 @@ static struct ggml_tensor * ggml_group_norm_impl(
 struct ggml_tensor * ggml_group_norm(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
-        int n_groups) {
-    return ggml_group_norm_impl(ctx, a, n_groups, false);
+        int n_groups,
+        float eps) {
+    return ggml_group_norm_impl(ctx, a, n_groups, eps, false);
 }
 
 struct ggml_tensor * ggml_group_norm_inplace(
         struct ggml_context * ctx,
        struct ggml_tensor * a,
-        int n_groups) {
-    return ggml_group_norm_impl(ctx, a, n_groups, true);
+        int n_groups,
+        float eps) {
+    return ggml_group_norm_impl(ctx, a, n_groups, eps, true);
 }
 
 // ggml_mul_mat
@@ -12095,10 +12099,11 @@ static void ggml_compute_forward_group_norm_f32(
 
     GGML_TENSOR_UNARY_OP_LOCALS
 
-    const float eps = 1e-6f; // TODO: make this a parameter
-
     // TODO: optimize
 
+    float eps;
+    memcpy(&eps, dst->op_params + 1, sizeof(float));
+
     int n_channels = src0->ne[2];
     int n_groups = dst->op_params[0];
    int n_channels_per_group = (n_channels + n_groups - 1) / n_groups;
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index 54cef05c3..2f4117a62 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -1511,6 +1511,7 @@ struct test_group_norm : public test_case {
     const ggml_type type;
     const std::array<int64_t, 4> ne;
     const int32_t num_groups;
+    const float eps;
 
     std::string vars() override {
         return VARS_TO_STR3(type, ne, num_groups);
@@ -1518,12 +1519,13 @@ struct test_group_norm : public test_case {
 
     test_group_norm(ggml_type type = GGML_TYPE_F32,
            std::array<int64_t, 4> ne = {64, 64, 320, 1},
-            int32_t num_groups = 32)
-        : type(type), ne(ne), num_groups(num_groups) {}
+            int32_t num_groups = 32,
+            float eps = 1e-6f)
+        : type(type), ne(ne), num_groups(num_groups), eps(eps) {}
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_tensor * out = ggml_group_norm(ctx, a, num_groups);
+        ggml_tensor * out = ggml_group_norm(ctx, a, num_groups, eps);
         return out;
     }
 };
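
Note (not part of the patch above): after this change every caller of ggml_group_norm() / ggml_group_norm_inplace() must pass the epsilon explicitly instead of relying on the previously hardcoded 1e-6f. The following is a minimal caller-side sketch using only the public ggml.h API as modified here; the context size and tensor shape are illustrative (the shape mirrors the test case), and graph building/compute is omitted.

    #include "ggml.h"

    // allocate a small ggml context (size is illustrative)
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // ne0 x ne1 x channels x batch, as in the stable-diffusion style group norm
    struct ggml_tensor * a = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 64, 64, 320, 1);

    // old call: ggml_group_norm(ctx, a, 32);
    // new call: the epsilon is stored in op_params[1] and read back by each backend
    struct ggml_tensor * out = ggml_group_norm(ctx, a, /*n_groups =*/ 32, /*eps =*/ 1e-6f);

    (void) out;
    ggml_free(ctx);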