From 1be2b8c19b318a4637682719bc6c03f4ea0823d3 Mon Sep 17 00:00:00 2001 From: slaren Date: Wed, 20 Sep 2023 15:12:51 +0200 Subject: [PATCH] ggml : revert change to ggml_cpy, add ggml_cont_Nd instead (#3275) ggml-ci --- ggml.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++++- ggml.h | 28 +++++++++++++++++++++++++++- llama.cpp | 14 ++++---------- 3 files changed, 80 insertions(+), 12 deletions(-) diff --git a/ggml.c b/ggml.c index 764865939..35751342f 100644 --- a/ggml.c +++ b/ggml.c @@ -6343,7 +6343,7 @@ static struct ggml_tensor * ggml_cpy_impl( } // make a view of the destination - struct ggml_tensor * result = b->op == GGML_OP_NONE ? b : ggml_view_tensor(ctx, b); + struct ggml_tensor * result = ggml_view_tensor(ctx, b); if (strlen(b->name) > 0) { ggml_format_name(result, "%s (copy of %s)", b->name, a->name); } else { @@ -6406,6 +6406,54 @@ struct ggml_tensor * ggml_cont_inplace( return ggml_cont_impl(ctx, a, true); } + +// make contiguous, with new shape +GGML_API struct ggml_tensor * ggml_cont_1d( + struct ggml_context * ctx, + struct ggml_tensor * a, + int64_t ne0) { + return ggml_cont_4d(ctx, a, ne0, 1, 1, 1); +} + +GGML_API struct ggml_tensor * ggml_cont_2d( + struct ggml_context * ctx, + struct ggml_tensor * a, + int64_t ne0, + int64_t ne1) { + return ggml_cont_4d(ctx, a, ne0, ne1, 1, 1); +} + +GGML_API struct ggml_tensor * ggml_cont_3d( + struct ggml_context * ctx, + struct ggml_tensor * a, + int64_t ne0, + int64_t ne1, + int64_t ne2) { + return ggml_cont_4d(ctx, a, ne0, ne1, ne2, 1); +} + +struct ggml_tensor * ggml_cont_4d( + struct ggml_context * ctx, + struct ggml_tensor * a, + int64_t ne0, + int64_t ne1, + int64_t ne2, + int64_t ne3) { + GGML_ASSERT(ggml_nelements(a) == (ne0*ne1*ne2*ne3)); + + bool is_node = false; + + struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3); + ggml_format_name(result, "%s (cont)", a->name); + + result->op = GGML_OP_CONT; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src[0] = a; + + return result; +} + + // ggml_reshape struct ggml_tensor * ggml_reshape( diff --git a/ggml.h b/ggml.h index e2bfa1ae4..545ca438a 100644 --- a/ggml.h +++ b/ggml.h @@ -1049,7 +1049,6 @@ extern "C" { size_t nb1, size_t offset); - // a -> b, return view(b) GGML_API struct ggml_tensor * ggml_cpy( struct ggml_context * ctx, @@ -1072,6 +1071,33 @@ extern "C" { struct ggml_context * ctx, struct ggml_tensor * a); + // make contiguous, with new shape + GGML_API struct ggml_tensor * ggml_cont_1d( + struct ggml_context * ctx, + struct ggml_tensor * a, + int64_t ne0); + + GGML_API struct ggml_tensor * ggml_cont_2d( + struct ggml_context * ctx, + struct ggml_tensor * a, + int64_t ne0, + int64_t ne1); + + GGML_API struct ggml_tensor * ggml_cont_3d( + struct ggml_context * ctx, + struct ggml_tensor * a, + int64_t ne0, + int64_t ne1, + int64_t ne2); + + GGML_API struct ggml_tensor * ggml_cont_4d( + struct ggml_context * ctx, + struct ggml_tensor * a, + int64_t ne0, + int64_t ne1, + int64_t ne2, + int64_t ne3); + // return view(a), b specifies the new shape // TODO: when we start computing gradient, make a copy instead of view GGML_API struct ggml_tensor * ggml_reshape( diff --git a/llama.cpp b/llama.cpp index e3c3568c8..1576c3b86 100644 --- a/llama.cpp +++ b/llama.cpp @@ -2893,9 +2893,7 @@ static struct ggml_cgraph * llm_build_llama( ggml_set_name(KQV_merged, "KQV_merged"); // cur = KQV_merged.contiguous().view(n_embd, n_tokens) - cur = ggml_cpy(ctx0, - KQV_merged, - ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, n_tokens)); + cur = ggml_cont_2d(ctx0, KQV_merged, n_embd, n_tokens); offload_func_v(cur); ggml_set_name(cur, "KQV_merged_contiguous"); @@ -3302,9 +3300,7 @@ static struct ggml_cgraph * llm_build_baichaun( ggml_set_name(KQV_merged, "KQV_merged"); // cur = KQV_merged.contiguous().view(n_embd, n_tokens) - cur = ggml_cpy(ctx0, - KQV_merged, - ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, n_tokens)); + cur = ggml_cont_2d(ctx0, KQV_merged, n_embd, n_tokens); offload_func_v(cur); ggml_set_name(cur, "KQV_merged_contiguous"); @@ -3710,7 +3706,7 @@ static struct ggml_cgraph * llm_build_falcon( offload_func_v(KQV_merged); ggml_set_name(KQV_merged, "KQV_merged"); - cur = ggml_cpy(ctx0, KQV_merged, ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, n_tokens)); + cur = ggml_cont_2d(ctx0, KQV_merged, n_embd, n_tokens); offload_func_v(cur); ggml_set_name(cur, "KQV_merged_contiguous"); @@ -3964,9 +3960,7 @@ static struct ggml_cgraph * llm_build_starcoder( ggml_set_name(KQV_merged, "KQV_merged"); // cur = KQV_merged.contiguous().view(n_embd, n_tokens) - cur = ggml_cpy(ctx0, - KQV_merged, - ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, n_tokens)); + cur = ggml_cont_2d(ctx0, KQV_merged, n_embd, n_tokens); ggml_set_name(cur, "KQV_merged_contiguous"); }