mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-11-15 15:29:53 +00:00
593 lines
25 KiB
C
593 lines
25 KiB
C
|
#ifndef CANN_ACLNN_OPS
|
|||
|
#define CANN_ACLNN_OPS
|
|||
|
|
|||
|
/**
|
|||
|
* @file acl_tensor
|
|||
|
* @brief This file contains related functions of ggml_tensor and acl_tensor.
|
|||
|
* Contains conversion from ggml_tensor to acl_tensor, broadcast and other
|
|||
|
* functions.
|
|||
|
* @author hipudding <huafengchun@gmail.com>
|
|||
|
* @author wangshuai09 <391746016@qq.com>
|
|||
|
* @date July 15, 2024
|
|||
|
*
|
|||
|
* Copyright (c) 2023-2024 The ggml authors
|
|||
|
*
|
|||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|||
|
* of this software and associated documentation files (the "Software"), to
|
|||
|
* deal in the Software without restriction, including without limitation the
|
|||
|
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
|||
|
* sell copies of the Software, and to permit persons to whom the Software is
|
|||
|
* furnished to do so, subject to the following conditions:
|
|||
|
*
|
|||
|
* The above copyright notice and this permission notice shall be included in
|
|||
|
* all copies or substantial portions of the Software.
|
|||
|
*
|
|||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|||
|
* IN THE SOFTWARE.
|
|||
|
*/
|
|||
|
|
|||
|
#include <aclnnop/aclnn_add.h>
|
|||
|
#include <aclnnop/aclnn_arange.h>
|
|||
|
#include <aclnnop/aclnn_argsort.h>
|
|||
|
#include <aclnnop/aclnn_cat.h>
|
|||
|
#include <aclnnop/aclnn_clamp.h>
|
|||
|
#include <aclnnop/aclnn_div.h>
|
|||
|
#include <aclnnop/aclnn_gelu.h>
|
|||
|
#include <aclnnop/aclnn_hardsigmoid.h>
|
|||
|
#include <aclnnop/aclnn_hardswish.h>
|
|||
|
#include <aclnnop/aclnn_leaky_relu.h>
|
|||
|
#include <aclnnop/aclnn_mul.h>
|
|||
|
#include <aclnnop/aclnn_relu.h>
|
|||
|
#include <aclnnop/aclnn_silu.h>
|
|||
|
#include <aclnnop/aclnn_tanh.h>
|
|||
|
#include "acl_tensor.h"
|
|||
|
#include "common.h"
|
|||
|
|
|||
|
/**
|
|||
|
* @brief Repeats a ggml tensor along each dimension to match the dimensions
|
|||
|
* of another tensor.
|
|||
|
*
|
|||
|
* @details This function repeats the elements of a source ggml tensor along
|
|||
|
* each dimension to create a destination tensor with the specified
|
|||
|
* dimensions. The operation is performed using the ACL backend and
|
|||
|
* executed asynchronously on the device.
|
|||
|
*
|
|||
|
* @param ctx The CANN context used for operations.
|
|||
|
* @param dst The ggml tensor representing the destination, which op is
|
|||
|
* GGML_OP_REPEAT and specifies the desired dimensions.
|
|||
|
*/
|
|||
|
void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|||
|
|
|||
|
/**
|
|||
|
* @brief Adds two ggml tensors using the CANN backend.
|
|||
|
*
|
|||
|
* @details This function performs an element-wise addition of two tensors. In
|
|||
|
* case the tensors do not have the same shape, one or both tensors
|
|||
|
* will be broadcasted to match the shape of the other before the
|
|||
|
* addition is performed.The formula for the operation is given by:
|
|||
|
* \f[
|
|||
|
* \text{dst} = \text{acl_src0} + \alpha \cdot \text{acl_src1}
|
|||
|
* \f]
|
|||
|
*
|
|||
|
* @param ctx The CANN context used for operations.
|
|||
|
* @param dst The ggml tensor representing the destination, result of the
|
|||
|
* addition is stored at dst->data, and dst->op is `GGML_OP_ADD`
|
|||
|
*/
|
|||
|
void ggml_cann_add(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|||
|
|
|||
|
/**
|
|||
|
* @brief Applies the Leaky ReLU activation function to a tensor using the CANN
|
|||
|
* backend.
|
|||
|
*
|
|||
|
* @details This function computes the Leaky ReLU activation for each element of
|
|||
|
* the input tensor. The Leaky ReLU function allows a small gradient
|
|||
|
* when the unit is not active (i.e., when the input is negative). The
|
|||
|
* Leaky ReLU function is defined as:
|
|||
|
* \f[
|
|||
|
* \text{dst} = \max(0, src) + \text{negativeSlope} \cdot \min(0,
|
|||
|
* src)
|
|||
|
* \f]
|
|||
|
* `negativeSlope` is in dst->params.
|
|||
|
*
|
|||
|
* @param ctx The CANN context used for operations.
|
|||
|
* @param dst The destination tensor where the result of the Leaky ReLU
|
|||
|
* activation is stored, which op is `GGML_OP_LEAKY_RELU`
|
|||
|
*/
|
|||
|
void ggml_cann_leaky_relu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|||
|
|
|||
|
/**
|
|||
|
* @brief Concatenates multiple tensors along a specified dimension using the
|
|||
|
* CANN backend.
|
|||
|
*
|
|||
|
* @param ctx The CANN context used for operations.
|
|||
|
* @param tensorList A pointer to the list of tensors to be concatenated.
|
|||
|
* @param dst The destination tensor where the result of the
|
|||
|
* concatenation is stored. dst->op is `GGML_OP_CONCAT`.
|
|||
|
* @param concat_dim The dimension along which the tensors are concatenated.
|
|||
|
*
|
|||
|
* @attention tensorList length should be 2 and the dimension using for concat
|
|||
|
* default to 1.
|
|||
|
*/
|
|||
|
void ggml_cann_concat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|||
|
|
|||
|
/**
|
|||
|
* @brief Generates a sequence of evenly spaced values within a specified
|
|||
|
* interval for a ggml tensor using the CANN backend.
|
|||
|
*
|
|||
|
* @details This function creates a sequence of numbers over a specified i
|
|||
|
* nterval, starting from `start`, ending before `stop`, and
|
|||
|
* incrementing by `step`. The sequence is stored in the destination
|
|||
|
* tensor `dst`.
|
|||
|
*
|
|||
|
* @param ctx The CANN context used for operations.
|
|||
|
* @param dst The destination tensor where the generated sequence will be stored.
|
|||
|
* `start`, 'stop' and 'step' are in dst->op_params and dst->op is
|
|||
|
* `GGML_OP_ARANGE`.
|
|||
|
*/
|
|||
|
void ggml_cann_arange(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|||
|
|
|||
|
/**
|
|||
|
* @brief Computes the square of the elements of a ggml tensor using the CANN
|
|||
|
* backend.
|
|||
|
* @details The function sets the second source tensor of the destination
|
|||
|
* tensor `dst` to be equal to the first source tensor. This is
|
|||
|
* effectively squaring the elements since the multiplication becomes
|
|||
|
* `element * element`.
|
|||
|
* @param ctx The CANN context used for operations.
|
|||
|
* @param dst The destination tensor where the squared values will be stored,
|
|||
|
* which dst->op is `GGML_OP_SQR`.
|
|||
|
*/
|
|||
|
void ggml_cann_sqr(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|||
|
|
|||
|
/**
|
|||
|
* @brief Applies a clamp operation to the elements of a ggml tensor using the
|
|||
|
* CANN backend.
|
|||
|
*
|
|||
|
* @details This function clamps the elements of the input tensor `src` to a
|
|||
|
* specified range defined by `min` and `max` values. The result is
|
|||
|
* stored in the destination tensor `dst`. The operation is defined as:
|
|||
|
* \f[
|
|||
|
* y = \max(\min(x, max\_value), min\_value)
|
|||
|
* \f]
|
|||
|
* where `x` is an element of the input tensor, and `y` is the
|
|||
|
* corresponding element in the output tensor.
|
|||
|
* @param ctx The CANN context used for operations.
|
|||
|
* @param dst The destination tensor where the clamped values will be stored.
|
|||
|
* dst->op is `GGML_OP_CLAMP`, `min` and `max` value is in dst->params.
|
|||
|
*/
|
|||
|
void ggml_cann_clamp(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|||
|
|
|||
|
/**
|
|||
|
* @brief Scales the elements of a ggml tensor by a constant factor using the
|
|||
|
* CANN backend.
|
|||
|
*
|
|||
|
* @details This function multiplies each element of the input tensor `src` by
|
|||
|
* a scaling factor `scale`, storing the result in the destination
|
|||
|
* tensor `dst`. The operation is defined as:
|
|||
|
* \f[
|
|||
|
* dst = src \times scale
|
|||
|
* \f]
|
|||
|
*
|
|||
|
* @param ctx The CANN context used for operations.
|
|||
|
* @param dst The destination tensor where the scaled values will be stored.
|
|||
|
* dst->op is `GGML_OP_SCALE` and `scale` value is in dst->params.
|
|||
|
*/
|
|||
|
void ggml_cann_scale(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|||
|
|
|||
|
/**
|
|||
|
* @brief Sorts the elements of a ggml tensor and returns the indices that
|
|||
|
* would sort the tensor using the CANN backend.
|
|||
|
*
|
|||
|
* @details This function performs an argsort operation on the input tensor
|
|||
|
* `src`. It sorts the elements of `src` in either ascending or
|
|||
|
* descending order, depending on the `GGML_SORT_ORDER_DESC`,
|
|||
|
* and returns the indices that would sort the original tensor.
|
|||
|
*
|
|||
|
* @param ctx The CANN context used for operations.
|
|||
|
* @param dst The destination tensor where the sorted indices will be stored.
|
|||
|
* dst->op is `GGML_OP_ARGSORT`.
|
|||
|
*/
|
|||
|
void ggml_cann_argsort(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|||
|
|
|||
|
/**
|
|||
|
* @brief Computes the Layer Normalization for a ggml tensor using the CANN
|
|||
|
* backend.
|
|||
|
*
|
|||
|
* @details This function applies the Layer Normalization operation on the
|
|||
|
* input tensor `src` and stores the result in the destination tensor
|
|||
|
* `dst`. Layer Normalization normalizes the features at each sample in
|
|||
|
* a mini-batch independently. It is commonly used in neural networks
|
|||
|
* to normalize the activations of a layer by adjusting and scaling
|
|||
|
* the outputs.
|
|||
|
* The operation is defined as:
|
|||
|
* \f[
|
|||
|
* \text { out }=\frac{x-\mathrm{E}[x]}{\sqrt{\text{Var}[x]+eps}}
|
|||
|
* \f]
|
|||
|
* `Var` defaults dst->ne[0]. `eps` is in dst->params.
|
|||
|
*
|
|||
|
* @param ctx The CANN context used for operations.
|
|||
|
* @param dst The destination tensor where the normalized values will be stored.
|
|||
|
* @attention `Var` defaults to dst->ne[0].
|
|||
|
*/
|
|||
|
void ggml_cann_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|||
|
|
|||
|
/**
|
|||
|
* @brief Computes the Group Normalization for a ggml tensor using the CANN
|
|||
|
* backend.
|
|||
|
*
|
|||
|
* @brief This function applies the Group Normalization operation on the input
|
|||
|
* tensor `src` and stores the result in the destination tensor `dst`.
|
|||
|
* Group Normalization divides the channels into groups and normalizes
|
|||
|
* the features within each group across spatial locations.
|
|||
|
* It is commonly used in convolutional neural networks to improve
|
|||
|
* training stability and performance.
|
|||
|
* The operation is defined as:
|
|||
|
* \f[
|
|||
|
* \text { out }=\frac{x-\mathrm{E}[x]}{\sqrt{\text{Var}[x]+eps}}
|
|||
|
* \f]
|
|||
|
*
|
|||
|
* @param ctx The CANN context used for operations.
|
|||
|
* @param dst The destination tensor where the normalized values will be stored.
|
|||
|
* `n_groups` is in dst->params, which split C channel to `n_groups`.
|
|||
|
* dst->op is `GGML_OP_GROUP_NORM`.
|
|||
|
*
|
|||
|
* @attention eps defaults to 1e-6f.
|
|||
|
*/
|
|||
|
void ggml_cann_group_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|||
|
|
|||
|
/**
|
|||
|
* @brief Computes the accumulation of tensors using the CANN backend.
|
|||
|
*
|
|||
|
* @details This function performs an accumulation operation on two tensors.
|
|||
|
* Depending on the `inplace` flag, it either updates the destination
|
|||
|
* tensor `dst` in place by adding `alpha * src1` to it, or it creates
|
|||
|
* a new tensor as the result of `src0 + alpha * src1` and stores it in
|
|||
|
* `dst`.
|
|||
|
* The operation is defined as:
|
|||
|
* \f[
|
|||
|
* dst = src0 + alpha \times src1
|
|||
|
* \f]
|
|||
|
* if `inplace` is `true`, `src0` is equal to 'dst'.
|
|||
|
* @param ctx The CANN context used for operations.
|
|||
|
* @param dst The destination tensor where the accumulated values will be stored.
|
|||
|
* `inplace` is in dst->params, and dst->op is `GGML_OP_ACC`.
|
|||
|
*/
|
|||
|
void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|||
|
|
|||
|
/**
|
|||
|
* @brief Computes the sum of elements along the last dimension of a ggml tensor
|
|||
|
* using the CANN backend.
|
|||
|
*
|
|||
|
* @details This function performs a reduction sum operation along the last
|
|||
|
* dimension of the input tensor `src`. The result of the sum is stored
|
|||
|
* in the destination tensor `dst`.
|
|||
|
*
|
|||
|
* @param ctx The CANN context used for operations.
|
|||
|
* @param dst The destination tensor where the reduced values will be stored。
|
|||
|
* dst->op is `GGML_OP_SUM_ROWS`.
|
|||
|
*
|
|||
|
* @attention `reduce_dims` defaults to 3, which means the last dimension.
|
|||
|
*/
|
|||
|
void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|||
|
|
|||
|
/**
|
|||
|
* @brief Upsamples a ggml tensor using nearest neighbor interpolation using
|
|||
|
* the CANN backend.
|
|||
|
*
|
|||
|
* @details This function performs upsampling of the input tensor `src` using
|
|||
|
* nearest neighbor interpolation. The upsampling is applied to the
|
|||
|
* height and width dimensions (last two dimensions) of the tensor. The
|
|||
|
* result is stored in the destination tensor `dst`, which must have
|
|||
|
* the appropriate dimensions for the upsampled output.
|
|||
|
*
|
|||
|
* @param ctx The CANN context used for operations.
|
|||
|
* @param dst The destination tensor where the upsampled values will be stored.
|
|||
|
* dst->op is `GGML_OP_UPSCALE`.
|
|||
|
*/
|
|||
|
void ggml_cann_upsample_nearest2d(ggml_backend_cann_context& ctx,
|
|||
|
ggml_tensor* dst);
|
|||
|
|
|||
|
/**
|
|||
|
* @brief Pads a ggml tensor to match the dimensions of the destination tensor
|
|||
|
* using the CANN backend.
|
|||
|
*
|
|||
|
* @details This function pads the input tensor `src` so that it matches the
|
|||
|
* dimensions of the destination tensor `dst`. The amount of padding
|
|||
|
* is calculated based on the difference in sizes between `src` and
|
|||
|
* `dst` along each dimension. The padded tensor is stored in `dst`.
|
|||
|
*
|
|||
|
* @param ctx The CANN context used for operations.
|
|||
|
* @param dst The destination tensor, which specifies the target dimensions for
|
|||
|
* padding. dst->op is `GGML_OP_PAD`.
|
|||
|
*/
|
|||
|
void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|||
|
|
|||
|
/**
|
|||
|
* @brief Executes a 2D pooling operation on a ggml tensor using the CANN
|
|||
|
* backend.
|
|||
|
*
|
|||
|
* @details This function dispatches the execution of a 2D pooling operation on
|
|||
|
* the input tensor `dst`. The type of pooling (average or max) is
|
|||
|
* determined by the `op` parameter, which is read from the operation
|
|||
|
* parameters of `dst`. The function supports average pooling
|
|||
|
* (`GGML_OP_POOL_AVG`) and max pooling (`GGML_OP_POOL_MAX`). If an
|
|||
|
* invalid operation is encountered, the function asserts a failure.
|
|||
|
*
|
|||
|
* @param ctx The CANN context used for operations.
|
|||
|
* @param dst The destination tensor on which the pooling operation is to be
|
|||
|
* performed. dst->op is `GGML_OP_POOL_2D`.
|
|||
|
*/
|
|||
|
void ggml_cann_pool2d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|||
|
|
|||
|
/**
|
|||
|
* @brief Duplicates a ggml tensor using the CANN backend.
|
|||
|
*
|
|||
|
* @details This function duplicates the contents of the source tensor `src` to
|
|||
|
* the destination tensor `dst`. The function supports various tensor
|
|||
|
* types and configurations, including handling of extra data, type
|
|||
|
* conversions, and special cases for contiguous and non-contiguous
|
|||
|
* tensors.
|
|||
|
*
|
|||
|
* @param ctx The CANN context used for operations.
|
|||
|
* @param dst The destination tensor where the duplicated data will be stored.
|
|||
|
* dst->op is `GGML_OP_DUP`
|
|||
|
*
|
|||
|
* @attention Only support Fp16/FP32. Not support when src and dst have
|
|||
|
* different shape and dst is no-contiguous.
|
|||
|
* @note: This func need to simplify.
|
|||
|
*/
|
|||
|
void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|||
|
|
|||
|
/**
|
|||
|
* @brief Computes the Root Mean Square (RMS) normalization of a ggml tensor
|
|||
|
* using the CANN backend.
|
|||
|
*
|
|||
|
* @details This function applies RMS normalization to the input tensor `src`
|
|||
|
* and stores the result in the destination tensor `dst`. RMS
|
|||
|
* normalization involves computing the root mean square of the input
|
|||
|
* tensor along a specified dimension and then dividing each element of
|
|||
|
* the tensor by this value, adjusted by a small epsilon value to
|
|||
|
* prevent division by zero.
|
|||
|
* The operation is defined as:
|
|||
|
* \f[
|
|||
|
* \text{RmsNorm}\left(x_i\right)=\frac{x_i}{\text{Rms}(\mathbf{x})} g_i,
|
|||
|
* \quad \text { where } \text{Rms}(\mathbf{x})=\sqrt{\frac{1}{n} \sum_{i=1}^n x_i^2+e p s}
|
|||
|
* \f]
|
|||
|
* `eps` is in dst->op_params.
|
|||
|
* @param ctx The CANN context used for operations.
|
|||
|
* @param dst The destination tensor where the normalized values will be stored.
|
|||
|
* dst->op is `GGML_OP_RMS_NORM`.
|
|||
|
*/
|
|||
|
void ggml_cann_rms_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|||
|
|
|||
|
/**
|
|||
|
* @brief Applies a diagonal mask to the tensor with a specified value.
|
|||
|
*
|
|||
|
* @details This function creates a mask tensor filled with ones, then applies
|
|||
|
* an upper triangular and lower triangular operation to it based on
|
|||
|
* the number of past elements specified. Afterward, it adds the masked
|
|||
|
* tensor to the destination tensor in-place.
|
|||
|
*
|
|||
|
* @param ctx The backend CANN context used for operations.
|
|||
|
* @param dst The destination tensor where the result will be stored. dst->op is
|
|||
|
* `GGML_OP_DIAG_MASK`
|
|||
|
* @param value The value to use for masking.
|
|||
|
*/
|
|||
|
void ggml_cann_diag_mask(ggml_backend_cann_context& ctx, ggml_tensor* dst, float value);
|
|||
|
|
|||
|
/**
|
|||
|
* @brief Performs an image-to-column transformation on the input tensor.
|
|||
|
*
|
|||
|
* @details This function takes an input tensor and applies an image-to-column
|
|||
|
* operation, converting spatial dimensions into column-like
|
|||
|
* structures suitable for convolutional operations. It supports both
|
|||
|
* half-precision (F16) and single-precision (F32) floating-point data
|
|||
|
* types.
|
|||
|
*
|
|||
|
* @param ctx The backend CANN context for executing operations.
|
|||
|
* @param dst The destination tensor that stores the result of the operation.
|
|||
|
* dst->op is `GGML_OP_IM2COL`.
|
|||
|
*/
|
|||
|
void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|||
|
|
|||
|
/**
|
|||
|
* @brief Computes time step embeddings using sine and cosine functions.
|
|||
|
*
|
|||
|
* @details This function calculates time step embeddings by applying sine and
|
|||
|
* cosine transformations to a given input tensor, which is typically
|
|||
|
* used in temporal models like diffusion models or transformers to
|
|||
|
* encode time information effectively.
|
|||
|
*
|
|||
|
* @param ctx The backend CANN context for executing operations.
|
|||
|
* @param dst The destination tensor where the result of the embedding operation
|
|||
|
* will be stored. dst->op is `GGML_OP_TIMESTEP_EMBEDDING`.
|
|||
|
*/
|
|||
|
void ggml_cann_timestep_embedding(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|||
|
|
|||
|
// @see ggml_cann_dup.
|
|||
|
void ggml_cann_cpy(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|||
|
|
|||
|
/**
|
|||
|
* @brief Computes the softmax activation with optional masking.
|
|||
|
*
|
|||
|
* @details This function computes the softmax activation over the input tensor,
|
|||
|
* optionally applying a mask and scaling factor. It supports both FP16
|
|||
|
* and FP32 data types and can handle masking by broadcasting the mask
|
|||
|
* across rows if necessary.
|
|||
|
* The function performs the following steps:
|
|||
|
* 1. Multiplies the input tensor by a scale factor.
|
|||
|
* 2. Optionally casts the mask tensor to FP32 if it is in FP16 format.
|
|||
|
* 3. Broadcasts the mask tensor if its dimensions do not match the
|
|||
|
* input tensor's dimensions.
|
|||
|
* 4. Adds the mask to the scaled input tensor.
|
|||
|
* 5. Applies the softmax activation function along the specified
|
|||
|
* dimension.
|
|||
|
*
|
|||
|
* @param ctx The backend CANN context for executing operations.
|
|||
|
* @param dst The destination tensor where the result will be stored. dst->op is
|
|||
|
* `GGML_OP_SOFTMAX`.
|
|||
|
*/
|
|||
|
void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|||
|
|
|||
|
/**
|
|||
|
* @brief Extracts specific rows from a tensor based on indices.
|
|||
|
*
|
|||
|
* @details This function retrieves rows from a source tensor src0 according to
|
|||
|
* the indices provided in another tensor src1 and stores the result in
|
|||
|
* a destination tensor (\p dst). It supports different data types
|
|||
|
* including F32, F16, Q4_0, and Q8_0.
|
|||
|
*
|
|||
|
* @param ctx The backend CANN context for executing operations.
|
|||
|
* @param dst The destination tensor where the extracted rows will be stored.
|
|||
|
* dst->op is `GGML_OP_GET_ROWS`.
|
|||
|
*/
|
|||
|
void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|||
|
|
|||
|
/**
|
|||
|
* @brief Executes matrix multiplication for the given tensor.
|
|||
|
*
|
|||
|
* @details This function performs matrix multiplication on the source tensors
|
|||
|
* associated with the destination tensor. It supports matrix
|
|||
|
* multiplication F32, F16, and Q8_0.
|
|||
|
*
|
|||
|
* @param ctx The backend CANN context for executing operations.
|
|||
|
* @param dst The destination tensor for storing the result of the matrix
|
|||
|
* multiplication. dst->op is `GGML_OP_MUL_MAT`.
|
|||
|
*/
|
|||
|
void ggml_cann_mul_mat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|||
|
|
|||
|
/**
|
|||
|
* @brief Applies Rotary Positional Embedding (RoPE) to the input tensor.
|
|||
|
*
|
|||
|
* @details This function implements the RoPE mechanism, which is a method to
|
|||
|
* encode positional information into sequence data, particularly
|
|||
|
* useful in transformer models. It supports both F32 and F16 data
|
|||
|
* types.
|
|||
|
*
|
|||
|
* @param ctx The backend CANN context for executing operations.
|
|||
|
* @param dst The destination tensor where the RoPE-transformed data will be
|
|||
|
* stored. dst->op is `GGML_OP_ROPE`.
|
|||
|
*
|
|||
|
* @note The function currently does not support cases where the n_dims is less
|
|||
|
* than the input tensor's first dimension.
|
|||
|
* @note The function currently does not support cases where the freq_factors is
|
|||
|
* not NULL.
|
|||
|
* @note The function currently does not support cases where the ext_factor is
|
|||
|
* not equal 0.
|
|||
|
* @note The function currently does not support cases where the freq_scale is
|
|||
|
* not equal 1.
|
|||
|
*/
|
|||
|
void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
|||
|
|
|||
|
template <aclnnStatus getWorkspaceSize(const aclTensor*, const aclTensor*,
|
|||
|
aclTensor*, uint64_t*, aclOpExecutor**),
|
|||
|
aclnnStatus execute(void*, uint64_t, aclOpExecutor*, aclrtStream)>
|
|||
|
void ggml_cann_mul_div(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|||
|
ggml_tensor* src0 = dst->src[0];
|
|||
|
ggml_tensor* src1 = dst->src[1];
|
|||
|
GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
|
|||
|
|
|||
|
aclTensor* acl_src0;
|
|||
|
aclTensor* acl_src1;
|
|||
|
aclTensor* acl_dst;
|
|||
|
|
|||
|
// Need bcast
|
|||
|
if (!ggml_are_same_shape(src0, src1) && ggml_cann_need_bcast(src0, src1)) {
|
|||
|
BCAST_SHAPE(src0, src1)
|
|||
|
acl_src0 = ggml_cann_create_tensor(src0, BCAST_PARAM(src0));
|
|||
|
acl_src1 = ggml_cann_create_tensor(src1, BCAST_PARAM(src1));
|
|||
|
acl_dst = ggml_cann_create_tensor(dst, BCAST_PARAM(src0));
|
|||
|
} else {
|
|||
|
acl_src0 = ggml_cann_create_tensor(src0);
|
|||
|
acl_src1 = ggml_cann_create_tensor(src1);
|
|||
|
acl_dst = ggml_cann_create_tensor(dst);
|
|||
|
}
|
|||
|
|
|||
|
uint64_t workspaceSize = 0;
|
|||
|
aclOpExecutor* executor;
|
|||
|
void* workspaceAddr = nullptr;
|
|||
|
|
|||
|
ACL_CHECK(getWorkspaceSize(acl_src0, acl_src1, acl_dst, &workspaceSize,
|
|||
|
&executor));
|
|||
|
if (workspaceSize > 0) {
|
|||
|
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|||
|
workspaceAddr = workspace_allocator.get();
|
|||
|
}
|
|||
|
|
|||
|
aclrtStream main_stream = ctx.stream();
|
|||
|
ACL_CHECK(execute(workspaceAddr, workspaceSize, executor, main_stream));
|
|||
|
|
|||
|
ACL_CHECK(aclDestroyTensor(acl_src0));
|
|||
|
ACL_CHECK(aclDestroyTensor(acl_src1));
|
|||
|
ACL_CHECK(aclDestroyTensor(acl_dst));
|
|||
|
}
|
|||
|
|
|||
|
// Activation functions template.
|
|||
|
template <aclnnStatus getWorkspaceSize(const aclTensor*, aclTensor*, uint64_t*,
|
|||
|
aclOpExecutor**),
|
|||
|
aclnnStatus execute(void*, uint64_t, aclOpExecutor*,
|
|||
|
const aclrtStream)>
|
|||
|
void ggml_cann_activation(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|||
|
ggml_tensor* src = dst->src[0];
|
|||
|
|
|||
|
GGML_ASSERT(src->type == GGML_TYPE_F32);
|
|||
|
GGML_ASSERT(dst->type == GGML_TYPE_F32);
|
|||
|
|
|||
|
aclTensor* acl_src = ggml_cann_create_tensor(src);
|
|||
|
aclTensor* acl_dst = ggml_cann_create_tensor(dst);
|
|||
|
|
|||
|
uint64_t workspaceSize = 0;
|
|||
|
aclOpExecutor* executor;
|
|||
|
void* workspaceAddr = nullptr;
|
|||
|
|
|||
|
ACL_CHECK(getWorkspaceSize(acl_src, acl_dst, &workspaceSize, &executor));
|
|||
|
if (workspaceSize > 0) {
|
|||
|
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|||
|
workspaceAddr = workspace_allocator.get();
|
|||
|
}
|
|||
|
|
|||
|
aclrtStream main_stream = ctx.stream();
|
|||
|
ACL_CHECK(execute(workspaceAddr, workspaceSize, executor, main_stream));
|
|||
|
|
|||
|
ACL_CHECK(aclDestroyTensor(acl_src));
|
|||
|
ACL_CHECK(aclDestroyTensor(acl_dst));
|
|||
|
}
|
|||
|
|
|||
|
// Activation functions template for const aclTensors.
|
|||
|
template <aclnnStatus getWorkspaceSize(const aclTensor*, const aclTensor*,
|
|||
|
uint64_t*, aclOpExecutor**),
|
|||
|
aclnnStatus execute(void*, uint64_t, aclOpExecutor*,
|
|||
|
const aclrtStream)>
|
|||
|
void ggml_cann_activation(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|||
|
ggml_tensor* src = dst->src[0];
|
|||
|
|
|||
|
GGML_ASSERT(src->type == GGML_TYPE_F32);
|
|||
|
GGML_ASSERT(dst->type == GGML_TYPE_F32);
|
|||
|
|
|||
|
aclTensor* acl_src = ggml_cann_create_tensor(src);
|
|||
|
aclTensor* acl_dst = ggml_cann_create_tensor(dst);
|
|||
|
|
|||
|
uint64_t workspaceSize = 0;
|
|||
|
aclOpExecutor* executor;
|
|||
|
void* workspaceAddr = nullptr;
|
|||
|
|
|||
|
ACL_CHECK(getWorkspaceSize(acl_src, acl_dst, &workspaceSize, &executor));
|
|||
|
if (workspaceSize > 0) {
|
|||
|
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
|||
|
workspaceAddr = workspace_allocator.get();
|
|||
|
}
|
|||
|
|
|||
|
aclrtStream main_stream = ctx.stream();
|
|||
|
ACL_CHECK(execute(workspaceAddr, workspaceSize, executor, main_stream));
|
|||
|
|
|||
|
ACL_CHECK(aclDestroyTensor(acl_src));
|
|||
|
ACL_CHECK(aclDestroyTensor(acl_dst));
|
|||
|
}
|
|||
|
|
|||
|
#endif // CANN_ACLNN_OPS
|