cann: Add host buffer type for Ascend NPU (#9406)
Some checks failed
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/full-cuda.Dockerfile platforms:linux/amd64 tag:full-cuda]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/full.Dockerfile platforms:linux/amd64,linux/arm64 tag:full]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli-cuda.Dockerfile platforms:linux/amd64 tag:light-cuda]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli-intel.Dockerfile platforms:linux/amd64 tag:light-intel]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli.Dockerfile platforms:linux/amd64,linux/arm64 tag:light]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server-cuda.Dockerfile platforms:linux/amd64 tag:server-cuda]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server-intel.Dockerfile platforms:linux/amd64 tag:server-intel]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server.Dockerfile platforms:linux/amd64,linux/arm64 tag:server]) (push) Waiting to run
Nix CI / nix-eval (macos-latest) (push) Waiting to run
Nix CI / nix-eval (ubuntu-latest) (push) Waiting to run
Nix CI / nix-build (macos-latest) (push) Waiting to run
Nix CI / nix-build (ubuntu-latest) (push) Waiting to run
flake8 Lint / Lint (push) Waiting to run
Python Type-Check / pyright type-check (push) Waiting to run
Python check requirements.txt / check-requirements (push) Has been cancelled

* feat: Add host buffer type for Ascend NPU(CANN backend)

* fix some checking errors

* Add a few comments
This commit is contained in:
Dou Xinpeng 2024-09-12 19:46:43 +08:00 committed by GitHub
parent e665744317
commit e6b7801bd1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 121 additions and 0 deletions

View File

@ -80,6 +80,13 @@ ggml_backend_cann_buffer_type(int32_t device);
*/
GGML_API GGML_CALL int32_t ggml_backend_cann_get_device_count(void);
/**
* @brief pinned host buffer for use with the CPU backend for faster copies between CPU and NPU.
*
* @return A pointer to the host buffer type interface.
*/
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void);
/**
* @brief Retrieves the description of a specific CANN device.
*

View File

@ -1221,6 +1221,116 @@ ggml_backend_cann_buffer_type(int32_t device) {
return &ggml_backend_cann_buffer_types[device];
}
/**
* @brief Retrieves the name associated with a CANN host buffer type.
*
* This function returns the descriptive name associated with the specified
* CANN host buffer type context.
*
* @param buft Pointer to the host buffer type context.
* @return Const pointer to the C-style string containing the name.
*/
GGML_CALL static const char * ggml_backend_cann_host_buffer_type_name(ggml_backend_buffer_type_t buft) {
return "CANN_Host";
GGML_UNUSED(buft);
}
/**
* @brief Retrieves the name associated with a CANN host buffer.
*
* This function returns the descriptive name associated with the specified
* CANN host buffer context.
*
* @param buft Pointer to the host buffer context.
* @return Const pointer to the C-style string containing the name.
*/
GGML_CALL static const char * ggml_backend_cann_host_buffer_name(ggml_backend_buffer_t buffer) {
return "CANN_Host";
GGML_UNUSED(buffer);
}
/**
* @brief Free resources associated with a CANN host buffer.
*
* This function frees the resources associated with a CANN host buffer, including
* its context.
*
* @param buffer The CANN host buffer to free.
*/
GGML_CALL static void ggml_backend_cann_host_buffer_free(ggml_backend_buffer_t buffer) {
ACL_CHECK(aclrtFreeHost(buffer->context));
}
/**
* @brief Allocates a new CANN host buffer of the specified size.
*
* This function allocates a new CANN host buffer with the given size.
* @param size Size in bytes of the host buffer to allocate.
* @return Pointer to the allocated host buffer, or nullptr if allocation fails.
*/
static void * ggml_cann_host_malloc(size_t size) {
if (getenv("GGML_CANN_NO_PINNED") != nullptr) {
return nullptr;
}
void * hostPtr = nullptr;
aclError err = aclrtMallocHost((void **) &hostPtr, size);
if (err != ACL_SUCCESS) {
GGML_CANN_LOG_WARN("%s: failed to allocate %.2f MiB of pinned memory: %s\n", __func__,
size / 1024.0 / 1024.0, aclGetRecentErrMsg());
return nullptr;
}
return hostPtr;
}
/**
* @brief Allocates a new CANN host buffer of the specified type and size.
*
* @param buft Pointer to the host buffer type context.
* @param size Size in bytes of the host buffer to allocate.
* @return Pointer to the allocated host buffer, or CPU buffer pointer if allocation fails.
*/
GGML_CALL static ggml_backend_buffer_t ggml_backend_cann_host_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
void * hostPtr = ggml_cann_host_malloc(size);
if (hostPtr == nullptr) {
// fallback to cpu buffer
return ggml_backend_buft_alloc_buffer(ggml_backend_cpu_buffer_type(), size);
}
ggml_backend_buffer_t buffer = ggml_backend_cpu_buffer_from_ptr(hostPtr, size);
buffer->buft = buft;
buffer->iface.get_name = ggml_backend_cann_host_buffer_name;
buffer->iface.free_buffer = ggml_backend_cann_host_buffer_free;
return buffer;
}
/**
* @brief Interface for managing CANN host buffer types in the GGML backend.
*
* Provides function pointers for allocating, querying properties, and managing
* memory for CANN buffer types in the GGML backend.
*/
GGML_CALL ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type() {
static struct ggml_backend_buffer_type ggml_backend_cann_buffer_type_host = {
/* .iface = */ {
/* .get_name = */ ggml_backend_cann_host_buffer_type_name,
/* .alloc_buffer = */ ggml_backend_cann_host_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
},
/* .context = */ nullptr,
};
return &ggml_backend_cann_buffer_type_host;
}
/**
* @brief Computes the forward operation for a given tensor using CANN
* operations.

View File

@ -2156,6 +2156,10 @@ static ggml_backend_buffer_type_t llama_default_buffer_type_cpu(bool host_buffer
if (host_buffer) {
buft = ggml_backend_sycl_host_buffer_type();
}
#elif defined(GGML_USE_CANN)
if (host_buffer) {
buft = ggml_backend_cann_host_buffer_type();
}
#elif defined(GGML_USE_CPU_HBM)
buft = ggml_backend_cpu_hbm_buffer_type();
#elif defined(GGML_USE_VULKAN)