From d798a17c34f2326093d0cf2c0ea90b8fded15dc6 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Tue, 24 Oct 2023 16:33:24 +0300
Subject: [PATCH] cuda : add TODO for calling cublas from kernel + using mem pool

---
 ggml-cuda.cu | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 81f6e76e2..db053e3b8 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -7149,6 +7149,7 @@ static void ggml_cuda_mul_mat_mat_batched_cublas(const ggml_tensor * src0, const
                 CUBLAS_GEMM_DEFAULT_TENSOR_OP));
     } else {
         // use cublasGemmBatchedEx
+        // TODO: https://github.com/ggerganov/llama.cpp/pull/3749#discussion_r1369997000
         const int ne23 = ne12*ne13;

         // TODO: avoid this alloc
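
Note on the TODO above: the linked PR discussion is about avoiding the per-call allocation of the pointer arrays passed to cublasGemmBatchedEx, e.g. by drawing them from a CUDA memory pool. Below is a minimal, hypothetical sketch of that idea (not the llama.cpp implementation); it assumes a contiguous batch layout, and the names gemm_batched_pooled and k_fill_ptrs are made up for illustration.

    // Sketch: allocate the cublasGemmBatchedEx pointer arrays with stream-ordered
    // cudaMallocAsync (served from the device's current memory pool) instead of a
    // synchronous allocation on the hot path.
    #include <cublas_v2.h>
    #include <cuda_runtime.h>

    // Fill the device-side pointer arrays from a contiguous batch layout:
    // A is batch x (m*k), B is batch x (k*n), C is batch x (m*n), column-major.
    __global__ void k_fill_ptrs(const float * A, const float * B, float * C,
                                const void ** A_ptrs, const void ** B_ptrs, void ** C_ptrs,
                                int m, int n, int k, int batch) {
        int i = blockIdx.x*blockDim.x + threadIdx.x;
        if (i >= batch) return;
        A_ptrs[i] = A + (size_t) i*m*k;
        B_ptrs[i] = B + (size_t) i*k*n;
        C_ptrs[i] = C + (size_t) i*m*n;
    }

    static void gemm_batched_pooled(cublasHandle_t handle, cudaStream_t stream,
                                    const float * dA, const float * dB, float * dC,
                                    int m, int n, int k, int batch) {
        const float alpha = 1.0f, beta = 0.0f;

        // stream-ordered allocations from the device's memory pool
        const void ** A_ptrs; const void ** B_ptrs; void ** C_ptrs;
        cudaMallocAsync((void **) &A_ptrs, batch*sizeof(void *), stream);
        cudaMallocAsync((void **) &B_ptrs, batch*sizeof(void *), stream);
        cudaMallocAsync((void **) &C_ptrs, batch*sizeof(void *), stream);

        // compute the per-batch pointers on the device, no host round trip
        k_fill_ptrs<<<(batch + 255)/256, 256, 0, stream>>>(dA, dB, dC, A_ptrs, B_ptrs, C_ptrs, m, n, k, batch);

        cublasSetStream(handle, stream);
        cublasGemmBatchedEx(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k,
                            &alpha,
                            A_ptrs, CUDA_R_32F, m,
                            B_ptrs, CUDA_R_32F, k,
                            &beta,
                            C_ptrs, CUDA_R_32F, m,
                            batch,
                            CUBLAS_COMPUTE_32F, CUBLAS_GEMM_DEFAULT);

        // return the arrays to the pool without synchronizing the stream
        cudaFreeAsync((void *) A_ptrs, stream);
        cudaFreeAsync((void *) B_ptrs, stream);
        cudaFreeAsync(C_ptrs, stream);
    }

By default cudaMallocAsync is served from the device's default memory pool; the pool's behavior (e.g. its release threshold via cudaMemPoolAttrReleaseThreshold) can be tuned with cudaMemPoolSetAttribute if the allocations should stay cached between calls.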