cuda : add TODO for calling cublas from kernel + using mem pool

2024-12-26 03:14:35 +00:00 · 2023-10-24 16:33:24 +03:00 · 2023-10-24 16:33:24 +03:00 · d798a17c34
commit d798a17c34
parent 27c34c0112
1 changed file with 1 addition and 0 deletions
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -7149,6 +7149,7 @@ static void ggml_cuda_mul_mat_mat_batched_cublas(const ggml_tensor * src0, const
                CUBLAS_GEMM_DEFAULT_TENSOR_OP));
    } else {
        // use cublasGemmBatchedEx
+        // TODO: https://github.com/ggerganov/llama.cpp/pull/3749#discussion_r1369997000
        const int ne23 = ne12*ne13;

        // TODO: avoid this alloc