cuda : add TODO for calling cublas from kernel + using mem pool

This commit is contained in:
Georgi Gerganov 2023-10-24 16:33:24 +03:00
parent 27c34c0112
commit d798a17c34
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735

View File

@@ -7149,6 +7149,7 @@ static void ggml_cuda_mul_mat_mat_batched_cublas(const ggml_tensor * src0, const
CUBLAS_GEMM_DEFAULT_TENSOR_OP));
} else {
// use cublasGemmBatchedEx
// TODO: look into calling cuBLAS from a kernel and using the mem pool, see https://github.com/ggerganov/llama.cpp/pull/3749#discussion_r1369997000
const int ne23 = ne12*ne13;
// TODO: avoid this alloc