mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-26 03:14:35 +00:00
cuda : add TODO for calling cublas from kernel + using mem pool
This commit is contained in:
parent
27c34c0112
commit
d798a17c34
@@ -7149,6 +7149,7 @@ static void ggml_cuda_mul_mat_mat_batched_cublas(const ggml_tensor * src0, const
|
||||
CUBLAS_GEMM_DEFAULT_TENSOR_OP));
|
||||
} else {
|
||||
// use cublasGemmBatchedEx
|
||||
// TODO: https://github.com/ggerganov/llama.cpp/pull/3749#discussion_r1369997000
|
||||
const int ne23 = ne12*ne13;
|
||||
|
||||
// TODO: avoid this alloc
|
||||
|
Loading…
Reference in New Issue
Block a user