mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-26 03:14:35 +00:00
CUDA: fix MMQ stream-k for --split-mode row (#8167)
This commit is contained in:
parent
f675b20a3b
commit
85a267daaa
@@ -2475,7 +2475,7 @@ static void launch_mul_mat_q(ggml_backend_cuda_context & ctx, const mmq_args & a
 
     const dim3 block_nums_mmq(nsm, 1, 1);
 
-    ggml_cuda_pool & pool = ctx.pool();
+    ggml_cuda_pool & pool = ctx.pool(id);
     ggml_cuda_pool_alloc<float> tmp_fixup(pool, block_nums_mmq.x * mmq_x*mmq_y);
 
     if (args.ne01 % mmq_y == 0) {
Loading…
Reference in New Issue
Block a user