CUDA: fix MMQ stream-k for --split-mode row (#8167)

This commit is contained in:
Johannes Gäßler 2024-06-27 16:26:05 +02:00 committed by GitHub
parent f675b20a3b
commit 85a267daaa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -2475,7 +2475,7 @@ static void launch_mul_mat_q(ggml_backend_cuda_context & ctx, const mmq_args & a
const dim3 block_nums_mmq(nsm, 1, 1); const dim3 block_nums_mmq(nsm, 1, 1);
ggml_cuda_pool & pool = ctx.pool(); ggml_cuda_pool & pool = ctx.pool(id);
ggml_cuda_pool_alloc<float> tmp_fixup(pool, block_nums_mmq.x * mmq_x*mmq_y); ggml_cuda_pool_alloc<float> tmp_fixup(pool, block_nums_mmq.x * mmq_x*mmq_y);
if (args.ne01 % mmq_y == 0) { if (args.ne01 % mmq_y == 0) {