ggml : update softmax n_task calculation (#5126)

updated the n_task calculation to use max number of
threads possible. This has improved the prompt eval
performance by around 5% for DOT kernels and by
around 10% for MMLA kernels on AWS Graviton3.
This commit is contained in:
snadampal 2024-01-26 11:17:59 -06:00 committed by GitHub
parent 5f1925a8ce
commit 7032f4f634
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

2
ggml.c
View File

@ -16597,7 +16597,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
} break;
case GGML_OP_SOFT_MAX:
{
n_tasks = MIN(MIN(4, n_threads), ggml_nrows(node->src[0]));
n_tasks = MIN(n_threads, ggml_nrows(node->src[0]));
} break;
case GGML_OP_CONV_TRANSPOSE_1D:
{