metal : utilize max shared memory for mul_mat_id (#7935)

This commit is contained in:
Georgi Gerganov 2024-06-14 17:14:09 +03:00 committed by GitHub
parent e65bbf606c
commit 66ef1ceedf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1862,9 +1862,10 @@ static enum ggml_status ggml_metal_graph_compute(
 // ne21 = n_rows
 const int dst_rows = ne20*ne21;
 const int dst_rows_min = n_as;
+const int dst_rows_max = (ctx->device.maxThreadgroupMemoryLength - 32 - 8192)/4;
 // max size of the rowids array in the kernel shared buffer
-GGML_ASSERT(dst_rows <= 2048);
+GGML_ASSERT(dst_rows <= dst_rows_max);
 // for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
 // AMD GPU and older A-chips will reuse matrix-vector multiplication kernel