From 5cc672a9a537ab194ccc4874b20bdc8267bf6eb0 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 26 Jun 2023 22:23:04 +0300
Subject: [PATCH] metal : try to utilize more of the shared memory using
 smaller views

---
 ggml-metal.h | 2 +-
 ggml-metal.m | 8 +++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/ggml-metal.h b/ggml-metal.h
index b9e50ac74..eb070ec84 100644
--- a/ggml-metal.h
+++ b/ggml-metal.h
@@ -23,7 +23,7 @@
 #include <stdbool.h>
 
 // max memory buffers that can be mapped to the device
-#define GGML_METAL_MAX_BUFFERS 16
+#define GGML_METAL_MAX_BUFFERS 256
 
 struct ggml_tensor;
 struct ggml_cgraph;
diff --git a/ggml-metal.m b/ggml-metal.m
index 7551231b9..ef3206fc1 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -262,8 +262,10 @@ bool ggml_metal_add_buffer(
             size_aligned += (size_page - (size_aligned % size_page));
         }
 
+        const size_t max_buffer_length = ctx->device.maxBufferLength/4;
+
         // the buffer fits into the max buffer size allowed by the device
-        if (size_aligned <= ctx->device.maxBufferLength) {
+        if (size_aligned <= max_buffer_length) {
             ctx->buffers[ctx->n_buffers].name = name;
             ctx->buffers[ctx->n_buffers].data = data;
             ctx->buffers[ctx->n_buffers].size = size;
@@ -282,8 +284,8 @@ bool ggml_metal_add_buffer(
             // this overlap between the views will guarantee that the tensor with the maximum size will fully fit into
             // one of the views
             const size_t size_ovlp = ((max_size + size_page - 1) / size_page + 1) * size_page; // round-up 2 pages just in case
-            const size_t size_step = ctx->device.maxBufferLength - size_ovlp;
-            const size_t size_view = ctx->device.maxBufferLength;
+            const size_t size_step = max_buffer_length - size_ovlp;
+            const size_t size_view = max_buffer_length;
 
             for (size_t i = 0; i < size; i += size_step) {
                 const size_t size_step_aligned = (i + size_view <= size) ? size_view : (size_aligned - i);
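
The overlapping-view arithmetic introduced above can be illustrated standalone. The sketch below is plain C, not part of the patch; the page size, device limit, and buffer sizes are hypothetical values chosen only to make the output readable. It walks the same computation: each view is size_view bytes, consecutive views start size_step bytes apart, and the size_ovlp overlap guarantees that a tensor of up to max_size bytes always lies entirely inside at least one view.

    #include <stdio.h>
    #include <stddef.h>

    // Standalone sketch of the overlapping-view arithmetic used by
    // ggml_metal_add_buffer. All concrete numbers are hypothetical.
    int main(void) {
        const size_t size_page         = 4096;            // hypothetical page size
        const size_t max_buffer_length = 64 * size_page;  // hypothetical per-view limit (device max / 4)
        const size_t max_size          = 10 * size_page;  // largest tensor stored in the buffer
        const size_t size              = 300 * size_page; // total (page-aligned) buffer size to map

        // overlap rounded up by one extra page, as in the patch
        const size_t size_ovlp = ((max_size + size_page - 1) / size_page + 1) * size_page;
        const size_t size_step = max_buffer_length - size_ovlp;
        const size_t size_view = max_buffer_length;

        for (size_t i = 0; i < size; i += size_step) {
            // the last view is truncated to the remaining size
            const size_t size_view_i = (i + size_view <= size) ? size_view : (size - i);
            printf("view: offset = %zu, size = %zu\n", i, size_view_i);
        }
        return 0;
    }

Because size_step = size_view - size_ovlp, any tensor starting within one step of a view's origin ends before that view does, which is why a tensor never has to straddle two views.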