mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-13 20:14:29 +00:00
metal : try to utilize more of the shared memory using smaller views
This commit is contained in:
parent
c824d2e368
commit
5cc672a9a5
@ -23,7 +23,7 @@
|
|||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
|
|
||||||
// max memory buffers that can be mapped to the device
|
// max memory buffers that can be mapped to the device
|
||||||
#define GGML_METAL_MAX_BUFFERS 16
|
#define GGML_METAL_MAX_BUFFERS 256
|
||||||
|
|
||||||
struct ggml_tensor;
|
struct ggml_tensor;
|
||||||
struct ggml_cgraph;
|
struct ggml_cgraph;
|
||||||
|
@ -262,8 +262,10 @@ bool ggml_metal_add_buffer(
|
|||||||
size_aligned += (size_page - (size_aligned % size_page));
|
size_aligned += (size_page - (size_aligned % size_page));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const size_t max_buffer_length = ctx->device.maxBufferLength/4;
|
||||||
|
|
||||||
// the buffer fits into the max buffer size allowed by the device
|
// the buffer fits into the max buffer size allowed by the device
|
||||||
if (size_aligned <= ctx->device.maxBufferLength) {
|
if (size_aligned <= max_buffer_length) {
|
||||||
ctx->buffers[ctx->n_buffers].name = name;
|
ctx->buffers[ctx->n_buffers].name = name;
|
||||||
ctx->buffers[ctx->n_buffers].data = data;
|
ctx->buffers[ctx->n_buffers].data = data;
|
||||||
ctx->buffers[ctx->n_buffers].size = size;
|
ctx->buffers[ctx->n_buffers].size = size;
|
||||||
@ -282,8 +284,8 @@ bool ggml_metal_add_buffer(
|
|||||||
// this overlap between the views will guarantee that the tensor with the maximum size will fully fit into
|
// this overlap between the views will guarantee that the tensor with the maximum size will fully fit into
|
||||||
// one of the views
|
// one of the views
|
||||||
const size_t size_ovlp = ((max_size + size_page - 1) / size_page + 1) * size_page; // round-up 2 pages just in case
|
const size_t size_ovlp = ((max_size + size_page - 1) / size_page + 1) * size_page; // round-up 2 pages just in case
|
||||||
const size_t size_step = ctx->device.maxBufferLength - size_ovlp;
|
const size_t size_step = max_buffer_length - size_ovlp;
|
||||||
const size_t size_view = ctx->device.maxBufferLength;
|
const size_t size_view = max_buffer_length;
|
||||||
|
|
||||||
for (size_t i = 0; i < size; i += size_step) {
|
for (size_t i = 0; i < size; i += size_step) {
|
||||||
const size_t size_step_aligned = (i + size_view <= size) ? size_view : (size_aligned - i);
|
const size_t size_step_aligned = (i + size_view <= size) ? size_view : (size_aligned - i);
|
||||||
|
Loading…
Reference in New Issue
Block a user