metal : remove unused n_buffers and buffers (#5129)

This commit is contained in:
Paul Tsochantaris 2024-01-26 12:16:07 +00:00 committed by GitHub
parent 38b431de23
commit 6dd3c28c9c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -26,15 +26,6 @@
#define GGML_METAL_MAX_KERNELS 256 #define GGML_METAL_MAX_KERNELS 256
// Associates one host memory region with the Metal buffer that backs it.
// Entries are stored in the `buffers` array of ggml_metal_context, with `n_buffers` counting the used slots.
struct ggml_metal_buffer {
// label for the region; in the visible code it is only referenced from commented-out debug logging
const char * name;
// host base address; a tensor's data pointer is compared against it to derive the offset into the region
void * data;
// extent of the region in bytes, checked against offset + ggml_nbytes(tensor) during lookup
size_t size;
// Metal buffer handle returned for tensors that lie fully inside the region; released in ggml_metal_free
id<MTLBuffer> metal;
};
struct ggml_metal_kernel { struct ggml_metal_kernel {
id<MTLFunction> function; id<MTLFunction> function;
id<MTLComputePipelineState> pipeline; id<MTLComputePipelineState> pipeline;
@ -172,9 +163,6 @@ struct ggml_metal_context {
dispatch_queue_t d_queue; dispatch_queue_t d_queue;
int n_buffers;
struct ggml_metal_buffer buffers[GGML_METAL_MAX_BUFFERS];
struct ggml_metal_kernel kernels[GGML_METAL_MAX_KERNELS]; struct ggml_metal_kernel kernels[GGML_METAL_MAX_KERNELS];
bool support_simdgroup_reduction; bool support_simdgroup_reduction;
@ -242,24 +230,20 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
// Show all the Metal device instances in the system // Show all the Metal device instances in the system
NSArray * devices = MTLCopyAllDevices(); NSArray * devices = MTLCopyAllDevices();
for (id<MTLDevice> device in devices) { for (id<MTLDevice> device in devices) {
NSString * s = [device name]; GGML_METAL_LOG_INFO("%s: found device: %s\n", __func__, [[device name] UTF8String]);
GGML_METAL_LOG_INFO("%s: found device: %s\n", __func__, [s UTF8String]);
} }
[devices release]; // since it was created by a *Copy* C method [devices release]; // since it was created by a *Copy* C method
#endif #endif
// Pick and show default Metal device // Pick and show default Metal device
id<MTLDevice> device = MTLCreateSystemDefaultDevice(); id<MTLDevice> device = MTLCreateSystemDefaultDevice();
NSString * s = [device name]; GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]);
GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [s UTF8String]);
// Configure context // Configure context
struct ggml_metal_context * ctx = malloc(sizeof(struct ggml_metal_context)); struct ggml_metal_context * ctx = malloc(sizeof(struct ggml_metal_context));
ctx->device = device; ctx->device = device;
ctx->n_cb = MIN(n_cb, GGML_METAL_MAX_BUFFERS); ctx->n_cb = MIN(n_cb, GGML_METAL_MAX_BUFFERS);
ctx->queue = [ctx->device newCommandQueue]; ctx->queue = [ctx->device newCommandQueue];
ctx->n_buffers = 0;
ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT); ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
// load library // load library
@ -534,10 +518,6 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
static void ggml_metal_free(struct ggml_metal_context * ctx) { static void ggml_metal_free(struct ggml_metal_context * ctx) {
GGML_METAL_LOG_INFO("%s: deallocating\n", __func__); GGML_METAL_LOG_INFO("%s: deallocating\n", __func__);
for (int i = 0; i < ctx->n_buffers; ++i) {
[ctx->buffers[i].metal release];
}
for (int i = 0; i < GGML_METAL_MAX_KERNELS; ++i) { for (int i = 0; i < GGML_METAL_MAX_KERNELS; ++i) {
if (ctx->kernels[i].pipeline) { if (ctx->kernels[i].pipeline) {
[ctx->kernels[i].pipeline release]; [ctx->kernels[i].pipeline release];
@ -580,15 +560,13 @@ struct ggml_backend_metal_buffer_context {
// the assumption is that there is 1-to-1 mapping between the host and device memory buffers, so we can find the // the assumption is that there is 1-to-1 mapping between the host and device memory buffers, so we can find the
// Metal buffer based on the host memory pointer // Metal buffer based on the host memory pointer
// //
static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_metal_context * ctx, struct ggml_tensor * t, size_t * offs) { static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_tensor * t, size_t * offs) {
//GGML_METAL_LOG_INFO("%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach); //GGML_METAL_LOG_INFO("%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);
const int64_t tsize = ggml_nbytes(t); const int64_t tsize = ggml_nbytes(t);
ggml_backend_buffer_t buffer = t->view_src ? t->view_src->buffer : t->buffer; ggml_backend_buffer_t buffer = t->view_src ? t->view_src->buffer : t->buffer;
// compatibility with ggml-backend
if (buffer && buffer->buft == ggml_backend_metal_buffer_type()) {
struct ggml_backend_metal_buffer_context * buf_ctx = (struct ggml_backend_metal_buffer_context *) buffer->context; struct ggml_backend_metal_buffer_context * buf_ctx = (struct ggml_backend_metal_buffer_context *) buffer->context;
// find the view that contains the tensor fully // find the view that contains the tensor fully
@ -607,25 +585,6 @@ static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_metal_context * ctx, stru
GGML_METAL_LOG_ERROR("%s: error: tensor '%s' buffer is nil\n", __func__, t->name); GGML_METAL_LOG_ERROR("%s: error: tensor '%s' buffer is nil\n", __func__, t->name);
return nil;
}
// find the view that contains the tensor fully
for (int i = 0; i < ctx->n_buffers; ++i) {
const int64_t ioffs = (int64_t) t->data - (int64_t) ctx->buffers[i].data;
//GGML_METAL_LOG_INFO("ioffs = %10ld, tsize = %10ld, sum = %10ld, ctx->buffers[%d].size = %10ld, name = %s\n", ioffs, tsize, ioffs + tsize, i, ctx->buffers[i].size, ctx->buffers[i].name);
if (ioffs >= 0 && ioffs + tsize <= (int64_t) ctx->buffers[i].size) {
*offs = (size_t) ioffs;
//GGML_METAL_LOG_INFO("%s: '%s' tensor '%16s', offs = %8ld\n", __func__, ctx->buffers[i].name, t->name, *offs);
return ctx->buffers[i].metal;
}
}
GGML_METAL_LOG_ERROR("%s: error: buffer is nil\n", __func__);
return nil; return nil;
} }
@ -817,9 +776,9 @@ static bool ggml_metal_graph_compute(
const enum ggml_type src1t = src1 ? src1->type : GGML_TYPE_COUNT; const enum ggml_type src1t = src1 ? src1->type : GGML_TYPE_COUNT;
const enum ggml_type dstt = dst ? dst->type : GGML_TYPE_COUNT; const enum ggml_type dstt = dst ? dst->type : GGML_TYPE_COUNT;
id<MTLBuffer> id_src0 = src0 ? ggml_metal_get_buffer(ctx, src0, &offs_src0) : nil; id<MTLBuffer> id_src0 = src0 ? ggml_metal_get_buffer(src0, &offs_src0) : nil;
id<MTLBuffer> id_src1 = src1 ? ggml_metal_get_buffer(ctx, src1, &offs_src1) : nil; id<MTLBuffer> id_src1 = src1 ? ggml_metal_get_buffer(src1, &offs_src1) : nil;
id<MTLBuffer> id_dst = dst ? ggml_metal_get_buffer(ctx, dst, &offs_dst) : nil; id<MTLBuffer> id_dst = dst ? ggml_metal_get_buffer(dst, &offs_dst) : nil;
//GGML_METAL_LOG_INFO("%s: op - %s\n", __func__, ggml_op_name(dst->op)); //GGML_METAL_LOG_INFO("%s: op - %s\n", __func__, ggml_op_name(dst->op));
//if (src0) { //if (src0) {
@ -1601,7 +1560,7 @@ static bool ggml_metal_graph_compute(
struct ggml_tensor * src_cur = dst->src[2 + (j % n_as)]; struct ggml_tensor * src_cur = dst->src[2 + (j % n_as)];
size_t offs_src_cur = 0; size_t offs_src_cur = 0;
id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(ctx, src_cur, &offs_src_cur); id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(src_cur, &offs_src_cur);
[encoder setBuffer:id_src_cur offset:offs_src_cur atIndex:19 + j]; [encoder setBuffer:id_src_cur offset:offs_src_cur atIndex:19 + j];
} }
@ -1746,7 +1705,7 @@ static bool ggml_metal_graph_compute(
struct ggml_tensor * src_cur = dst->src[2 + (j % n_as)]; struct ggml_tensor * src_cur = dst->src[2 + (j % n_as)];
size_t offs_src_cur = 0; size_t offs_src_cur = 0;
id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(ctx, src_cur, &offs_src_cur); id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(src_cur, &offs_src_cur);
[encoder setBuffer:id_src_cur offset:offs_src_cur atIndex:23 + j]; [encoder setBuffer:id_src_cur offset:offs_src_cur atIndex:23 + j];
} }