metal : remove unused n_buffers and buffers (#5129)

Author: Paul Tsochantaris, 2024-01-26 12:16:07 +00:00 (committed by GitHub)
Parent: 38b431de23
Commit: 6dd3c28c9c


@@ -26,15 +26,6 @@
 #define GGML_METAL_MAX_KERNELS 256
 
-struct ggml_metal_buffer {
-    const char * name;
-
-    void   * data;
-    size_t   size;
-
-    id<MTLBuffer> metal;
-};
-
 struct ggml_metal_kernel {
     id<MTLFunction>             function;
     id<MTLComputePipelineState> pipeline;
@@ -172,9 +163,6 @@ struct ggml_metal_context {
     dispatch_queue_t d_queue;
 
-    int n_buffers;
-    struct ggml_metal_buffer buffers[GGML_METAL_MAX_BUFFERS];
-
     struct ggml_metal_kernel kernels[GGML_METAL_MAX_KERNELS];
 
     bool support_simdgroup_reduction;
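
With n_buffers and buffers[] removed from ggml_metal_context, per-buffer bookkeeping lives only on the backend-buffer side. A hedged sketch of that bookkeeping, limited to the fields this diff actually dereferences later (buf_ctx->n_buffers and buffers[i].data/.size/.metal); the real ggml_backend_metal_buffer_context may carry additional members:

    // sketch only: fields restricted to what the hunks below touch
    struct ggml_backend_metal_buffer {
        void   * data;          // host pointer backing this view
        size_t   size;          // size of the view in bytes
        id<MTLBuffer> metal;    // Metal buffer wrapping the same memory
    };

    struct ggml_backend_metal_buffer_context {
        int n_buffers;
        struct ggml_backend_metal_buffer buffers[GGML_METAL_MAX_BUFFERS];
        // ... (any additional members omitted here)
    };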
@@ -242,24 +230,20 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
     // Show all the Metal device instances in the system
     NSArray * devices = MTLCopyAllDevices();
     for (id<MTLDevice> device in devices) {
-        NSString * s = [device name];
-        GGML_METAL_LOG_INFO("%s: found device: %s\n", __func__, [s UTF8String]);
+        GGML_METAL_LOG_INFO("%s: found device: %s\n", __func__, [[device name] UTF8String]);
     }
     [devices release]; // since it was created by a *Copy* C method
 #endif
 
     // Pick and show default Metal device
     id<MTLDevice> device = MTLCreateSystemDefaultDevice();
-    NSString * s = [device name];
-    GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [s UTF8String]);
+    GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]);
 
     // Configure context
     struct ggml_metal_context * ctx = malloc(sizeof(struct ggml_metal_context));
     ctx->device = device;
     ctx->n_cb   = MIN(n_cb, GGML_METAL_MAX_BUFFERS);
     ctx->queue  = [ctx->device newCommandQueue];
-    ctx->n_buffers = 0;
     ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
 
     // load library
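
The logging change above simply folds the temporary NSString into a nested message send. A standalone sketch of the same pattern (manual reference counting, as in this file; the function name here is illustrative, not from the patch):

    #import <Metal/Metal.h>
    #include <stdio.h>

    static void list_metal_devices(void) {
        NSArray * devices = MTLCopyAllDevices();
        for (id<MTLDevice> device in devices) {
            // [[device name] UTF8String] avoids the intermediate NSString variable
            printf("found device: %s\n", [[device name] UTF8String]);
        }
        [devices release]; // owned by the caller because it comes from a *Copy* function
    }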
@@ -534,10 +518,6 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
 static void ggml_metal_free(struct ggml_metal_context * ctx) {
     GGML_METAL_LOG_INFO("%s: deallocating\n", __func__);
 
-    for (int i = 0; i < ctx->n_buffers; ++i) {
-        [ctx->buffers[i].metal release];
-    }
-
     for (int i = 0; i < GGML_METAL_MAX_KERNELS; ++i) {
         if (ctx->kernels[i].pipeline) {
             [ctx->kernels[i].pipeline release];
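
Since the context no longer owns any MTLBuffer objects, ggml_metal_free has nothing to release here; that responsibility sits with whatever tears down the backend buffer context. A hypothetical sketch of such a teardown (the name and placement are illustrative, not taken from this patch):

    static void metal_buffer_context_free(struct ggml_backend_metal_buffer_context * buf_ctx) {
        for (int i = 0; i < buf_ctx->n_buffers; ++i) {
            [buf_ctx->buffers[i].metal release]; // manual reference counting
        }
        free(buf_ctx);
    }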
@@ -580,51 +560,30 @@ struct ggml_backend_metal_buffer_context {
 // the assumption is that there is 1-to-1 mapping between the host and device memory buffers, so we can find the
 // Metal buffer based on the host memory pointer
 //
-static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_metal_context * ctx, struct ggml_tensor * t, size_t * offs) {
+static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_tensor * t, size_t * offs) {
     //GGML_METAL_LOG_INFO("%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);
 
     const int64_t tsize = ggml_nbytes(t);
 
     ggml_backend_buffer_t buffer = t->view_src ? t->view_src->buffer : t->buffer;
 
-    // compatibility with ggml-backend
-    if (buffer && buffer->buft == ggml_backend_metal_buffer_type()) {
-        struct ggml_backend_metal_buffer_context * buf_ctx = (struct ggml_backend_metal_buffer_context *) buffer->context;
-
-        // find the view that contains the tensor fully
-        for (int i = 0; i < buf_ctx->n_buffers; ++i) {
-            const int64_t ioffs = (int64_t) t->data - (int64_t) buf_ctx->buffers[i].data;
-
-            //GGML_METAL_LOG_INFO("ioffs = %10ld, tsize = %10ld, sum = %10ld, buf_ctx->buffers[%d].size = %10ld\n", ioffs, tsize, ioffs + tsize, i, buf_ctx->buffers[i].size);
-            if (ioffs >= 0 && ioffs + tsize <= (int64_t) buf_ctx->buffers[i].size) {
-                *offs = (size_t) ioffs;
-
-                //GGML_METAL_LOG_INFO("%s: tensor '%16s', offs = %8ld\n", __func__, t->name, *offs);
-
-                return buf_ctx->buffers[i].metal;
-            }
-        }
-
-        GGML_METAL_LOG_ERROR("%s: error: tensor '%s' buffer is nil\n", __func__, t->name);
-
-        return nil;
-    }
+    struct ggml_backend_metal_buffer_context * buf_ctx = (struct ggml_backend_metal_buffer_context *) buffer->context;
 
     // find the view that contains the tensor fully
-    for (int i = 0; i < ctx->n_buffers; ++i) {
-        const int64_t ioffs = (int64_t) t->data - (int64_t) ctx->buffers[i].data;
+    for (int i = 0; i < buf_ctx->n_buffers; ++i) {
+        const int64_t ioffs = (int64_t) t->data - (int64_t) buf_ctx->buffers[i].data;
 
-        //GGML_METAL_LOG_INFO("ioffs = %10ld, tsize = %10ld, sum = %10ld, ctx->buffers[%d].size = %10ld, name = %s\n", ioffs, tsize, ioffs + tsize, i, ctx->buffers[i].size, ctx->buffers[i].name);
-        if (ioffs >= 0 && ioffs + tsize <= (int64_t) ctx->buffers[i].size) {
-            *offs = (size_t) ioffs;
+        //GGML_METAL_LOG_INFO("ioffs = %10ld, tsize = %10ld, sum = %10ld, buf_ctx->buffers[%d].size = %10ld\n", ioffs, tsize, ioffs + tsize, i, buf_ctx->buffers[i].size);
+        if (ioffs >= 0 && ioffs + tsize <= (int64_t) buf_ctx->buffers[i].size) {
+            *offs = (size_t) ioffs;
 
-            //GGML_METAL_LOG_INFO("%s: '%s' tensor '%16s', offs = %8ld\n", __func__, ctx->buffers[i].name, t->name, *offs);
-            return ctx->buffers[i].metal;
+            //GGML_METAL_LOG_INFO("%s: tensor '%16s', offs = %8ld\n", __func__, t->name, *offs);
+            return buf_ctx->buffers[i].metal;
         }
     }
 
-    GGML_METAL_LOG_ERROR("%s: error: buffer is nil\n", __func__);
+    GGML_METAL_LOG_ERROR("%s: error: tensor '%s' buffer is nil\n", __func__, t->name);
 
     return nil;
 }
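
The lookup above resolves a tensor to a (MTLBuffer, offset) pair purely by pointer arithmetic: the byte offset of t->data inside a view must be non-negative, and the tensor must fit entirely within that view. A minimal standalone restatement of the containment test on plain C types (names are illustrative):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stddef.h>

    struct view { const void * data; size_t size; };

    static bool view_contains(const struct view * v, const void * p, size_t nbytes, size_t * offs) {
        const int64_t ioffs = (int64_t) (intptr_t) p - (int64_t) (intptr_t) v->data;
        if (ioffs >= 0 && (uint64_t) ioffs + nbytes <= v->size) {
            *offs = (size_t) ioffs; // byte offset of the tensor inside this view
            return true;
        }
        return false;
    }

For example, a tensor whose data pointer sits 4096 bytes into a 1 MiB view resolves to that view's MTLBuffer with *offs == 4096.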
@@ -817,9 +776,9 @@ static bool ggml_metal_graph_compute(
                 const enum ggml_type src1t = src1 ? src1->type : GGML_TYPE_COUNT;
                 const enum ggml_type dstt  = dst  ? dst->type  : GGML_TYPE_COUNT;
 
-                id<MTLBuffer> id_src0 = src0 ? ggml_metal_get_buffer(ctx, src0, &offs_src0) : nil;
-                id<MTLBuffer> id_src1 = src1 ? ggml_metal_get_buffer(ctx, src1, &offs_src1) : nil;
-                id<MTLBuffer> id_dst  = dst  ? ggml_metal_get_buffer(ctx, dst,  &offs_dst)  : nil;
+                id<MTLBuffer> id_src0 = src0 ? ggml_metal_get_buffer(src0, &offs_src0) : nil;
+                id<MTLBuffer> id_src1 = src1 ? ggml_metal_get_buffer(src1, &offs_src1) : nil;
+                id<MTLBuffer> id_dst  = dst  ? ggml_metal_get_buffer(dst,  &offs_dst)  : nil;
 
                 //GGML_METAL_LOG_INFO("%s: op - %s\n", __func__, ggml_op_name(dst->op));
                 //if (src0) {
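
Downstream, each resolved (buffer, offset) pair is bound to the compute encoder with setBuffer:offset:atIndex:. A sketch of that typical binding step (the function name and argument indices here are illustrative, not from the patch):

    static void bind_operands(id<MTLComputeCommandEncoder> encoder,
                              id<MTLBuffer> id_src0, size_t offs_src0,
                              id<MTLBuffer> id_src1, size_t offs_src1,
                              id<MTLBuffer> id_dst,  size_t offs_dst) {
        [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
        [encoder setBuffer:id_src1 offset:offs_src1 atIndex:1];
        [encoder setBuffer:id_dst  offset:offs_dst  atIndex:2];
    }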
@@ -1601,7 +1560,7 @@ static bool ggml_metal_graph_compute(
                         struct ggml_tensor * src_cur = dst->src[2 + (j % n_as)];
 
                         size_t offs_src_cur = 0;
-                        id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(ctx, src_cur, &offs_src_cur);
+                        id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(src_cur, &offs_src_cur);
 
                         [encoder setBuffer:id_src_cur offset:offs_src_cur atIndex:19 + j];
                     }
@@ -1746,7 +1705,7 @@ static bool ggml_metal_graph_compute(
                         struct ggml_tensor * src_cur = dst->src[2 + (j % n_as)];
 
                         size_t offs_src_cur = 0;
-                        id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(ctx, src_cur, &offs_src_cur);
+                        id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(src_cur, &offs_src_cur);
 
                         [encoder setBuffer:id_src_cur offset:offs_src_cur atIndex:23 + j];
                     }
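
Both hunks above sit inside a loop that binds one expert matrix per encoder slot; only the base index differs (19 vs 23). The surrounding pattern looks roughly like this (the loop bound and the base_idx name are illustrative; only the loop body matches the patch):

    for (int j = 0; j < n_as; ++j) {
        struct ggml_tensor * src_cur = dst->src[2 + (j % n_as)];

        size_t offs_src_cur = 0;
        id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(src_cur, &offs_src_cur);

        [encoder setBuffer:id_src_cur offset:offs_src_cur atIndex:base_idx + j];
    }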