From cb871fa022aa7a8b72c3a616f7ac7e8e9f1748d9 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Wed, 26 Jul 2023 18:48:52 +0300
Subject: [PATCH] gguf : do not support passing existing ggml_context to
 gguf_init

---
 ggml.c | 51 ++++++++++++++++-----------------------------------
 ggml.h |  5 ++---
 2 files changed, 18 insertions(+), 38 deletions(-)

diff --git a/ggml.c b/ggml.c
index 030475062..1b14c3790 100644
--- a/ggml.c
+++ b/ggml.c
@@ -18388,6 +18388,8 @@ static bool gguf_fread_str(void * dst, FILE * file, size_t * offset) {
 }
 
 struct gguf_context * gguf_init(const char * fname, struct gguf_init_params params) {
+    GGML_ASSERT(!params.load || params.malloc || params.ctx != NULL);
+
     FILE * file = fopen(fname, "rb");
     if (!file) {
         return NULL;
@@ -18518,8 +18520,7 @@ struct gguf_context * gguf_init(const char * fname, struct gguf_init_params para
 
         const size_t size_cur = (ne*ggml_type_size(info->type))/ggml_blck_size(info->type);
 
-        // TODO: pad size_cur to alignment
-        ctx->size_data += size_cur;
+        ctx->size_data += GGML_PAD(size_cur, ctx->alignment);
     }
 
     // TODO: simplify
@@ -18528,28 +18529,18 @@ struct gguf_context * gguf_init(const char * fname, struct gguf_init_params para
             ctx->data = GGML_ALIGNED_MALLOC(ctx->size_data);
             fseek(file, ctx->offset, SEEK_SET);
             ok = ok && gguf_fread_el(ctx->data, ctx->size_data, file, &offset);
-        } else if (params.ctx != NULL) {
-            bool ctx_new = false;
-            bool ctx_no_alloc = false;
+        } else {
+            const size_t mem_size =
+                ctx->header.n_tensors*ggml_tensor_overhead() + 1 +
+                ctx->size_data;
 
-            if (*params.ctx == NULL) {
-                const size_t mem_size =
-                    ctx->header.n_tensors*ggml_tensor_overhead() + 1 +
-                    ctx->size_data;
+            struct ggml_init_params pdata = {
+                .mem_size   = mem_size,
+                .mem_buffer = NULL,
+                .no_alloc   = false,
+            };
 
-                struct ggml_init_params pdata = {
-                    .mem_size   = mem_size,
-                    .mem_buffer = NULL,
-                    .no_alloc   = false,
-                };
-
-                *params.ctx = ggml_init(pdata);
-
-                ctx_new = true;
-            } else {
-                ctx_no_alloc = ggml_get_no_alloc(*params.ctx);
-                ggml_set_no_alloc(*params.ctx, false);
-            }
+            *params.ctx = ggml_init(pdata);
 
             struct ggml_context * ctx_data = *params.ctx;
 
@@ -18561,11 +18552,7 @@ struct gguf_context * gguf_init(const char * fname, struct gguf_init_params para
             if (!ok) {
                 fprintf(stderr, "%s: failed to read tensor data\n", __func__);
                 fclose(file);
-                if (ctx_new) {
-                    ggml_free(ctx_data);
-                } else {
-                    ggml_set_no_alloc(ctx_data, ctx_no_alloc);
-                }
+                ggml_free(ctx_data);
                 gguf_free(ctx);
                 return NULL;
             }
@@ -18597,18 +18584,12 @@ struct gguf_context * gguf_init(const char * fname, struct gguf_init_params para
             if (!ok) {
                 fprintf(stderr, "%s: failed to create tensors\n", __func__);
                 fclose(file);
-                if (ctx_new) {
-                    ggml_free(ctx_data);
-                } else {
-                    ggml_set_no_alloc(ctx_data, ctx_no_alloc);
-                }
+                ggml_free(ctx_data);
                 gguf_free(ctx);
                 return NULL;
             }
 
-            ggml_set_no_alloc(ctx_data, ctx_no_alloc);
-        } else {
-            GGML_ASSERT("gguf: invalid params - load requires malloc or ctx");
+            ggml_set_no_alloc(ctx_data, false);
         }
     }
 
diff --git a/ggml.h b/ggml.h
index 1a748d8d8..7d5514ba3 100644
--- a/ggml.h
+++ b/ggml.h
@@ -1636,9 +1636,8 @@ extern "C" {
 
     struct gguf_init_params {
         bool load;   // load the tensor data
-        bool malloc; // if false, use the provided ggml_context to allocate the tensor data
-                     //           it no ggml_context is provided, it will be created
-                     // if true,  use malloc to allocate the tensor data
+        bool malloc; // if false, create a ggml_context (returned via *ctx) and allocate the tensor data in it
+                     // if  true, use malloc to allocate the tensor data instead
 
         struct ggml_context ** ctx;
     };