diff --git a/ggml.c b/ggml.c index 793b67f4c..90af4342d 100644 --- a/ggml.c +++ b/ggml.c @@ -20550,6 +20550,32 @@ static bool gguf_fread_str(FILE * file, struct gguf_str * p, size_t * offset) { return ok; } +static void gguf_free_kv(struct gguf_kv * kv) { + if (kv->key.data) { + GGML_FREE(kv->key.data); + } + + if (kv->type == GGUF_TYPE_STRING) { + if (kv->value.str.data) { + GGML_FREE(kv->value.str.data); + } + } + + if (kv->type == GGUF_TYPE_ARRAY) { + if (kv->value.arr.data) { + if (kv->value.arr.type == GGUF_TYPE_STRING) { + for (uint64_t j = 0; j < kv->value.arr.n; ++j) { + struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[j]; + if (str->data) { + GGML_FREE(str->data); + } + } + } + GGML_FREE(kv->value.arr.data); + } + } +} + struct gguf_context * gguf_init_empty(void) { struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context)); @@ -20899,31 +20925,7 @@ void gguf_free(struct gguf_context * ctx) { if (ctx->kv) { // free string memory - not great.. for (uint64_t i = 0; i < ctx->header.n_kv; ++i) { - struct gguf_kv * kv = &ctx->kv[i]; - - if (kv->key.data) { - GGML_FREE(kv->key.data); - } - - if (kv->type == GGUF_TYPE_STRING) { - if (kv->value.str.data) { - GGML_FREE(kv->value.str.data); - } - } - - if (kv->type == GGUF_TYPE_ARRAY) { - if (kv->value.arr.data) { - if (kv->value.arr.type == GGUF_TYPE_STRING) { - for (uint64_t j = 0; j < kv->value.arr.n; ++j) { - struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[j]; - if (str->data) { - GGML_FREE(str->data); - } - } - } - GGML_FREE(kv->value.arr.data); - } - } + gguf_free_kv(&ctx->kv[i]); } GGML_FREE(ctx->kv); @@ -21148,6 +21150,19 @@ static int gguf_get_or_add_key(struct gguf_context * ctx, const char * key) { return n_kv; } +void gguf_remove_key(struct gguf_context * ctx, const char * key) { + const int idx = gguf_find_key(ctx, key); + if (idx >= 0) { + const int n_kv = gguf_get_n_kv(ctx); + gguf_free_kv(&ctx->kv[idx]); + for (int i = idx; i < n_kv-1; ++i) { + ctx->kv[i] = ctx->kv[i+1]; + } + ctx->kv = realloc(ctx->kv, (n_kv - 1) * sizeof(struct gguf_kv)); + ctx->header.n_kv--; + } +} + void gguf_set_val_u8(struct gguf_context * ctx, const char * key, uint8_t val) { const int idx = gguf_get_or_add_key(ctx, key); diff --git a/ggml.h b/ggml.h index abe3767f2..e9ed8eeee 100644 --- a/ggml.h +++ b/ggml.h @@ -2289,6 +2289,9 @@ extern "C" { GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i); GGML_API enum ggml_type gguf_get_tensor_type (const struct gguf_context * ctx, int i); + // removes key if it exists + GGML_API void gguf_remove_key(struct gguf_context * ctx, const char * key); + // overrides existing values or adds a new one GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val); GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val); diff --git a/llama.cpp b/llama.cpp index dad2c4fbf..83dd55efa 100644 --- a/llama.cpp +++ b/llama.cpp @@ -13535,6 +13535,10 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s gguf_set_kv (ctx_out, ml.meta); gguf_set_val_u32(ctx_out, "general.quantization_version", GGML_QNT_VERSION); gguf_set_val_u32(ctx_out, "general.file_type", ftype); + // Remove split metadata + gguf_remove_key(ctx_out, ml.llm_kv(LLM_KV_SPLIT_NO).c_str()); + gguf_remove_key(ctx_out, ml.llm_kv(LLM_KV_SPLIT_COUNT).c_str()); + gguf_remove_key(ctx_out, ml.llm_kv(LLM_KV_SPLIT_TENSORS_COUNT).c_str()); if (params->kv_overrides) { const std::vector & overrides = *(const std::vector *)params->kv_overrides;