mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 03:31:46 +00:00
gguf : add 64-bit support (GGUF v2) (#2821)
* gguf : bump version to 2
* gguf : add support for 64-bit (no backwards comp yet)
* gguf : v1 backwards comp
* gguf.py : bump GGUF version
* gguf.py : uint64_t on all lengths, sizes and counts, enums still uint32_t
* gguf.py : string lengths uint32_t
* gguf : update all counts to 64-bit
* gguf.py : string len uint64_t and n_dims uint32_t
* gguf : fix typo
* llama.cpp : print gguf version

---------

Co-authored-by: klosax <131523366+klosax@users.noreply.github.com>
This commit is contained in:
parent
edd4c14817
commit
d0cee0d36d
@ -30,6 +30,9 @@ bool gguf_ex_write(const std::string & fname) {
|
|||||||
gguf_set_val_u32 (ctx, "some.parameter.uint32", 0x12345678);
|
gguf_set_val_u32 (ctx, "some.parameter.uint32", 0x12345678);
|
||||||
gguf_set_val_i32 (ctx, "some.parameter.int32", -0x12345679);
|
gguf_set_val_i32 (ctx, "some.parameter.int32", -0x12345679);
|
||||||
gguf_set_val_f32 (ctx, "some.parameter.float32", 0.123456789f);
|
gguf_set_val_f32 (ctx, "some.parameter.float32", 0.123456789f);
|
||||||
|
gguf_set_val_u64 (ctx, "some.parameter.uint64", 0x123456789abcdef0ull);
|
||||||
|
gguf_set_val_i64 (ctx, "some.parameter.int64", -0x123456789abcdef1ll);
|
||||||
|
gguf_set_val_f64 (ctx, "some.parameter.float64", 0.1234567890123456789);
|
||||||
gguf_set_val_bool(ctx, "some.parameter.bool", true);
|
gguf_set_val_bool(ctx, "some.parameter.bool", true);
|
||||||
gguf_set_val_str (ctx, "some.parameter.string", "hello world");
|
gguf_set_val_str (ctx, "some.parameter.string", "hello world");
|
||||||
|
|
||||||
|
137
ggml.c
137
ggml.c
@ -19394,7 +19394,7 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
|
|||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
struct gguf_str {
|
struct gguf_str {
|
||||||
uint32_t n;
|
uint64_t n; // GGUFv2
|
||||||
char * data;
|
char * data;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -19408,9 +19408,12 @@ static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = {
|
|||||||
[GGUF_TYPE_FLOAT32] = sizeof(float),
|
[GGUF_TYPE_FLOAT32] = sizeof(float),
|
||||||
[GGUF_TYPE_BOOL] = sizeof(bool),
|
[GGUF_TYPE_BOOL] = sizeof(bool),
|
||||||
[GGUF_TYPE_STRING] = sizeof(struct gguf_str),
|
[GGUF_TYPE_STRING] = sizeof(struct gguf_str),
|
||||||
|
[GGUF_TYPE_UINT64] = sizeof(uint64_t),
|
||||||
|
[GGUF_TYPE_INT64] = sizeof(int64_t),
|
||||||
|
[GGUF_TYPE_FLOAT64] = sizeof(double),
|
||||||
[GGUF_TYPE_ARRAY] = 0, // undefined
|
[GGUF_TYPE_ARRAY] = 0, // undefined
|
||||||
};
|
};
|
||||||
static_assert(GGUF_TYPE_COUNT == 10, "GGUF_TYPE_COUNT != 10");
|
static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
|
||||||
|
|
||||||
static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
|
static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
|
||||||
[GGUF_TYPE_UINT8] = "u8",
|
[GGUF_TYPE_UINT8] = "u8",
|
||||||
@ -19423,8 +19426,11 @@ static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
|
|||||||
[GGUF_TYPE_BOOL] = "bool",
|
[GGUF_TYPE_BOOL] = "bool",
|
||||||
[GGUF_TYPE_STRING] = "str",
|
[GGUF_TYPE_STRING] = "str",
|
||||||
[GGUF_TYPE_ARRAY] = "arr",
|
[GGUF_TYPE_ARRAY] = "arr",
|
||||||
|
[GGUF_TYPE_UINT64] = "u64",
|
||||||
|
[GGUF_TYPE_INT64] = "i64",
|
||||||
|
[GGUF_TYPE_FLOAT64] = "f64",
|
||||||
};
|
};
|
||||||
static_assert(GGUF_TYPE_COUNT == 10, "GGUF_TYPE_COUNT != 10");
|
static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
|
||||||
|
|
||||||
union gguf_value {
|
union gguf_value {
|
||||||
uint8_t uint8;
|
uint8_t uint8;
|
||||||
@ -19434,6 +19440,9 @@ union gguf_value {
|
|||||||
uint32_t uint32;
|
uint32_t uint32;
|
||||||
int32_t int32;
|
int32_t int32;
|
||||||
float float32;
|
float float32;
|
||||||
|
uint64_t uint64;
|
||||||
|
int64_t int64;
|
||||||
|
double float64;
|
||||||
bool bool_;
|
bool bool_;
|
||||||
|
|
||||||
struct gguf_str str;
|
struct gguf_str str;
|
||||||
@ -19441,7 +19450,7 @@ union gguf_value {
|
|||||||
struct {
|
struct {
|
||||||
enum gguf_type type;
|
enum gguf_type type;
|
||||||
|
|
||||||
uint32_t n;
|
uint64_t n; // GGUFv2
|
||||||
void * data;
|
void * data;
|
||||||
} arr;
|
} arr;
|
||||||
};
|
};
|
||||||
@ -19449,8 +19458,6 @@ union gguf_value {
|
|||||||
struct gguf_kv {
|
struct gguf_kv {
|
||||||
struct gguf_str key;
|
struct gguf_str key;
|
||||||
|
|
||||||
uint32_t n_bytes; // TODO: is this actually needed?
|
|
||||||
|
|
||||||
enum gguf_type type;
|
enum gguf_type type;
|
||||||
union gguf_value value;
|
union gguf_value value;
|
||||||
};
|
};
|
||||||
@ -19458,15 +19465,15 @@ struct gguf_kv {
|
|||||||
struct gguf_header {
|
struct gguf_header {
|
||||||
uint32_t magic;
|
uint32_t magic;
|
||||||
uint32_t version;
|
uint32_t version;
|
||||||
uint32_t n_tensors;
|
uint64_t n_tensors; // GGUFv2
|
||||||
uint32_t n_kv;
|
uint64_t n_kv; // GGUFv2
|
||||||
};
|
};
|
||||||
|
|
||||||
struct gguf_tensor_info {
|
struct gguf_tensor_info {
|
||||||
struct gguf_str name;
|
struct gguf_str name;
|
||||||
|
|
||||||
uint32_t n_dims;
|
uint32_t n_dims;
|
||||||
uint32_t ne[GGML_MAX_DIMS];
|
uint64_t ne[GGML_MAX_DIMS];
|
||||||
|
|
||||||
enum ggml_type type;
|
enum ggml_type type;
|
||||||
|
|
||||||
@ -19497,19 +19504,32 @@ static bool gguf_fread_el(FILE * file, void * dst, size_t size, size_t * offset)
|
|||||||
return n == size;
|
return n == size;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool gguf_fread_str(FILE * file, struct gguf_str * p, size_t * offset) {
|
// NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
|
||||||
|
static bool gguf_fread_str_cur(FILE * file, struct gguf_str * p, size_t * offset) {
|
||||||
p->n = 0;
|
p->n = 0;
|
||||||
p->data = NULL;
|
p->data = NULL;
|
||||||
|
|
||||||
bool ok = true;
|
bool ok = true;
|
||||||
|
|
||||||
// TODO: how to avoid mallocs for strings?
|
|
||||||
ok = ok && gguf_fread_el(file, &p->n, sizeof(p->n), offset); p->data = calloc(p->n + 1, 1);
|
ok = ok && gguf_fread_el(file, &p->n, sizeof(p->n), offset); p->data = calloc(p->n + 1, 1);
|
||||||
ok = ok && gguf_fread_el(file, p->data, p->n, offset);
|
ok = ok && gguf_fread_el(file, p->data, p->n, offset);
|
||||||
|
|
||||||
return ok;
|
return ok;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reads a GGUFv1 string: a 32-bit length followed by that many bytes.
//
// file   - stream to read from
// p      - receives the length (p->n) and a zero-terminated heap buffer (p->data);
//          both are reset to 0/NULL before anything is read
// offset - running file offset, forwarded to gguf_fread_el
//
// Returns true only if both the length and the payload were read in full.
// On failure p->data is either NULL or a buffer obtained from calloc.
static bool gguf_fread_str_v1(FILE * file, struct gguf_str * p, size_t * offset) {
    p->n    = 0;
    p->data = NULL;

    uint32_t n = 0;
    if (!gguf_fread_el(file, &n, sizeof(n), offset)) {
        // the length is unknown - do not allocate anything
        return false;
    }

    // widen before adding the terminator byte: with a 32-bit int, n + 1 wraps to 0
    // for n == UINT32_MAX and the payload read would overrun a zero-sized buffer
    const size_t size = (size_t) n + 1;
    if (size == 0) {
        // still wrapped (size_t no wider than 32 bits) - the length is not representable
        return false;
    }

    p->data = calloc(size, 1);
    if (p->data == NULL) {
        // the length comes from the file and may be arbitrarily large
        return false;
    }

    p->n = n;

    return gguf_fread_el(file, p->data, p->n, offset);
}
|
||||||
|
|
||||||
struct gguf_context * gguf_init_empty(void) {
|
struct gguf_context * gguf_init_empty(void) {
|
||||||
struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
|
struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
|
||||||
|
|
||||||
@ -19565,8 +19585,21 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||||||
ctx->data = NULL;
|
ctx->data = NULL;
|
||||||
|
|
||||||
ok = ok && gguf_fread_el(file, &ctx->header.version, sizeof(ctx->header.version), &offset);
|
ok = ok && gguf_fread_el(file, &ctx->header.version, sizeof(ctx->header.version), &offset);
|
||||||
ok = ok && gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset);
|
|
||||||
ok = ok && gguf_fread_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv), &offset);
|
if (ctx->header.version == 1) {
|
||||||
|
// NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
|
||||||
|
uint32_t n_tensors = 0;
|
||||||
|
uint32_t n_kv = 0;
|
||||||
|
|
||||||
|
ok = ok && gguf_fread_el(file, &n_tensors, sizeof(n_tensors), &offset);
|
||||||
|
ok = ok && gguf_fread_el(file, &n_kv, sizeof(n_kv), &offset);
|
||||||
|
|
||||||
|
ctx->header.n_tensors = n_tensors;
|
||||||
|
ctx->header.n_kv = n_kv;
|
||||||
|
} else {
|
||||||
|
ok = ok && gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset);
|
||||||
|
ok = ok && gguf_fread_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv), &offset);
|
||||||
|
}
|
||||||
|
|
||||||
if (!ok) {
|
if (!ok) {
|
||||||
fprintf(stderr, "%s: failed to read header\n", __func__);
|
fprintf(stderr, "%s: failed to read header\n", __func__);
|
||||||
@ -19576,6 +19609,12 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
|
||||||
|
bool (* gguf_fread_str)(FILE *, struct gguf_str *, size_t *) = gguf_fread_str_cur;
|
||||||
|
if (ctx->header.version == 1) {
|
||||||
|
gguf_fread_str = gguf_fread_str_v1;
|
||||||
|
}
|
||||||
|
|
||||||
// read the kv pairs
|
// read the kv pairs
|
||||||
{
|
{
|
||||||
ctx->kv = GGML_ALIGNED_MALLOC(ctx->header.n_kv * sizeof(struct gguf_kv));
|
ctx->kv = GGML_ALIGNED_MALLOC(ctx->header.n_kv * sizeof(struct gguf_kv));
|
||||||
@ -19585,9 +19624,8 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||||||
|
|
||||||
//fprintf(stderr, "%s: reading kv %d\n", __func__, i);
|
//fprintf(stderr, "%s: reading kv %d\n", __func__, i);
|
||||||
|
|
||||||
ok = ok && gguf_fread_str(file, &kv->key, &offset);
|
ok = ok && gguf_fread_str(file, &kv->key, &offset);
|
||||||
//ok = ok && gguf_fread_el (file, &kv->n_bytes, sizeof(kv->n_bytes), &offset);
|
ok = ok && gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset);
|
||||||
ok = ok && gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset);
|
|
||||||
|
|
||||||
//fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data);
|
//fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data);
|
||||||
|
|
||||||
@ -19599,12 +19637,23 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||||||
case GGUF_TYPE_UINT32: ok = ok && gguf_fread_el (file, &kv->value.uint32, sizeof(kv->value.uint32), &offset); break;
|
case GGUF_TYPE_UINT32: ok = ok && gguf_fread_el (file, &kv->value.uint32, sizeof(kv->value.uint32), &offset); break;
|
||||||
case GGUF_TYPE_INT32: ok = ok && gguf_fread_el (file, &kv->value.int32, sizeof(kv->value.int32), &offset); break;
|
case GGUF_TYPE_INT32: ok = ok && gguf_fread_el (file, &kv->value.int32, sizeof(kv->value.int32), &offset); break;
|
||||||
case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (file, &kv->value.float32, sizeof(kv->value.float32), &offset); break;
|
case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (file, &kv->value.float32, sizeof(kv->value.float32), &offset); break;
|
||||||
|
case GGUF_TYPE_UINT64: ok = ok && gguf_fread_el (file, &kv->value.uint64, sizeof(kv->value.uint64), &offset); break;
|
||||||
|
case GGUF_TYPE_INT64: ok = ok && gguf_fread_el (file, &kv->value.int64, sizeof(kv->value.int64), &offset); break;
|
||||||
|
case GGUF_TYPE_FLOAT64: ok = ok && gguf_fread_el (file, &kv->value.float64, sizeof(kv->value.float64), &offset); break;
|
||||||
case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (file, &kv->value.bool_, sizeof(kv->value.bool_), &offset); break;
|
case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (file, &kv->value.bool_, sizeof(kv->value.bool_), &offset); break;
|
||||||
case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(file, &kv->value.str, &offset); break;
|
case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(file, &kv->value.str, &offset); break;
|
||||||
case GGUF_TYPE_ARRAY:
|
case GGUF_TYPE_ARRAY:
|
||||||
{
|
{
|
||||||
ok = ok && gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset);
|
ok = ok && gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset);
|
||||||
ok = ok && gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset);
|
|
||||||
|
if (ctx->header.version == 1) {
|
||||||
|
// NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
|
||||||
|
uint32_t n = 0;
|
||||||
|
ok = ok && gguf_fread_el(file, &n, sizeof(n), &offset);
|
||||||
|
kv->value.arr.n = n;
|
||||||
|
} else {
|
||||||
|
ok = ok && gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset);
|
||||||
|
}
|
||||||
|
|
||||||
switch (kv->value.arr.type) {
|
switch (kv->value.arr.type) {
|
||||||
case GGUF_TYPE_UINT8:
|
case GGUF_TYPE_UINT8:
|
||||||
@ -19614,6 +19663,9 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||||||
case GGUF_TYPE_UINT32:
|
case GGUF_TYPE_UINT32:
|
||||||
case GGUF_TYPE_INT32:
|
case GGUF_TYPE_INT32:
|
||||||
case GGUF_TYPE_FLOAT32:
|
case GGUF_TYPE_FLOAT32:
|
||||||
|
case GGUF_TYPE_UINT64:
|
||||||
|
case GGUF_TYPE_INT64:
|
||||||
|
case GGUF_TYPE_FLOAT64:
|
||||||
case GGUF_TYPE_BOOL:
|
case GGUF_TYPE_BOOL:
|
||||||
{
|
{
|
||||||
kv->value.arr.data = malloc(kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
|
kv->value.arr.data = malloc(kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
|
||||||
@ -19660,7 +19712,14 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||||||
ok = ok && gguf_fread_str(file, &info->name, &offset);
|
ok = ok && gguf_fread_str(file, &info->name, &offset);
|
||||||
ok = ok && gguf_fread_el (file, &info->n_dims, sizeof(info->n_dims), &offset);
|
ok = ok && gguf_fread_el (file, &info->n_dims, sizeof(info->n_dims), &offset);
|
||||||
for (uint32_t j = 0; j < info->n_dims; ++j) {
|
for (uint32_t j = 0; j < info->n_dims; ++j) {
|
||||||
ok = ok && gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset);
|
if (ctx->header.version == 1) {
|
||||||
|
// NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
|
||||||
|
uint32_t t = 0;
|
||||||
|
ok = ok && gguf_fread_el(file, &t, sizeof(t), &offset);
|
||||||
|
info->ne[j] = t;
|
||||||
|
} else {
|
||||||
|
ok = ok && gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
ok = ok && gguf_fread_el (file, &info->type, sizeof(info->type), &offset);
|
ok = ok && gguf_fread_el (file, &info->type, sizeof(info->type), &offset);
|
||||||
ok = ok && gguf_fread_el (file, &info->offset, sizeof(info->offset), &offset);
|
ok = ok && gguf_fread_el (file, &info->offset, sizeof(info->offset), &offset);
|
||||||
@ -19954,6 +20013,18 @@ float gguf_get_val_f32(struct gguf_context * ctx, int i) {
|
|||||||
return ctx->kv[i].value.float32;
|
return ctx->kv[i].value.float32;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint64_t gguf_get_val_u64(struct gguf_context * ctx, int i) {
|
||||||
|
return ctx->kv[i].value.uint64;
|
||||||
|
}
|
||||||
|
|
||||||
|
int64_t gguf_get_val_i64(struct gguf_context * ctx, int i) {
|
||||||
|
return ctx->kv[i].value.int64;
|
||||||
|
}
|
||||||
|
|
||||||
|
double gguf_get_val_f64(struct gguf_context * ctx, int i) {
|
||||||
|
return ctx->kv[i].value.float64;
|
||||||
|
}
|
||||||
|
|
||||||
bool gguf_get_val_bool(struct gguf_context * ctx, int i) {
|
bool gguf_get_val_bool(struct gguf_context * ctx, int i) {
|
||||||
return ctx->kv[i].value.bool_;
|
return ctx->kv[i].value.bool_;
|
||||||
}
|
}
|
||||||
@ -20056,6 +20127,27 @@ void gguf_set_val_f32(struct gguf_context * ctx, const char * key, float val) {
|
|||||||
ctx->kv[idx].value.float32 = val;
|
ctx->kv[idx].value.float32 = val;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void gguf_set_val_u64(struct gguf_context * ctx, const char * key, uint64_t val) {
|
||||||
|
const int idx = gguf_get_or_add_key(ctx, key);
|
||||||
|
|
||||||
|
ctx->kv[idx].type = GGUF_TYPE_UINT64;
|
||||||
|
ctx->kv[idx].value.uint64 = val;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gguf_set_val_i64(struct gguf_context * ctx, const char * key, int64_t val) {
|
||||||
|
const int idx = gguf_get_or_add_key(ctx, key);
|
||||||
|
|
||||||
|
ctx->kv[idx].type = GGUF_TYPE_INT64;
|
||||||
|
ctx->kv[idx].value.int64 = val;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gguf_set_val_f64(struct gguf_context * ctx, const char * key, double val) {
|
||||||
|
const int idx = gguf_get_or_add_key(ctx, key);
|
||||||
|
|
||||||
|
ctx->kv[idx].type = GGUF_TYPE_FLOAT64;
|
||||||
|
ctx->kv[idx].value.float64 = val;
|
||||||
|
}
|
||||||
|
|
||||||
void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val) {
|
void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val) {
|
||||||
const int idx = gguf_get_or_add_key(ctx, key);
|
const int idx = gguf_get_or_add_key(ctx, key);
|
||||||
|
|
||||||
@ -20106,6 +20198,9 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
|
|||||||
case GGUF_TYPE_UINT32: gguf_set_val_u32 (ctx, src->kv[i].key.data, src->kv[i].value.uint32); break;
|
case GGUF_TYPE_UINT32: gguf_set_val_u32 (ctx, src->kv[i].key.data, src->kv[i].value.uint32); break;
|
||||||
case GGUF_TYPE_INT32: gguf_set_val_i32 (ctx, src->kv[i].key.data, src->kv[i].value.int32); break;
|
case GGUF_TYPE_INT32: gguf_set_val_i32 (ctx, src->kv[i].key.data, src->kv[i].value.int32); break;
|
||||||
case GGUF_TYPE_FLOAT32: gguf_set_val_f32 (ctx, src->kv[i].key.data, src->kv[i].value.float32); break;
|
case GGUF_TYPE_FLOAT32: gguf_set_val_f32 (ctx, src->kv[i].key.data, src->kv[i].value.float32); break;
|
||||||
|
case GGUF_TYPE_UINT64: gguf_set_val_u64 (ctx, src->kv[i].key.data, src->kv[i].value.uint64); break;
|
||||||
|
case GGUF_TYPE_INT64: gguf_set_val_i64 (ctx, src->kv[i].key.data, src->kv[i].value.int64); break;
|
||||||
|
case GGUF_TYPE_FLOAT64: gguf_set_val_f64 (ctx, src->kv[i].key.data, src->kv[i].value.float64); break;
|
||||||
case GGUF_TYPE_BOOL: gguf_set_val_bool(ctx, src->kv[i].key.data, src->kv[i].value.bool_); break;
|
case GGUF_TYPE_BOOL: gguf_set_val_bool(ctx, src->kv[i].key.data, src->kv[i].value.bool_); break;
|
||||||
case GGUF_TYPE_STRING: gguf_set_val_str (ctx, src->kv[i].key.data, src->kv[i].value.str.data); break;
|
case GGUF_TYPE_STRING: gguf_set_val_str (ctx, src->kv[i].key.data, src->kv[i].value.str.data); break;
|
||||||
case GGUF_TYPE_ARRAY:
|
case GGUF_TYPE_ARRAY:
|
||||||
@ -20267,6 +20362,9 @@ static void gguf_write_to_buf(struct gguf_context * ctx, struct gguf_buf * buf,
|
|||||||
case GGUF_TYPE_UINT32: gguf_bwrite_el (buf, &kv->value.uint32, sizeof(kv->value.uint32) ); break;
|
case GGUF_TYPE_UINT32: gguf_bwrite_el (buf, &kv->value.uint32, sizeof(kv->value.uint32) ); break;
|
||||||
case GGUF_TYPE_INT32: gguf_bwrite_el (buf, &kv->value.int32, sizeof(kv->value.int32) ); break;
|
case GGUF_TYPE_INT32: gguf_bwrite_el (buf, &kv->value.int32, sizeof(kv->value.int32) ); break;
|
||||||
case GGUF_TYPE_FLOAT32: gguf_bwrite_el (buf, &kv->value.float32, sizeof(kv->value.float32)); break;
|
case GGUF_TYPE_FLOAT32: gguf_bwrite_el (buf, &kv->value.float32, sizeof(kv->value.float32)); break;
|
||||||
|
case GGUF_TYPE_UINT64: gguf_bwrite_el (buf, &kv->value.uint64, sizeof(kv->value.uint64) ); break;
|
||||||
|
case GGUF_TYPE_INT64: gguf_bwrite_el (buf, &kv->value.int64, sizeof(kv->value.int64) ); break;
|
||||||
|
case GGUF_TYPE_FLOAT64: gguf_bwrite_el (buf, &kv->value.float64, sizeof(kv->value.float64)); break;
|
||||||
case GGUF_TYPE_BOOL: gguf_bwrite_el (buf, &kv->value.bool_, sizeof(kv->value.bool_) ); break;
|
case GGUF_TYPE_BOOL: gguf_bwrite_el (buf, &kv->value.bool_, sizeof(kv->value.bool_) ); break;
|
||||||
case GGUF_TYPE_STRING: gguf_bwrite_str(buf, &kv->value.str ); break;
|
case GGUF_TYPE_STRING: gguf_bwrite_str(buf, &kv->value.str ); break;
|
||||||
case GGUF_TYPE_ARRAY:
|
case GGUF_TYPE_ARRAY:
|
||||||
@ -20282,6 +20380,9 @@ static void gguf_write_to_buf(struct gguf_context * ctx, struct gguf_buf * buf,
|
|||||||
case GGUF_TYPE_UINT32:
|
case GGUF_TYPE_UINT32:
|
||||||
case GGUF_TYPE_INT32:
|
case GGUF_TYPE_INT32:
|
||||||
case GGUF_TYPE_FLOAT32:
|
case GGUF_TYPE_FLOAT32:
|
||||||
|
case GGUF_TYPE_UINT64:
|
||||||
|
case GGUF_TYPE_INT64:
|
||||||
|
case GGUF_TYPE_FLOAT64:
|
||||||
case GGUF_TYPE_BOOL:
|
case GGUF_TYPE_BOOL:
|
||||||
{
|
{
|
||||||
gguf_bwrite_el(buf, kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
|
gguf_bwrite_el(buf, kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
|
||||||
|
11
ggml.h
11
ggml.h
@ -216,7 +216,7 @@
|
|||||||
#define GGML_EXIT_ABORTED 1
|
#define GGML_EXIT_ABORTED 1
|
||||||
|
|
||||||
#define GGUF_MAGIC 0x46554747 // "GGUF"
|
#define GGUF_MAGIC 0x46554747 // "GGUF"
|
||||||
#define GGUF_VERSION 1
|
#define GGUF_VERSION 2
|
||||||
|
|
||||||
#define GGUF_DEFAULT_ALIGNMENT 32
|
#define GGUF_DEFAULT_ALIGNMENT 32
|
||||||
|
|
||||||
@ -1827,6 +1827,9 @@ extern "C" {
|
|||||||
GGUF_TYPE_BOOL = 7,
|
GGUF_TYPE_BOOL = 7,
|
||||||
GGUF_TYPE_STRING = 8,
|
GGUF_TYPE_STRING = 8,
|
||||||
GGUF_TYPE_ARRAY = 9,
|
GGUF_TYPE_ARRAY = 9,
|
||||||
|
GGUF_TYPE_UINT64 = 10,
|
||||||
|
GGUF_TYPE_INT64 = 11,
|
||||||
|
GGUF_TYPE_FLOAT64 = 12,
|
||||||
GGUF_TYPE_COUNT, // marks the end of the enum
|
GGUF_TYPE_COUNT, // marks the end of the enum
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -1867,6 +1870,9 @@ extern "C" {
|
|||||||
GGML_API uint32_t gguf_get_val_u32 (struct gguf_context * ctx, int i);
|
GGML_API uint32_t gguf_get_val_u32 (struct gguf_context * ctx, int i);
|
||||||
GGML_API int32_t gguf_get_val_i32 (struct gguf_context * ctx, int i);
|
GGML_API int32_t gguf_get_val_i32 (struct gguf_context * ctx, int i);
|
||||||
GGML_API float gguf_get_val_f32 (struct gguf_context * ctx, int i);
|
GGML_API float gguf_get_val_f32 (struct gguf_context * ctx, int i);
|
||||||
|
GGML_API uint64_t gguf_get_val_u64 (struct gguf_context * ctx, int i);
|
||||||
|
GGML_API int64_t gguf_get_val_i64 (struct gguf_context * ctx, int i);
|
||||||
|
GGML_API double gguf_get_val_f64 (struct gguf_context * ctx, int i);
|
||||||
GGML_API bool gguf_get_val_bool(struct gguf_context * ctx, int i);
|
GGML_API bool gguf_get_val_bool(struct gguf_context * ctx, int i);
|
||||||
GGML_API const char * gguf_get_val_str (struct gguf_context * ctx, int i);
|
GGML_API const char * gguf_get_val_str (struct gguf_context * ctx, int i);
|
||||||
GGML_API int gguf_get_arr_n (struct gguf_context * ctx, int i);
|
GGML_API int gguf_get_arr_n (struct gguf_context * ctx, int i);
|
||||||
@ -1886,6 +1892,9 @@ extern "C" {
|
|||||||
GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
|
GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
|
||||||
GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val);
|
GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val);
|
||||||
GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val);
|
GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val);
|
||||||
|
GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val);
|
||||||
|
GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val);
|
||||||
|
GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val);
|
||||||
GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val);
|
GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val);
|
||||||
GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
|
GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
|
||||||
GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
|
GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
|
||||||
|
@ -13,7 +13,7 @@ from typing import Any, IO, List, Optional
|
|||||||
#
|
#
|
||||||
|
|
||||||
GGUF_MAGIC = 0x46554747
|
GGUF_MAGIC = 0x46554747
|
||||||
GGUF_VERSION = 1
|
GGUF_VERSION = 2
|
||||||
GGUF_DEFAULT_ALIGNMENT = 32
|
GGUF_DEFAULT_ALIGNMENT = 32
|
||||||
|
|
||||||
# general
|
# general
|
||||||
@ -365,6 +365,9 @@ class GGUFValueType(IntEnum):
|
|||||||
BOOL = 7
|
BOOL = 7
|
||||||
STRING = 8
|
STRING = 8
|
||||||
ARRAY = 9
|
ARRAY = 9
|
||||||
|
UINT64 = 10
|
||||||
|
INT64 = 11
|
||||||
|
FLOAT64 = 12
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_type(val):
|
def get_type(val):
|
||||||
@ -378,6 +381,7 @@ class GGUFValueType(IntEnum):
|
|||||||
return GGUFValueType.BOOL
|
return GGUFValueType.BOOL
|
||||||
elif isinstance(val, int):
|
elif isinstance(val, int):
|
||||||
return GGUFValueType.INT32
|
return GGUFValueType.INT32
|
||||||
|
# TODO: need help with 64-bit types in Python
|
||||||
else:
|
else:
|
||||||
print("Unknown type: "+str(type(val)))
|
print("Unknown type: "+str(type(val)))
|
||||||
sys.exit()
|
sys.exit()
|
||||||
@ -400,8 +404,8 @@ class GGUFWriter:
|
|||||||
def write_header_to_file(self):
|
def write_header_to_file(self):
|
||||||
self.fout.write(struct.pack("<I", GGUF_MAGIC))
|
self.fout.write(struct.pack("<I", GGUF_MAGIC))
|
||||||
self.fout.write(struct.pack("<I", GGUF_VERSION))
|
self.fout.write(struct.pack("<I", GGUF_VERSION))
|
||||||
self.fout.write(struct.pack("<I", self.ti_data_count))
|
self.fout.write(struct.pack("<Q", self.ti_data_count))
|
||||||
self.fout.write(struct.pack("<I", self.kv_data_count))
|
self.fout.write(struct.pack("<Q", self.kv_data_count))
|
||||||
self.flush()
|
self.flush()
|
||||||
# print("tensors " + str(self.ti_data_count) + " kv " + str(self.kv_data_count))
|
# print("tensors " + str(self.ti_data_count) + " kv " + str(self.kv_data_count))
|
||||||
|
|
||||||
@ -444,6 +448,18 @@ class GGUFWriter:
|
|||||||
self.add_key(key)
|
self.add_key(key)
|
||||||
self.add_val(val, GGUFValueType.FLOAT32)
|
self.add_val(val, GGUFValueType.FLOAT32)
|
||||||
|
|
||||||
|
# Add a key-value pair whose value is tagged GGUFValueType.UINT64:
# passes `key` to add_key, then `val` with the UINT64 type to add_val.
def add_uint64(self, key: str, val: int):
|
||||||
|
self.add_key(key)
|
||||||
|
self.add_val(val, GGUFValueType.UINT64)
|
||||||
|
|
||||||
|
# Add a key-value pair whose value is tagged GGUFValueType.INT64:
# passes `key` to add_key, then `val` with the INT64 type to add_val.
def add_int64(self, key: str, val: int):
|
||||||
|
self.add_key(key)
|
||||||
|
self.add_val(val, GGUFValueType.INT64)
|
||||||
|
|
||||||
|
# Add a key-value pair whose value is tagged GGUFValueType.FLOAT64:
# passes `key` to add_key, then `val` with the FLOAT64 type to add_val.
def add_float64(self, key: str, val: float):
|
||||||
|
self.add_key(key)
|
||||||
|
self.add_val(val, GGUFValueType.FLOAT64)
|
||||||
|
|
||||||
def add_bool(self, key: str, val: bool):
|
def add_bool(self, key: str, val: bool):
|
||||||
self.add_key(key)
|
self.add_key(key)
|
||||||
self.add_val(val, GGUFValueType.BOOL)
|
self.add_val(val, GGUFValueType.BOOL)
|
||||||
@ -483,17 +499,23 @@ class GGUFWriter:
|
|||||||
self.kv_data += struct.pack("<i", val)
|
self.kv_data += struct.pack("<i", val)
|
||||||
elif vtype == GGUFValueType.FLOAT32:
|
elif vtype == GGUFValueType.FLOAT32:
|
||||||
self.kv_data += struct.pack("<f", val)
|
self.kv_data += struct.pack("<f", val)
|
||||||
|
elif vtype == GGUFValueType.UINT64:
|
||||||
|
self.kv_data += struct.pack("<Q", val)
|
||||||
|
elif vtype == GGUFValueType.INT64:
|
||||||
|
self.kv_data += struct.pack("<q", val)
|
||||||
|
elif vtype == GGUFValueType.FLOAT64:
|
||||||
|
self.kv_data += struct.pack("<d", val)
|
||||||
elif vtype == GGUFValueType.BOOL:
|
elif vtype == GGUFValueType.BOOL:
|
||||||
self.kv_data += struct.pack("?", val)
|
self.kv_data += struct.pack("?", val)
|
||||||
elif vtype == GGUFValueType.STRING:
|
elif vtype == GGUFValueType.STRING:
|
||||||
encoded_val = val.encode("utf8") if isinstance(val, str) else val
|
encoded_val = val.encode("utf8") if isinstance(val, str) else val
|
||||||
self.kv_data += struct.pack("<I", len(encoded_val))
|
self.kv_data += struct.pack("<Q", len(encoded_val))
|
||||||
self.kv_data += encoded_val
|
self.kv_data += encoded_val
|
||||||
elif vtype == GGUFValueType.ARRAY:
|
elif vtype == GGUFValueType.ARRAY:
|
||||||
ltype = set([GGUFValueType.get_type(item) for item in val])
|
ltype = set([GGUFValueType.get_type(item) for item in val])
|
||||||
assert len(ltype) == 1, "All items in a GGUF array should be of the same type"
|
assert len(ltype) == 1, "All items in a GGUF array should be of the same type"
|
||||||
self.kv_data += struct.pack("<I", list(ltype)[0])
|
self.kv_data += struct.pack("<I", list(ltype)[0])
|
||||||
self.kv_data += struct.pack("<I", len(val))
|
self.kv_data += struct.pack("<Q", len(val))
|
||||||
for item in val:
|
for item in val:
|
||||||
self.add_val(item, add_vtype=False)
|
self.add_val(item, add_vtype=False)
|
||||||
else:
|
else:
|
||||||
@ -507,12 +529,12 @@ class GGUFWriter:
|
|||||||
assert raw_dtype is not None or tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"
|
assert raw_dtype is not None or tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"
|
||||||
|
|
||||||
encoded_name = name.encode("utf8")
|
encoded_name = name.encode("utf8")
|
||||||
self.ti_data += struct.pack("<I", len(encoded_name))
|
self.ti_data += struct.pack("<Q", len(encoded_name))
|
||||||
self.ti_data += encoded_name
|
self.ti_data += encoded_name
|
||||||
n_dims = len(tensor_shape)
|
n_dims = len(tensor_shape)
|
||||||
self.ti_data += struct.pack("<I", n_dims)
|
self.ti_data += struct.pack("<I", n_dims)
|
||||||
for i in range(n_dims):
|
for i in range(n_dims):
|
||||||
self.ti_data += struct.pack("<I", tensor_shape[n_dims - 1 - i])
|
self.ti_data += struct.pack("<Q", tensor_shape[n_dims - 1 - i])
|
||||||
if raw_dtype is None:
|
if raw_dtype is None:
|
||||||
dtype = GGMLQuantizationType.F32 if tensor_dtype == np.float32 else GGMLQuantizationType.F16
|
dtype = GGMLQuantizationType.F32 if tensor_dtype == np.float32 else GGMLQuantizationType.F16
|
||||||
else:
|
else:
|
||||||
|
@ -1144,11 +1144,13 @@ static bool llama_kv_cache_init(
|
|||||||
|
|
||||||
enum llama_fver {
|
enum llama_fver {
|
||||||
GGUF_FILE_VERSION_V1 = 1,
|
GGUF_FILE_VERSION_V1 = 1,
|
||||||
|
GGUF_FILE_VERSION_V2 = 2,
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char * llama_file_version_name(llama_fver version) {
|
static const char * llama_file_version_name(llama_fver version) {
|
||||||
switch (version) {
|
switch (version) {
|
||||||
case GGUF_FILE_VERSION_V1: return "GGUF V1 (latest)";
|
case GGUF_FILE_VERSION_V1: return "GGUF V1 (support until nov 2023)";
|
||||||
|
case GGUF_FILE_VERSION_V2: return "GGUF V2 (latest)";
|
||||||
}
|
}
|
||||||
|
|
||||||
return "unknown";
|
return "unknown";
|
||||||
|
Loading…
Reference in New Issue
Block a user