mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 03:31:46 +00:00
Expose type name from ggml (#970)
Avoid duplication of type names in utils.

Co-authored-by: Håkon H. Hitland <haakon@likedan.net>
This commit is contained in:
parent
f4d277ae17
commit
c56b715269
@ -16,9 +16,6 @@
|
|||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
static const char * type_strs[] = { "q4_0", "q4_1", "i8", "i16", "i32", "f16", "f32" };
|
|
||||||
static_assert(sizeof(type_strs) == GGML_TYPE_COUNT * sizeof(char *), "Incomplete type list");
|
|
||||||
|
|
||||||
struct quantize_stats_params {
|
struct quantize_stats_params {
|
||||||
std::string model = "models/7B/ggml-model-f16.bin";
|
std::string model = "models/7B/ggml-model-f16.bin";
|
||||||
bool verbose = false;
|
bool verbose = false;
|
||||||
@ -224,7 +221,7 @@ int main(int argc, char ** argv) {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
int j;
|
int j;
|
||||||
for (j = 0; j < GGML_TYPE_COUNT && strcmp(argv[i], type_strs[j]) != 0; j++) {
|
// Linear search for the type whose name equals argv[i]; the candidate type is the loop variable j (not the argv index i).
for (j = 0; j < GGML_TYPE_COUNT && strcmp(argv[i], ggml_type_name((ggml_type) j)) != 0; j++) {
|
||||||
// find match
|
// find match
|
||||||
}
|
}
|
||||||
if (j < GGML_TYPE_COUNT) {
|
if (j < GGML_TYPE_COUNT) {
|
||||||
@ -279,7 +276,7 @@ int main(int argc, char ** argv) {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (params.verbose) {
|
if (params.verbose) {
|
||||||
printf("%s: type %s, size %" PRId64 "\n", kv_tensor.first.c_str(), type_strs[kv_tensor.second->type], ggml_nelements(kv_tensor.second));
|
printf("%s: type %s, size %" PRId64 "\n", kv_tensor.first.c_str(), ggml_type_name(kv_tensor.second->type), ggml_nelements(kv_tensor.second));
|
||||||
}
|
}
|
||||||
if (kv_tensor.second->type == GGML_TYPE_F16) {
|
if (kv_tensor.second->type == GGML_TYPE_F16) {
|
||||||
is_f16 = true;
|
is_f16 = true;
|
||||||
@ -304,13 +301,14 @@ int main(int argc, char ** argv) {
|
|||||||
|
|
||||||
// loop through quantization types
|
// loop through quantization types
|
||||||
for (int i = 0; i < GGML_TYPE_COUNT; i++) {
|
for (int i = 0; i < GGML_TYPE_COUNT; i++) {
|
||||||
|
const ggml_type type = (ggml_type) i;
|
||||||
if (!params.include_types.empty() && std::find(params.include_types.begin(), params.include_types.end(), i) == params.include_types.end()) {
|
if (!params.include_types.empty() && std::find(params.include_types.begin(), params.include_types.end(), i) == params.include_types.end()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
quantize_fns_t qfns = ggml_internal_get_quantize_fn(i);
|
quantize_fns_t qfns = ggml_internal_get_quantize_fn(i);
|
||||||
if (qfns.quantize_row_q && qfns.dequantize_row_q) {
|
if (qfns.quantize_row_q && qfns.dequantize_row_q) {
|
||||||
if (params.verbose) {
|
if (params.verbose) {
|
||||||
printf("testing %s ...\n", type_strs[i]);
|
printf("testing %s ...\n", ggml_type_name(type));
|
||||||
}
|
}
|
||||||
|
|
||||||
error_stats global_stats {};
|
error_stats global_stats {};
|
||||||
@ -322,7 +320,7 @@ int main(int argc, char ** argv) {
|
|||||||
if (params.verbose) {
|
if (params.verbose) {
|
||||||
printf(" %s ...\n", kv_tensor.first.c_str());
|
printf(" %s ...\n", kv_tensor.first.c_str());
|
||||||
}
|
}
|
||||||
std::string layer_name { type_strs[i] };
|
std::string layer_name { ggml_type_name(type) };
|
||||||
layer_name += "::" + kv_tensor.first;
|
layer_name += "::" + kv_tensor.first;
|
||||||
test_roundtrip_on_layer(
|
test_roundtrip_on_layer(
|
||||||
layer_name,
|
layer_name,
|
||||||
@ -337,7 +335,7 @@ int main(int argc, char ** argv) {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
print_error_stats(type_strs[i], global_stats, params.print_histogram);
|
print_error_stats(ggml_type_name(type), global_stats, params.print_histogram);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
17
ggml.c
17
ggml.c
@ -2671,6 +2671,18 @@ static const size_t GGML_TYPE_SIZE[GGML_TYPE_COUNT] = {
|
|||||||
};
|
};
|
||||||
static_assert(GGML_TYPE_COUNT == 7, "GGML_TYPE_SIZE is outdated");
|
static_assert(GGML_TYPE_COUNT == 7, "GGML_TYPE_SIZE is outdated");
|
||||||
|
|
||||||
|
|
||||||
|
// Short name string for each ggml_type, stored at the index of its enum value
// (designated initializers), with GGML_TYPE_COUNT slots in total.
static const char * GGML_TYPE_NAME[GGML_TYPE_COUNT] = {
|
||||||
|
[GGML_TYPE_F32] = "f32",
|
||||||
|
[GGML_TYPE_F16] = "f16",
|
||||||
|
[GGML_TYPE_Q4_0] = "q4_0",
|
||||||
|
[GGML_TYPE_Q4_1] = "q4_1",
|
||||||
|
[GGML_TYPE_I8] = "i8",
|
||||||
|
[GGML_TYPE_I16] = "i16",
|
||||||
|
[GGML_TYPE_I32] = "i32",
|
||||||
|
};
|
||||||
|
static_assert(GGML_TYPE_COUNT == 7, "GGML_TYPE_NAME is outdated");
|
||||||
|
|
||||||
static const char * GGML_OP_LABEL[GGML_OP_COUNT] = {
|
static const char * GGML_OP_LABEL[GGML_OP_COUNT] = {
|
||||||
"NONE",
|
"NONE",
|
||||||
|
|
||||||
@ -2895,6 +2907,11 @@ float ggml_type_sizef(enum ggml_type type) {
|
|||||||
return ((float)(GGML_TYPE_SIZE[type]))/GGML_BLCK_SIZE[type];
|
return ((float)(GGML_TYPE_SIZE[type]))/GGML_BLCK_SIZE[type];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the name string stored in GGML_TYPE_NAME for `type`.
// The table is indexed directly with no range check, so `type` must be a valid index into it.
const char * ggml_type_name(enum ggml_type type) {
|
||||||
|
return GGML_TYPE_NAME[type];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
size_t ggml_element_size(const struct ggml_tensor * tensor) {
|
size_t ggml_element_size(const struct ggml_tensor * tensor) {
|
||||||
return GGML_TYPE_SIZE[tensor->type];
|
return GGML_TYPE_SIZE[tensor->type];
|
||||||
}
|
}
|
||||||
|
2
ggml.h
2
ggml.h
@ -354,6 +354,8 @@ int ggml_blck_size (enum ggml_type type);
|
|||||||
size_t ggml_type_size (enum ggml_type type); // size in bytes for all elements in a block
|
size_t ggml_type_size (enum ggml_type type); // size in bytes for all elements in a block
|
||||||
float ggml_type_sizef(enum ggml_type type); // ggml_type_size()/ggml_blck_size() as float
|
float ggml_type_sizef(enum ggml_type type); // ggml_type_size()/ggml_blck_size() as float
|
||||||
|
|
||||||
|
const char * ggml_type_name(enum ggml_type type);
|
||||||
|
|
||||||
size_t ggml_element_size(const struct ggml_tensor * tensor);
|
size_t ggml_element_size(const struct ggml_tensor * tensor);
|
||||||
|
|
||||||
struct ggml_context * ggml_init(struct ggml_init_params params);
|
struct ggml_context * ggml_init(struct ggml_init_params params);
|
||||||
|
14
llama.cpp
14
llama.cpp
@ -269,16 +269,6 @@ static std::string llama_format_tensor_shape(const std::vector<uint32_t> & ne) {
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Maps a ggml_type to a short type-name string literal.
// Only F32, F16, Q4_0 and Q4_1 have cases; any other value reaches LLAMA_ASSERT(false).
// NOTE(review): there is no return statement after the default branch, so this relies on
// LLAMA_ASSERT not returning -- confirm.
// NOTE(review): in this diff view the function appears only on the old (removed) side;
// the call sites shown further down use ggml_type_name instead.
static const char * llama_format_type(enum ggml_type type) {
|
|
||||||
switch (type) {
|
|
||||||
case GGML_TYPE_F32: return "f32";
|
|
||||||
case GGML_TYPE_F16: return "f16";
|
|
||||||
case GGML_TYPE_Q4_0: return "q4_0";
|
|
||||||
case GGML_TYPE_Q4_1: return "q4_1";
|
|
||||||
default: LLAMA_ASSERT(false);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static size_t llama_calc_tensor_size(const std::vector<uint32_t> & ne, enum ggml_type type) {
|
static size_t llama_calc_tensor_size(const std::vector<uint32_t> & ne, enum ggml_type type) {
|
||||||
size_t size = ggml_type_size(type);
|
size_t size = ggml_type_size(type);
|
||||||
for (uint32_t dim : ne) {
|
for (uint32_t dim : ne) {
|
||||||
@ -1582,7 +1572,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
|
|||||||
printf("[%zu/%zu] %36s - %s, type = %6s, ",
|
printf("[%zu/%zu] %36s - %s, type = %6s, ",
|
||||||
++idx, model_loader->tensors_map.tensors.size(),
|
++idx, model_loader->tensors_map.tensors.size(),
|
||||||
tensor.name.c_str(), llama_format_tensor_shape(tensor.ne).c_str(),
|
tensor.name.c_str(), llama_format_tensor_shape(tensor.ne).c_str(),
|
||||||
llama_format_type(tensor.type));
|
ggml_type_name(tensor.type));
|
||||||
|
|
||||||
// This used to be a regex, but <regex> has an extreme cost to compile times.
|
// This used to be a regex, but <regex> has an extreme cost to compile times.
|
||||||
bool quantize = tensor.name.rfind("weight") == tensor.name.size() - 6; // ends with 'weight'?
|
bool quantize = tensor.name.rfind("weight") == tensor.name.size() - 6; // ends with 'weight'?
|
||||||
@ -1615,7 +1605,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
|
|||||||
f32_data[i] = ggml_fp16_to_fp32(f16_data[i]);
|
f32_data[i] = ggml_fp16_to_fp32(f16_data[i]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
throw format("type %s unsupported for integer quantization", llama_format_type(tensor.type));
|
throw format("type %s unsupported for integer quantization", ggml_type_name(tensor.type));
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("quantizing .. ");
|
printf("quantizing .. ");
|
||||||
|
Loading…
Reference in New Issue
Block a user