Mirror of https://github.com/ggerganov/llama.cpp.git (synced 2025-01-10 10:41:47 +00:00)
ggml : rename ggml_internal_get_type_traits -> ggml_get_type_traits
Some checks failed
flake8 Lint / Lint (push) Has been cancelled
Some checks failed
flake8 Lint / Lint (push) Has been cancelled
It's not really internal if everybody uses it.
This commit is contained in:
parent
e2e10ff199
commit
d74105f2c7
@ -314,7 +314,7 @@ struct lora_merge_ctx {
|
|||||||
// optionally dequantize it
|
// optionally dequantize it
|
||||||
printf("%s : + dequantize base tensor from %s to F32\n", __func__, ggml_type_name(base->type));
|
printf("%s : + dequantize base tensor from %s to F32\n", __func__, ggml_type_name(base->type));
|
||||||
auto nels = ggml_nelements(inp_base);
|
auto nels = ggml_nelements(inp_base);
|
||||||
const auto * qtype = ggml_internal_get_type_traits(base->type);
|
const auto * qtype = ggml_get_type_traits(base->type);
|
||||||
std::vector<uint8_t> dequant_buf(nels * sizeof(float));
|
std::vector<uint8_t> dequant_buf(nels * sizeof(float));
|
||||||
qtype->to_float(read_buf.data(), (float *)dequant_buf.data(), nels);
|
qtype->to_float(read_buf.data(), (float *)dequant_buf.data(), nels);
|
||||||
ggml_backend_tensor_set(inp_base, dequant_buf.data(), 0, dequant_buf.size());
|
ggml_backend_tensor_set(inp_base, dequant_buf.data(), 0, dequant_buf.size());
|
||||||
|
@ -371,7 +371,7 @@ int main(int argc, char ** argv) {
|
|||||||
if (!params.include_types.empty() && std::find(params.include_types.begin(), params.include_types.end(), i) == params.include_types.end()) {
|
if (!params.include_types.empty() && std::find(params.include_types.begin(), params.include_types.end(), i) == params.include_types.end()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const auto * qfns = ggml_internal_get_type_traits(type);
|
const auto * qfns = ggml_get_type_traits(type);
|
||||||
if (qfns->from_float && qfns->to_float) {
|
if (qfns->from_float && qfns->to_float) {
|
||||||
if (params.verbose) {
|
if (params.verbose) {
|
||||||
printf("testing %s ...\n", ggml_type_name(type));
|
printf("testing %s ...\n", ggml_type_name(type));
|
||||||
|
@ -2553,7 +2553,7 @@ extern "C" {
|
|||||||
ggml_gemm_t gemm;
|
ggml_gemm_t gemm;
|
||||||
};
|
};
|
||||||
|
|
||||||
GGML_API const struct ggml_type_traits * ggml_internal_get_type_traits(enum ggml_type type);
|
GGML_API const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
@ -1177,7 +1177,7 @@ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const st
|
|||||||
op->type != GGML_TYPE_IQ1_S &&
|
op->type != GGML_TYPE_IQ1_S &&
|
||||||
op->type != GGML_TYPE_IQ1_M; // missing type_traits.from_float
|
op->type != GGML_TYPE_IQ1_M; // missing type_traits.from_float
|
||||||
case GGML_OP_MUL_MAT:
|
case GGML_OP_MUL_MAT:
|
||||||
return op->src[1]->type == GGML_TYPE_F32 || op->src[1]->type == ggml_internal_get_type_traits(op->src[0]->type)->vec_dot_type;
|
return op->src[1]->type == GGML_TYPE_F32 || op->src[1]->type == ggml_get_type_traits(op->src[0]->type)->vec_dot_type;
|
||||||
case GGML_OP_ROPE_BACK:
|
case GGML_OP_ROPE_BACK:
|
||||||
return op->src[2] == NULL && (op->op_params[2] & 4) == 0;
|
return op->src[2] == NULL && (op->op_params[2] & 4) == 0;
|
||||||
case GGML_OP_IM2COL_BACK:
|
case GGML_OP_IM2COL_BACK:
|
||||||
|
@ -65,7 +65,7 @@ static void ggml_backend_blas_mul_mat(ggml_backend_blas_context * ctx, struct gg
|
|||||||
|
|
||||||
// convert src0 to float
|
// convert src0 to float
|
||||||
if (type != GGML_TYPE_F32) {
|
if (type != GGML_TYPE_F32) {
|
||||||
const auto * type_traits = ggml_internal_get_type_traits(type);
|
const auto * type_traits = ggml_get_type_traits(type);
|
||||||
ggml_to_float_t const to_float = type_traits->to_float;
|
ggml_to_float_t const to_float = type_traits->to_float;
|
||||||
|
|
||||||
for (int64_t i03 = 0; i03 < ne03; i03++) {
|
for (int64_t i03 = 0; i03 < ne03; i03++) {
|
||||||
@ -424,7 +424,7 @@ static bool ggml_backend_blas_device_supports_op(ggml_backend_dev_t dev, const s
|
|||||||
ggml_is_contiguous(src1) &&
|
ggml_is_contiguous(src1) &&
|
||||||
src1->type == GGML_TYPE_F32 &&
|
src1->type == GGML_TYPE_F32 &&
|
||||||
(ne0 >= min_batch && ne1 >= min_batch && ne10 >= min_batch) &&
|
(ne0 >= min_batch && ne1 >= min_batch && ne10 >= min_batch) &&
|
||||||
(src0->type == GGML_TYPE_F32 || ggml_internal_get_type_traits(src0->type)->to_float != NULL);
|
(src0->type == GGML_TYPE_F32 || ggml_get_type_traits(src0->type)->to_float != NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
case GGML_OP_OUT_PROD:
|
case GGML_OP_OUT_PROD:
|
||||||
@ -434,7 +434,7 @@ static bool ggml_backend_blas_device_supports_op(ggml_backend_dev_t dev, const s
|
|||||||
ggml_is_matrix(src1) &&
|
ggml_is_matrix(src1) &&
|
||||||
ggml_is_contiguous(src0) &&
|
ggml_is_contiguous(src0) &&
|
||||||
(ggml_is_contiguous(src1) || ggml_is_transposed(src1)) &&
|
(ggml_is_contiguous(src1) || ggml_is_transposed(src1)) &&
|
||||||
(src0->type == GGML_TYPE_F32 || ggml_internal_get_type_traits(src0->type)->to_float != NULL);
|
(src0->type == GGML_TYPE_F32 || ggml_get_type_traits(src0->type)->to_float != NULL);
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
|
@ -5287,7 +5287,7 @@ static void ggml_vk_dequantize_data(const void * from, float * to, size_t ne, gg
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto * tt = ggml_internal_get_type_traits(quant);
|
const auto * tt = ggml_get_type_traits(quant);
|
||||||
|
|
||||||
ggml_to_float_t dequant_fn = tt->to_float;
|
ggml_to_float_t dequant_fn = tt->to_float;
|
||||||
|
|
||||||
|
@ -1151,7 +1151,7 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// For internal test use
|
// For internal test use
|
||||||
const struct ggml_type_traits * ggml_internal_get_type_traits(enum ggml_type type) {
|
const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type) {
|
||||||
GGML_ASSERT(type < GGML_TYPE_COUNT);
|
GGML_ASSERT(type < GGML_TYPE_COUNT);
|
||||||
return &type_traits[type];
|
return &type_traits[type];
|
||||||
}
|
}
|
||||||
|
@ -136,7 +136,7 @@ int main(int argc, char** argv) {
|
|||||||
|
|
||||||
auto ggml_type = type == 0 ? GGML_TYPE_Q4_0 : GGML_TYPE_Q4_1;
|
auto ggml_type = type == 0 ? GGML_TYPE_Q4_0 : GGML_TYPE_Q4_1;
|
||||||
|
|
||||||
const auto * funcs = ggml_internal_get_type_traits(ggml_type);
|
const auto * funcs = ggml_get_type_traits(ggml_type);
|
||||||
|
|
||||||
Stat simple, ggml;
|
Stat simple, ggml;
|
||||||
|
|
||||||
|
@ -236,7 +236,7 @@ int main(int argc, char** argv) {
|
|||||||
int n4 = useQ4_1 ? kVecSize / QK4_1 : kVecSize / QK4_0; n4 = 64*((n4 + 63)/64);
|
int n4 = useQ4_1 ? kVecSize / QK4_1 : kVecSize / QK4_0; n4 = 64*((n4 + 63)/64);
|
||||||
int n8 = kVecSize / QK8_0; n8 = 64*((n8 + 63)/64);
|
int n8 = kVecSize / QK8_0; n8 = 64*((n8 + 63)/64);
|
||||||
|
|
||||||
const auto * funcs = useQ4_1 ? ggml_internal_get_type_traits(GGML_TYPE_Q4_1) : ggml_internal_get_type_traits(GGML_TYPE_Q4_0);
|
const auto * funcs = useQ4_1 ? ggml_get_type_traits(GGML_TYPE_Q4_1) : ggml_get_type_traits(GGML_TYPE_Q4_0);
|
||||||
|
|
||||||
std::vector<block_q4_0> q40;
|
std::vector<block_q4_0> q40;
|
||||||
std::vector<block_q4_1> q41;
|
std::vector<block_q4_1> q41;
|
||||||
@ -282,7 +282,7 @@ int main(int argc, char** argv) {
|
|||||||
dot_q4_q8(kVecSize, &result, q40.data(), q8.data());
|
dot_q4_q8(kVecSize, &result, q40.data(), q8.data());
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
const auto * vdot = ggml_internal_get_type_traits(funcs->vec_dot_type);
|
const auto * vdot = ggml_get_type_traits(funcs->vec_dot_type);
|
||||||
vdot->from_float(y1.data(), q8.data(), kVecSize);
|
vdot->from_float(y1.data(), q8.data(), kVecSize);
|
||||||
if (useQ4_1) funcs->vec_dot(kVecSize, &result, 0, q41.data(), 0, q8.data(), 0, 1);
|
if (useQ4_1) funcs->vec_dot(kVecSize, &result, 0, q41.data(), 0, q8.data(), 0, 1);
|
||||||
else funcs->vec_dot(kVecSize, &result, 0, q40.data(), 0, q8.data(), 0, 1);
|
else funcs->vec_dot(kVecSize, &result, 0, q40.data(), 0, q8.data(), 0, 1);
|
||||||
|
@ -17872,7 +17872,7 @@ static void llama_tensor_dequantize_internal(
|
|||||||
}
|
}
|
||||||
float * f32_output = (float *) output.data();
|
float * f32_output = (float *) output.data();
|
||||||
|
|
||||||
const ggml_type_traits * qtype = ggml_internal_get_type_traits(tensor->type);
|
const ggml_type_traits * qtype = ggml_get_type_traits(tensor->type);
|
||||||
if (ggml_is_quantized(tensor->type)) {
|
if (ggml_is_quantized(tensor->type)) {
|
||||||
if (qtype->to_float == NULL) {
|
if (qtype->to_float == NULL) {
|
||||||
throw std::runtime_error(format("type %s unsupported for integer quantization: no dequantization available", ggml_type_name(tensor->type)));
|
throw std::runtime_error(format("type %s unsupported for integer quantization: no dequantization available", ggml_type_name(tensor->type)));
|
||||||
|
@ -133,7 +133,7 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
|
|||||||
std::vector<uint8_t> buf(ggml_nbytes(t));
|
std::vector<uint8_t> buf(ggml_nbytes(t));
|
||||||
ggml_backend_tensor_get(t, buf.data(), 0, ggml_nbytes(t));
|
ggml_backend_tensor_get(t, buf.data(), 0, ggml_nbytes(t));
|
||||||
|
|
||||||
const auto * tt = ggml_internal_get_type_traits(t->type);
|
const auto * tt = ggml_get_type_traits(t->type);
|
||||||
size_t bs = ggml_blck_size(t->type);
|
size_t bs = ggml_blck_size(t->type);
|
||||||
std::vector<float> vq(ggml_blck_size(t->type));
|
std::vector<float> vq(ggml_blck_size(t->type));
|
||||||
bool quantized = ggml_is_quantized(t->type);
|
bool quantized = ggml_is_quantized(t->type);
|
||||||
|
@ -83,7 +83,7 @@ static float dot_product_error(
|
|||||||
std::vector<uint8_t> tmp_q1(2*test_size);
|
std::vector<uint8_t> tmp_q1(2*test_size);
|
||||||
std::vector<uint8_t> tmp_q2(2*test_size);
|
std::vector<uint8_t> tmp_q2(2*test_size);
|
||||||
|
|
||||||
const auto * vdot = ggml_internal_get_type_traits(qfns->vec_dot_type);
|
const auto * vdot = ggml_get_type_traits(qfns->vec_dot_type);
|
||||||
|
|
||||||
qfns->from_float(test_data1, tmp_q1.data(), test_size);
|
qfns->from_float(test_data1, tmp_q1.data(), test_size);
|
||||||
vdot->from_float(test_data2, tmp_q2.data(), test_size);
|
vdot->from_float(test_data2, tmp_q2.data(), test_size);
|
||||||
@ -131,7 +131,7 @@ int main(int argc, char * argv[]) {
|
|||||||
|
|
||||||
for (int i = 0; i < GGML_TYPE_COUNT; i++) {
|
for (int i = 0; i < GGML_TYPE_COUNT; i++) {
|
||||||
ggml_type type = (ggml_type) i;
|
ggml_type type = (ggml_type) i;
|
||||||
const auto * qfns = ggml_internal_get_type_traits(type);
|
const auto * qfns = ggml_get_type_traits(type);
|
||||||
|
|
||||||
// deprecated - skip
|
// deprecated - skip
|
||||||
if (qfns->blck_size == 0) {
|
if (qfns->blck_size == 0) {
|
||||||
|
@ -122,7 +122,7 @@ static void usage(char * argv[]) {
|
|||||||
printf(" --type TYPE set test type as");
|
printf(" --type TYPE set test type as");
|
||||||
for (int i = 0; i < GGML_TYPE_COUNT; i++) {
|
for (int i = 0; i < GGML_TYPE_COUNT; i++) {
|
||||||
ggml_type type = (ggml_type) i;
|
ggml_type type = (ggml_type) i;
|
||||||
const auto * qfns = ggml_internal_get_type_traits(type);
|
const auto * qfns = ggml_get_type_traits(type);
|
||||||
if (ggml_type_name(type) != NULL) {
|
if (ggml_type_name(type) != NULL) {
|
||||||
if (qfns->from_float && qfns->to_float) {
|
if (qfns->from_float && qfns->to_float) {
|
||||||
printf(" %s", ggml_type_name(type));
|
printf(" %s", ggml_type_name(type));
|
||||||
@ -270,7 +270,7 @@ int main(int argc, char * argv[]) {
|
|||||||
|
|
||||||
for (int i = 0; i < GGML_TYPE_COUNT; i++) {
|
for (int i = 0; i < GGML_TYPE_COUNT; i++) {
|
||||||
ggml_type type = (ggml_type) i;
|
ggml_type type = (ggml_type) i;
|
||||||
const auto * qfns = ggml_internal_get_type_traits(type);
|
const auto * qfns = ggml_get_type_traits(type);
|
||||||
if (!params.include_types.empty() && ggml_type_name(type) && std::find(params.include_types.begin(), params.include_types.end(), ggml_type_name(type)) == params.include_types.end()) {
|
if (!params.include_types.empty() && ggml_type_name(type) && std::find(params.include_types.begin(), params.include_types.end(), ggml_type_name(type)) == params.include_types.end()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -328,7 +328,7 @@ int main(int argc, char * argv[]) {
|
|||||||
for (size_t size : params.test_sizes) {
|
for (size_t size : params.test_sizes) {
|
||||||
printf(" %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024));
|
printf(" %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024));
|
||||||
auto quantize_fn = [&](void) -> float {
|
auto quantize_fn = [&](void) -> float {
|
||||||
const auto * vdot = ggml_internal_get_type_traits(qfns->vec_dot_type);
|
const auto * vdot = ggml_get_type_traits(qfns->vec_dot_type);
|
||||||
vdot->from_float(test_data1, test_q1, size);
|
vdot->from_float(test_data1, test_q1, size);
|
||||||
return test_q1[0];
|
return test_q1[0];
|
||||||
};
|
};
|
||||||
|
Loading…
Reference in New Issue
Block a user