diff --git a/examples/export-lora/export-lora.cpp b/examples/export-lora/export-lora.cpp index a85145367..644d46a62 100644 --- a/examples/export-lora/export-lora.cpp +++ b/examples/export-lora/export-lora.cpp @@ -314,7 +314,7 @@ struct lora_merge_ctx { // optionally dequantize it printf("%s : + dequantize base tensor from %s to F32\n", __func__, ggml_type_name(base->type)); auto nels = ggml_nelements(inp_base); - const auto * qtype = ggml_internal_get_type_traits(base->type); + const auto * qtype = ggml_get_type_traits(base->type); std::vector dequant_buf(nels * sizeof(float)); qtype->to_float(read_buf.data(), (float *)dequant_buf.data(), nels); ggml_backend_tensor_set(inp_base, dequant_buf.data(), 0, dequant_buf.size()); diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp index 9fe56d65f..e372856c6 100644 --- a/examples/quantize-stats/quantize-stats.cpp +++ b/examples/quantize-stats/quantize-stats.cpp @@ -371,7 +371,7 @@ int main(int argc, char ** argv) { if (!params.include_types.empty() && std::find(params.include_types.begin(), params.include_types.end(), i) == params.include_types.end()) { continue; } - const auto * qfns = ggml_internal_get_type_traits(type); + const auto * qfns = ggml_get_type_traits(type); if (qfns->from_float && qfns->to_float) { if (params.verbose) { printf("testing %s ...\n", ggml_type_name(type)); diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index 74b73a556..4508da4fb 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -2553,7 +2553,7 @@ extern "C" { ggml_gemm_t gemm; }; - GGML_API const struct ggml_type_traits * ggml_internal_get_type_traits(enum ggml_type type); + GGML_API const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type); #ifdef __cplusplus } diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp index 17b20debf..627b4dbc7 100644 --- a/ggml/src/ggml-backend.cpp +++ b/ggml/src/ggml-backend.cpp @@ -1177,7 +1177,7 @@ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const st op->type != GGML_TYPE_IQ1_S && op->type != GGML_TYPE_IQ1_M; // missing type_traits.from_float case GGML_OP_MUL_MAT: - return op->src[1]->type == GGML_TYPE_F32 || op->src[1]->type == ggml_internal_get_type_traits(op->src[0]->type)->vec_dot_type; + return op->src[1]->type == GGML_TYPE_F32 || op->src[1]->type == ggml_get_type_traits(op->src[0]->type)->vec_dot_type; case GGML_OP_ROPE_BACK: return op->src[2] == NULL && (op->op_params[2] & 4) == 0; case GGML_OP_IM2COL_BACK: diff --git a/ggml/src/ggml-blas.cpp b/ggml/src/ggml-blas.cpp index 846323b84..55f724586 100644 --- a/ggml/src/ggml-blas.cpp +++ b/ggml/src/ggml-blas.cpp @@ -65,7 +65,7 @@ static void ggml_backend_blas_mul_mat(ggml_backend_blas_context * ctx, struct gg // convert src0 to float if (type != GGML_TYPE_F32) { - const auto * type_traits = ggml_internal_get_type_traits(type); + const auto * type_traits = ggml_get_type_traits(type); ggml_to_float_t const to_float = type_traits->to_float; for (int64_t i03 = 0; i03 < ne03; i03++) { @@ -424,7 +424,7 @@ static bool ggml_backend_blas_device_supports_op(ggml_backend_dev_t dev, const s ggml_is_contiguous(src1) && src1->type == GGML_TYPE_F32 && (ne0 >= min_batch && ne1 >= min_batch && ne10 >= min_batch) && - (src0->type == GGML_TYPE_F32 || ggml_internal_get_type_traits(src0->type)->to_float != NULL); + (src0->type == GGML_TYPE_F32 || ggml_get_type_traits(src0->type)->to_float != NULL); } case GGML_OP_OUT_PROD: @@ -434,7 +434,7 @@ static bool ggml_backend_blas_device_supports_op(ggml_backend_dev_t dev, const s ggml_is_matrix(src1) && ggml_is_contiguous(src0) && (ggml_is_contiguous(src1) || ggml_is_transposed(src1)) && - (src0->type == GGML_TYPE_F32 || ggml_internal_get_type_traits(src0->type)->to_float != NULL); + (src0->type == GGML_TYPE_F32 || ggml_get_type_traits(src0->type)->to_float != NULL); default: return false; diff --git a/ggml/src/ggml-vulkan.cpp b/ggml/src/ggml-vulkan.cpp index a9e9d50cf..374c6ecd7 100644 --- a/ggml/src/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan.cpp @@ -5287,7 +5287,7 @@ static void ggml_vk_dequantize_data(const void * from, float * to, size_t ne, gg return; } - const auto * tt = ggml_internal_get_type_traits(quant); + const auto * tt = ggml_get_type_traits(quant); ggml_to_float_t dequant_fn = tt->to_float; diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 6691adc88..3f01092d9 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -1151,7 +1151,7 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = { }; // For internal test use -const struct ggml_type_traits * ggml_internal_get_type_traits(enum ggml_type type) { +const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type) { GGML_ASSERT(type < GGML_TYPE_COUNT); return &type_traits[type]; } diff --git a/pocs/vdot/q8dot.cpp b/pocs/vdot/q8dot.cpp index fe31388fe..131d7c177 100644 --- a/pocs/vdot/q8dot.cpp +++ b/pocs/vdot/q8dot.cpp @@ -136,7 +136,7 @@ int main(int argc, char** argv) { auto ggml_type = type == 0 ? GGML_TYPE_Q4_0 : GGML_TYPE_Q4_1; - const auto * funcs = ggml_internal_get_type_traits(ggml_type); + const auto * funcs = ggml_get_type_traits(ggml_type); Stat simple, ggml; diff --git a/pocs/vdot/vdot.cpp b/pocs/vdot/vdot.cpp index 4fa24ffc4..88e66ea13 100644 --- a/pocs/vdot/vdot.cpp +++ b/pocs/vdot/vdot.cpp @@ -236,7 +236,7 @@ int main(int argc, char** argv) { int n4 = useQ4_1 ? kVecSize / QK4_1 : kVecSize / QK4_0; n4 = 64*((n4 + 63)/64); int n8 = kVecSize / QK8_0; n8 = 64*((n8 + 63)/64); - const auto * funcs = useQ4_1 ? ggml_internal_get_type_traits(GGML_TYPE_Q4_1) : ggml_internal_get_type_traits(GGML_TYPE_Q4_0); + const auto * funcs = useQ4_1 ? ggml_get_type_traits(GGML_TYPE_Q4_1) : ggml_get_type_traits(GGML_TYPE_Q4_0); std::vector q40; std::vector q41; @@ -282,7 +282,7 @@ int main(int argc, char** argv) { dot_q4_q8(kVecSize, &result, q40.data(), q8.data()); } else { - const auto * vdot = ggml_internal_get_type_traits(funcs->vec_dot_type); + const auto * vdot = ggml_get_type_traits(funcs->vec_dot_type); vdot->from_float(y1.data(), q8.data(), kVecSize); if (useQ4_1) funcs->vec_dot(kVecSize, &result, 0, q41.data(), 0, q8.data(), 0, 1); else funcs->vec_dot(kVecSize, &result, 0, q40.data(), 0, q8.data(), 0, 1); diff --git a/src/llama.cpp b/src/llama.cpp index 32c7458ef..01cdf17dc 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -17872,7 +17872,7 @@ static void llama_tensor_dequantize_internal( } float * f32_output = (float *) output.data(); - const ggml_type_traits * qtype = ggml_internal_get_type_traits(tensor->type); + const ggml_type_traits * qtype = ggml_get_type_traits(tensor->type); if (ggml_is_quantized(tensor->type)) { if (qtype->to_float == NULL) { throw std::runtime_error(format("type %s unsupported for integer quantization: no dequantization available", ggml_type_name(tensor->type))); diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index 81440cebd..ee1a8877e 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -133,7 +133,7 @@ static std::vector tensor_to_float(const ggml_tensor * t) { std::vector buf(ggml_nbytes(t)); ggml_backend_tensor_get(t, buf.data(), 0, ggml_nbytes(t)); - const auto * tt = ggml_internal_get_type_traits(t->type); + const auto * tt = ggml_get_type_traits(t->type); size_t bs = ggml_blck_size(t->type); std::vector vq(ggml_blck_size(t->type)); bool quantized = ggml_is_quantized(t->type); diff --git a/tests/test-quantize-fns.cpp b/tests/test-quantize-fns.cpp index 98a143300..d50417ba0 100644 --- a/tests/test-quantize-fns.cpp +++ b/tests/test-quantize-fns.cpp @@ -83,7 +83,7 @@ static float dot_product_error( std::vector tmp_q1(2*test_size); std::vector tmp_q2(2*test_size); - const auto * vdot = ggml_internal_get_type_traits(qfns->vec_dot_type); + const auto * vdot = ggml_get_type_traits(qfns->vec_dot_type); qfns->from_float(test_data1, tmp_q1.data(), test_size); vdot->from_float(test_data2, tmp_q2.data(), test_size); @@ -131,7 +131,7 @@ int main(int argc, char * argv[]) { for (int i = 0; i < GGML_TYPE_COUNT; i++) { ggml_type type = (ggml_type) i; - const auto * qfns = ggml_internal_get_type_traits(type); + const auto * qfns = ggml_get_type_traits(type); // deprecated - skip if (qfns->blck_size == 0) { diff --git a/tests/test-quantize-perf.cpp b/tests/test-quantize-perf.cpp index 8219b3723..bdbdd90a8 100644 --- a/tests/test-quantize-perf.cpp +++ b/tests/test-quantize-perf.cpp @@ -122,7 +122,7 @@ static void usage(char * argv[]) { printf(" --type TYPE set test type as"); for (int i = 0; i < GGML_TYPE_COUNT; i++) { ggml_type type = (ggml_type) i; - const auto * qfns = ggml_internal_get_type_traits(type); + const auto * qfns = ggml_get_type_traits(type); if (ggml_type_name(type) != NULL) { if (qfns->from_float && qfns->to_float) { printf(" %s", ggml_type_name(type)); @@ -270,7 +270,7 @@ int main(int argc, char * argv[]) { for (int i = 0; i < GGML_TYPE_COUNT; i++) { ggml_type type = (ggml_type) i; - const auto * qfns = ggml_internal_get_type_traits(type); + const auto * qfns = ggml_get_type_traits(type); if (!params.include_types.empty() && ggml_type_name(type) && std::find(params.include_types.begin(), params.include_types.end(), ggml_type_name(type)) == params.include_types.end()) { continue; } @@ -328,7 +328,7 @@ int main(int argc, char * argv[]) { for (size_t size : params.test_sizes) { printf(" %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024)); auto quantize_fn = [&](void) -> float { - const auto * vdot = ggml_internal_get_type_traits(qfns->vec_dot_type); + const auto * vdot = ggml_get_type_traits(qfns->vec_dot_type); vdot->from_float(test_data1, test_q1, size); return test_q1[0]; };