static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) { // static RNG initialization (revisit if n_threads stops being constant) static const size_t n_threads = std::thread::hardware_concurrency(); static std::vector generators = []() { std::random_device rd; std::vector vec; vec.reserve(n_threads); //for (size_t i = 0; i < n_threads; i++) { vec.emplace_back(1234 + i); } // fixed seed for (size_t i = 0; i < n_threads; i++) { vec.emplace_back(rd()); } return vec; }(); size_t size = ggml_nelements(tensor); std::vector data(size); auto init_thread = [&](size_t ith, size_t start, size_t end) { std::uniform_real_distribution distribution(min, max); for (size_t i = start; i < end; i++) { data[i] = distribution(generators[ith]); } }; std::vector threads; threads.reserve(n_threads); for (size_t i = 0; i < n_threads; i++) { size_t start = i*size/n_threads; size_t end = (i+1)*size/n_threads; threads.emplace_back(init_thread, i, start, end); } for (auto & t : threads) { t.join(); } if (tensor->type == GGML_TYPE_F32 || tensor->type == GGML_TYPE_I32) { ggml_backend_tensor_set(tensor, data.data(), 0, size * sizeof(float)); } else if (ggml_is_quantized(tensor->type) || tensor->type == GGML_TYPE_F16) { GGML_ASSERT(size % ggml_blck_size(tensor->type) == 0); std::vector dataq(ggml_row_size(tensor->type, size)); std::vector imatrix(tensor->ne[0], 1.0f); // dummy importance matrix const float * im = imatrix.data(); if (!ggml_quantize_requires_imatrix(tensor->type)) { // when the imatrix is optional, we want to test both quantization with and without imatrix // use one of the random numbers to decide if (data[0] > 0.5f*(min + max)) { im = nullptr; } } ggml_quantize_chunk(tensor->type, data.data(), dataq.data(), 0, size/tensor->ne[0], tensor->ne[0], im); ggml_backend_tensor_set(tensor, dataq.data(), 0, dataq.size()); } else if (tensor->type == GGML_TYPE_I8 || tensor->type == GGML_TYPE_I16 || tensor->type == GGML_TYPE_I32) { // This is going to create some weird integers though. ggml_backend_tensor_set(tensor, data.data(), 0, ggml_nbytes(tensor)); } else { GGML_ASSERT(false); } } static std::vector tensor_to_float(const ggml_tensor * t) { std::vector tv; tv.reserve(ggml_nelements(t)); std::vector buf(ggml_nbytes(t)); ggml_backend_tensor_get(t, buf.data(), 0, ggml_nbytes(t)); ggml_type_traits_t tt = ggml_internal_get_type_traits(t->type); size_t bs = ggml_blck_size(t->type); std::vector vq(ggml_blck_size(t->type)); bool quantized = ggml_is_quantized(t->type); // access elements by index to avoid gaps in views for (int64_t i3 = 0; i3 < t->ne[3]; i3++) { for (int64_t i2 = 0; i2 < t->ne[2]; i2++) { for (int64_t i1 = 0; i1 < t->ne[1]; i1++) { for (int64_t i0 = 0; i0 < t->ne[0]; i0 += bs) { size_t i = i3*t->nb[3] + i2*t->nb[2] + i1*t->nb[1] + i0/bs*t->nb[0]; if (t->type == GGML_TYPE_F16) { tv.push_back(ggml_fp16_to_fp32(*(ggml_fp16_t*)&buf[i])); } else if (t->type == GGML_TYPE_F32) { tv.push_back(*(float *) &buf[i]); } else if (t->type == GGML_TYPE_I32) { tv.push_back((float)*(int32_t *) &buf[i]); } else if (t->type == GGML_TYPE_I16) { tv.push_back((float)*(int16_t *) &buf[i]); } else if (t->type == GGML_TYPE_I8) { tv.push_back((float)*(int8_t *) &buf[i]); } else if (quantized) { tt.to_float(&buf[i], vq.data(), ggml_blck_size(t->type)); tv.insert(tv.end(), vq.begin(), vq.end()); } else { GGML_ASSERT(false); } } } } } return tv; } // normalized mean squared error = mse(a, b) / mse(a, 0) static double nmse(const float * a, const float * b, size_t n) { double mse_a_b = 0.0; double mse_a_0 = 0.0; for (size_t i = 0; i < n; i++) { float a_i = a[i]; float b_i = b[i]; mse_a_b += (a_i - b_i) * (a_i - b_i); mse_a_0 += a_i * a_i; } return mse_a_b / mse_a_0; } static bool ggml_is_view_op(enum ggml_op op) { return op == GGML_OP_VIEW || op == GGML_OP_RESHAPE || op == GGML_OP_PERMUTE || op == GGML_OP_TRANSPOSE; } // utils for printing the variables of the test cases #define VAR_TO_STR(x) (#x "=" + var_to_str(x)) template static std::string var_to_str(const T & x) { return std::to_string(x); } template static std::string var_to_str(const T (&x)[N]) { std::string s = "["; for (size_t i = 0; i < N; i++) { if (i > 0) { s += ","; } s += var_to_str(x[i]); } s += "]"; return s; } template static std::string var_to_str(const std::array & x) { std::string s = "["; for (size_t i = 0; i < N; i++) { if (i > 0) { s += ","; } s += var_to_str(x[i]); } s += "]"; return s; } //static std::string var_to_str(ggml_unary_op unary_op) { // return ggml_unary_op_name(unary_op); //} static std::string var_to_str(ggml_type type) { return ggml_type_name(type); } static std::string var_to_str(ggml_op_pool pool) { switch (pool) { case GGML_OP_POOL_AVG: return "avg"; case GGML_OP_POOL_MAX: return "max"; default: return std::to_string(pool); } } #define VARS_TO_STR1(a) VAR_TO_STR(a) #define VARS_TO_STR2(a, b) VAR_TO_STR(a) + "," + VAR_TO_STR(b) #define VARS_TO_STR3(a, b, c) VAR_TO_STR(a) + "," + VARS_TO_STR2(b, c) #define VARS_TO_STR4(a, b, c, d) VAR_TO_STR(a) + "," + VARS_TO_STR3(b, c, d) #define VARS_TO_STR5(a, b, c, d, e) VAR_TO_STR(a) + "," + VARS_TO_STR4(b, c, d, e) #define VARS_TO_STR6(a, b, c, d, e, f) VAR_TO_STR(a) + "," + VARS_TO_STR5(b, c, d, e, f) #define VARS_TO_STR7(a, b, c, d, e, f, g) VAR_TO_STR(a) + "," + VARS_TO_STR6(b, c, d, e, f, g) #define VARS_TO_STR8(a, b, c, d, e, f, g, h) VAR_TO_STR(a) + "," + VARS_TO_STR7(b, c, d, e, f, g, h) #define VARS_TO_STR9(a, b, c, d, e, f, g, h, i) VAR_TO_STR(a) + "," + VARS_TO_STR8(b, c, d, e, f, g, h, i) #define VARS_TO_STR10(a, b, c, d, e, f, g, h, i, j) VAR_TO_STR(a) + "," + VARS_TO_STR9(b, c, d, e, f, g, h, i, j) #define VARS_TO_STR11(a, b, c, d, e, f, g, h, i, j, k) VAR_TO_STR(a) + "," + VARS_TO_STR10(b, c, d, e, f, g, h, i, j, k) #define VARS_TO_STR12(a, b, c, d, e, f, g, h, i, j, k, l) VAR_TO_STR(a) + "," + VARS_TO_STR11(b, c, d, e, f, g, h, i, j, k, l) #ifdef GGML_USE_SYCL static bool inline _isinf(float f) { return (*(uint32_t *)&f & 0x7fffffff) == 0x7f800000; } #else static bool inline _isinf(float f) { return std::isinf(f); } #endif // accept FLT_MAX as infinity static bool isinf_or_max(float f) { return _isinf(f) || f == FLT_MAX || f == -FLT_MAX; }