mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-01 14:24:35 +00:00
197 lines
7.3 KiB
C++
197 lines
7.3 KiB
C++
|
|
static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) {
|
|
// static RNG initialization (revisit if n_threads stops being constant)
|
|
static const size_t n_threads = std::thread::hardware_concurrency();
|
|
static std::vector<std::default_random_engine> generators = []() {
|
|
std::random_device rd;
|
|
std::vector<std::default_random_engine> vec;
|
|
vec.reserve(n_threads);
|
|
//for (size_t i = 0; i < n_threads; i++) { vec.emplace_back(1234 + i); } // fixed seed
|
|
for (size_t i = 0; i < n_threads; i++) { vec.emplace_back(rd()); }
|
|
return vec;
|
|
}();
|
|
|
|
size_t size = ggml_nelements(tensor);
|
|
std::vector<float> data(size);
|
|
|
|
auto init_thread = [&](size_t ith, size_t start, size_t end) {
|
|
std::uniform_real_distribution<float> distribution(min, max);
|
|
for (size_t i = start; i < end; i++) {
|
|
data[i] = distribution(generators[ith]);
|
|
}
|
|
};
|
|
|
|
std::vector<std::thread> threads;
|
|
threads.reserve(n_threads);
|
|
for (size_t i = 0; i < n_threads; i++) {
|
|
size_t start = i*size/n_threads;
|
|
size_t end = (i+1)*size/n_threads;
|
|
threads.emplace_back(init_thread, i, start, end);
|
|
}
|
|
for (auto & t : threads) {
|
|
t.join();
|
|
}
|
|
|
|
if (tensor->type == GGML_TYPE_F32 || tensor->type == GGML_TYPE_I32) {
|
|
ggml_backend_tensor_set(tensor, data.data(), 0, size * sizeof(float));
|
|
} else if (ggml_is_quantized(tensor->type) || tensor->type == GGML_TYPE_F16) {
|
|
GGML_ASSERT(size % ggml_blck_size(tensor->type) == 0);
|
|
std::vector<uint8_t> dataq(ggml_row_size(tensor->type, size));
|
|
std::vector<float> imatrix(tensor->ne[0], 1.0f); // dummy importance matrix
|
|
const float * im = imatrix.data();
|
|
if (!ggml_quantize_requires_imatrix(tensor->type)) {
|
|
// when the imatrix is optional, we want to test both quantization with and without imatrix
|
|
// use one of the random numbers to decide
|
|
if (data[0] > 0.5f*(min + max)) {
|
|
im = nullptr;
|
|
}
|
|
}
|
|
ggml_quantize_chunk(tensor->type, data.data(), dataq.data(), 0, size/tensor->ne[0], tensor->ne[0], im);
|
|
ggml_backend_tensor_set(tensor, dataq.data(), 0, dataq.size());
|
|
} else if (tensor->type == GGML_TYPE_I8 || tensor->type == GGML_TYPE_I16 || tensor->type == GGML_TYPE_I32) {
|
|
// This is going to create some weird integers though.
|
|
ggml_backend_tensor_set(tensor, data.data(), 0, ggml_nbytes(tensor));
|
|
} else {
|
|
GGML_ASSERT(false);
|
|
}
|
|
}
|
|
|
|
static std::vector<float> tensor_to_float(const ggml_tensor * t) {
|
|
std::vector<float> tv;
|
|
tv.reserve(ggml_nelements(t));
|
|
|
|
std::vector<uint8_t> buf(ggml_nbytes(t));
|
|
ggml_backend_tensor_get(t, buf.data(), 0, ggml_nbytes(t));
|
|
|
|
ggml_type_traits_t tt = ggml_internal_get_type_traits(t->type);
|
|
size_t bs = ggml_blck_size(t->type);
|
|
std::vector<float> vq(ggml_blck_size(t->type));
|
|
bool quantized = ggml_is_quantized(t->type);
|
|
|
|
// access elements by index to avoid gaps in views
|
|
for (int64_t i3 = 0; i3 < t->ne[3]; i3++) {
|
|
for (int64_t i2 = 0; i2 < t->ne[2]; i2++) {
|
|
for (int64_t i1 = 0; i1 < t->ne[1]; i1++) {
|
|
for (int64_t i0 = 0; i0 < t->ne[0]; i0 += bs) {
|
|
size_t i = i3*t->nb[3] + i2*t->nb[2] + i1*t->nb[1] + i0/bs*t->nb[0];
|
|
if (t->type == GGML_TYPE_F16) {
|
|
tv.push_back(ggml_fp16_to_fp32(*(ggml_fp16_t*)&buf[i]));
|
|
} else if (t->type == GGML_TYPE_F32) {
|
|
tv.push_back(*(float *) &buf[i]);
|
|
} else if (t->type == GGML_TYPE_I32) {
|
|
tv.push_back((float)*(int32_t *) &buf[i]);
|
|
} else if (t->type == GGML_TYPE_I16) {
|
|
tv.push_back((float)*(int16_t *) &buf[i]);
|
|
} else if (t->type == GGML_TYPE_I8) {
|
|
tv.push_back((float)*(int8_t *) &buf[i]);
|
|
} else if (quantized) {
|
|
tt.to_float(&buf[i], vq.data(), ggml_blck_size(t->type));
|
|
tv.insert(tv.end(), vq.begin(), vq.end());
|
|
} else {
|
|
GGML_ASSERT(false);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return tv;
|
|
}
|
|
|
|
// normalized mean squared error = mse(a, b) / mse(a, 0)
|
|
static double nmse(const float * a, const float * b, size_t n) {
|
|
double mse_a_b = 0.0;
|
|
double mse_a_0 = 0.0;
|
|
|
|
for (size_t i = 0; i < n; i++) {
|
|
float a_i = a[i];
|
|
float b_i = b[i];
|
|
|
|
mse_a_b += (a_i - b_i) * (a_i - b_i);
|
|
mse_a_0 += a_i * a_i;
|
|
}
|
|
|
|
return mse_a_b / mse_a_0;
|
|
}
|
|
|
|
static bool ggml_is_view_op(enum ggml_op op) {
|
|
return op == GGML_OP_VIEW || op == GGML_OP_RESHAPE || op == GGML_OP_PERMUTE || op == GGML_OP_TRANSPOSE;
|
|
}
|
|
|
|
// utils for printing the variables of the test cases
|
|
#define VAR_TO_STR(x) (#x "=" + var_to_str(x))
|
|
|
|
template<typename T>
|
|
static std::string var_to_str(const T & x) {
|
|
return std::to_string(x);
|
|
}
|
|
|
|
template<typename T, size_t N>
|
|
static std::string var_to_str(const T (&x)[N]) {
|
|
std::string s = "[";
|
|
for (size_t i = 0; i < N; i++) {
|
|
if (i > 0) {
|
|
s += ",";
|
|
}
|
|
s += var_to_str(x[i]);
|
|
}
|
|
s += "]";
|
|
return s;
|
|
}
|
|
|
|
template<typename T, size_t N>
|
|
static std::string var_to_str(const std::array<T, N> & x) {
|
|
std::string s = "[";
|
|
for (size_t i = 0; i < N; i++) {
|
|
if (i > 0) {
|
|
s += ",";
|
|
}
|
|
s += var_to_str(x[i]);
|
|
}
|
|
s += "]";
|
|
return s;
|
|
}
|
|
|
|
//static std::string var_to_str(ggml_unary_op unary_op) {
|
|
// return ggml_unary_op_name(unary_op);
|
|
//}
|
|
|
|
static std::string var_to_str(ggml_type type) {
|
|
return ggml_type_name(type);
|
|
}
|
|
|
|
static std::string var_to_str(ggml_op_pool pool) {
|
|
switch (pool) {
|
|
case GGML_OP_POOL_AVG: return "avg";
|
|
case GGML_OP_POOL_MAX: return "max";
|
|
default: return std::to_string(pool);
|
|
}
|
|
}
|
|
|
|
#define VARS_TO_STR1(a) VAR_TO_STR(a)
|
|
#define VARS_TO_STR2(a, b) VAR_TO_STR(a) + "," + VAR_TO_STR(b)
|
|
#define VARS_TO_STR3(a, b, c) VAR_TO_STR(a) + "," + VARS_TO_STR2(b, c)
|
|
#define VARS_TO_STR4(a, b, c, d) VAR_TO_STR(a) + "," + VARS_TO_STR3(b, c, d)
|
|
#define VARS_TO_STR5(a, b, c, d, e) VAR_TO_STR(a) + "," + VARS_TO_STR4(b, c, d, e)
|
|
#define VARS_TO_STR6(a, b, c, d, e, f) VAR_TO_STR(a) + "," + VARS_TO_STR5(b, c, d, e, f)
|
|
#define VARS_TO_STR7(a, b, c, d, e, f, g) VAR_TO_STR(a) + "," + VARS_TO_STR6(b, c, d, e, f, g)
|
|
#define VARS_TO_STR8(a, b, c, d, e, f, g, h) VAR_TO_STR(a) + "," + VARS_TO_STR7(b, c, d, e, f, g, h)
|
|
#define VARS_TO_STR9(a, b, c, d, e, f, g, h, i) VAR_TO_STR(a) + "," + VARS_TO_STR8(b, c, d, e, f, g, h, i)
|
|
#define VARS_TO_STR10(a, b, c, d, e, f, g, h, i, j) VAR_TO_STR(a) + "," + VARS_TO_STR9(b, c, d, e, f, g, h, i, j)
|
|
#define VARS_TO_STR11(a, b, c, d, e, f, g, h, i, j, k) VAR_TO_STR(a) + "," + VARS_TO_STR10(b, c, d, e, f, g, h, i, j, k)
|
|
#define VARS_TO_STR12(a, b, c, d, e, f, g, h, i, j, k, l) VAR_TO_STR(a) + "," + VARS_TO_STR11(b, c, d, e, f, g, h, i, j, k, l)
|
|
|
|
#ifdef GGML_USE_SYCL
|
|
static bool inline _isinf(float f) {
|
|
return (*(uint32_t *)&f & 0x7fffffff) == 0x7f800000;
|
|
}
|
|
#else
|
|
static bool inline _isinf(float f) { return std::isinf(f); }
|
|
#endif
|
|
|
|
// accept FLT_MAX as infinity
|
|
static bool isinf_or_max(float f) {
|
|
return _isinf(f) || f == FLT_MAX || f == -FLT_MAX;
|
|
}
|
|
|