Mirror of https://github.com/ggerganov/llama.cpp.git (synced 2024-12-24 10:24:35 +00:00)
sync : ggml (conv 1d + 2d updates, UB fixes) (#3468)
* sync : ggml (conv 1d + 2d updates)

ggml-ci

* ggml : fix UB in q5_0 and q5_1 quantize code

ggml.c:1033:39: runtime error: left shift of 1 by 31 places cannot be represented in type 'int'
SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior

ggml.c:1081:39: runtime error: left shift of 1 by 31 places cannot be represented in type 'int'
SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior

ggml-ci

* tests : fix UB in test-quantize-perf
Parent: f72f8f22c9
Commit: f93af02488

ggml.h (13 additions, 0 deletions)
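The UBSan reports quoted above come from building the q5_0/q5_1 high-bit mask with a signed shift: once the bit index reaches 31, `1 << 31` overflows `int`. A minimal sketch of the pattern and the usual remedy (perform the shift on an unsigned type); the names and layout here are illustrative, not the exact ggml.c code:

```cpp
#include <cstdint>

// Illustrative only: packing one "high" bit per element into a 32-bit mask,
// the way the q5_0/q5_1 quantizers store the 5th quant bit.
static uint32_t pack_high_bits(const uint8_t * bit, int n /* n <= 32 */) {
    uint32_t qh = 0;
    for (int j = 0; j < n; ++j) {
        // UB variant: bit[j] promotes to int, so the shift overflows at j == 31
        //   qh |= bit[j] << j;
        // Defined behavior: do the shift on an unsigned 32-bit value instead
        qh |= (uint32_t)(bit[j] & 1) << j;
    }
    return qh;
}

int main(void) {
    uint8_t bits[32];
    for (int j = 0; j < 32; ++j) bits[j] = (uint8_t)(j % 2);
    const uint32_t qh = pack_high_bits(bits, 32);
    return qh == 0xAAAAAAAAu ? 0 : 1; // bits set at the odd positions
}
```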
--- a/ggml.h
+++ b/ggml.h
@@ -401,10 +401,14 @@ extern "C" {
         GGML_OP_CLAMP,
         GGML_OP_CONV_1D,
         GGML_OP_CONV_2D,
+        GGML_OP_CONV_TRANSPOSE_1D,
         GGML_OP_CONV_TRANSPOSE_2D,
         GGML_OP_POOL_1D,
         GGML_OP_POOL_2D,
 
+        GGML_OP_CONV_1D_STAGE_0, // internal
+        GGML_OP_CONV_1D_STAGE_1, // internal
+
         GGML_OP_UPSCALE, // nearest interpolate
 
         GGML_OP_FLASH_ATTN,
@@ -1386,6 +1390,14 @@ extern "C" {
             int s,
             int d);
 
+    GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b,
+            int s0,
+            int p0,
+            int d0);
+
     GGML_API struct ggml_tensor * ggml_conv_2d(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
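The new `ggml_conv_transpose_1d` takes a kernel `a`, an input `b`, and stride/padding/dilation `s0`/`p0`/`d0`, mirroring `ggml_conv_1d`. A hedged usage sketch based only on the declaration above; the tensor shapes and kernel layout are assumptions of this example, not something specified by the diff:

```cpp
#include "ggml.h"

// Sketch: upsample a length-64, 16-channel signal with the new transposed
// 1D convolution. The kernel/input layouts below are assumed, not specified
// by this commit.
int main(void) {
    struct ggml_init_params params = {
        /* .mem_size   = */ 16*1024*1024,
        /* .mem_buffer = */ NULL,
        /* .no_alloc   = */ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // assumed kernel layout: [kernel width, channels, channels]
    struct ggml_tensor * a = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 4, 16, 16);
    // assumed input layout: [length, channels]
    struct ggml_tensor * b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64, 16);

    // stride 2, no padding, dilation 1
    struct ggml_tensor * out = ggml_conv_transpose_1d(ctx, a, b, /*s0=*/2, /*p0=*/0, /*d0=*/1);
    (void) out; // to evaluate, place 'out' in a graph and run ggml_graph_compute

    ggml_free(ctx);
    return 0;
}
```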
@@ -1759,6 +1771,7 @@ extern "C" {
         GGML_OPT_NO_CONTEXT,
         GGML_OPT_INVALID_WOLFE,
         GGML_OPT_FAIL,
+        GGML_OPT_CANCEL,
 
         GGML_LINESEARCH_FAIL = -128,
         GGML_LINESEARCH_MINIMUM_STEP,
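`GGML_OPT_CANCEL` extends `enum ggml_opt_result` with an early-termination code. A hedged sketch of how a caller might check for it; it assumes the long-standing `ggml_opt()`/`ggml_opt_default_params()` API and treats cancellation as non-fatal, which is this example's choice rather than something stated in the diff:

```cpp
#include "ggml.h"
#include <cstdio>

// Sketch: distinguish the new GGML_OPT_CANCEL result from real failures.
// 'f' is whatever scalar loss tensor the caller has already built in 'ctx'.
static void run_opt(struct ggml_context * ctx, struct ggml_tensor * f) {
    struct ggml_opt_params opt_params = ggml_opt_default_params(GGML_OPT_ADAM);

    enum ggml_opt_result res = ggml_opt(ctx, opt_params, f);
    if (res == GGML_OPT_CANCEL) {
        // a callback requested an early stop -- keep the partial result
        fprintf(stderr, "optimization cancelled\n");
    } else if (res != GGML_OPT_OK && res != GGML_OPT_DID_NOT_CONVERGE) {
        fprintf(stderr, "optimization failed (%d)\n", (int) res);
    }
}
```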
@@ -69,7 +69,6 @@ inline static int32_t vaddvq_s32(int32x4_t v) {
 // 2-6 bit quantization in super-blocks
 //
 
-
 //
 // ===================== Helper functions
 //
@@ -348,7 +347,6 @@ void quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict
     const float q4scale = 15.f;
 
     for (int i = 0; i < nb; i++) {
-
         float max_scale = 0; // as we are deducting the min, scales are always positive
         float max_min = 0;
         for (int j = 0; j < QK_K/16; ++j) {
@@ -208,26 +208,6 @@ static struct ggml_tensor * get_random_tensor_i32(
     return result;
 }
 
-static void print_elements(const char* label, const struct ggml_tensor * t) {
-    if (!t) {
-        printf("%s: %s = null\n", __func__, label);
-        return;
-    }
-    const int nelements = ggml_nelements(t);
-    printf("%s: %s = [", __func__, label);
-    for (int k = 0; k < nelements; ++k) {
-        if (k > 0) { printf(", "); }
-        printf("%.5f", ggml_get_f32_1d(t, k));
-    }
-    printf("] shape: [");
-    for (int k = 0; k < t->n_dims; ++k) {
-        if (k > 0) { printf(", "); }
-        printf("%d", (int)t->ne[k]);
-    }
-    printf("]\n");
-
-}
-
 static bool check_gradient(
         const char * op_name,
         struct ggml_context * ctx0,
@@ -40,27 +40,6 @@ static float frand(void) {
     return (float)rand()/(float)RAND_MAX;
 }
 
-static int irand(int n) {
-    return rand()%n;
-}
-
-static void get_random_dims(int64_t * dims, int ndims) {
-    dims[0] = dims[1] = dims[2] = dims[3] = 1;
-
-    for (int i = 0; i < ndims; i++) {
-        dims[i] = 1 + irand(4);
-    }
-}
-
-static void get_random_dims_minmax(int64_t * dims, int ndims, int min, int max) {
-    dims[0] = dims[1] = dims[2] = dims[3] = 1;
-
-    for (int i = 0; i < ndims; i++) {
-        dims[i] = min + irand(max-min);
-    }
-}
-
-
 static struct ggml_tensor * get_random_tensor(
         struct ggml_context * ctx0, int ndims, int64_t ne[], float fmin, float fmax
 ) {
@@ -106,14 +85,6 @@ static struct ggml_tensor * get_random_tensor(
     return result;
 }
 
-static float get_element(const struct ggml_tensor * t, int idx) {
-    return ((float *)t->data)[idx];
-}
-
-static void set_element(struct ggml_tensor * t, int idx, float value) {
-    ((float *)t->data)[idx] = value;
-}
-
 int main(void) {
     struct ggml_init_params params = {
         /* .mem_size = */ 1024*1024*1024,
@@ -76,22 +76,21 @@ static void * align_with_offset(void * ptr, int offset) {
     return (char *) std::align(MAX_ALIGNMENT, MAX_ALIGNMENT, ptr, dummy_size) + offset;
 }
 
-static void benchmark_function(size_t size, size_t q_size, int64_t iterations, const std::function<size_t(void)> & function) {
+static void benchmark_function(size_t size, size_t q_size, int64_t iterations, const std::function<float(void)> & func) {
     int64_t min_time_us = INT64_MAX;
     int64_t total_time_us = 0;
     int64_t min_time_cycles = INT64_MAX;
     int64_t total_time_cycles = 0;
 
     for (int i = 0; i < WARMUP; i++) {
-        function();
+        func();
     }
 
-
     for (int i = 0; i < iterations; i++) {
         const int64_t start_time = ggml_time_us();
         const int64_t start_cycles = cpu_cycles();
 
-        function();
+        func();
 
         const int64_t end_cycles = cpu_cycles();
         const int64_t end_time = ggml_time_us();
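The signature change (`std::function<size_t(void)>` to `std::function<float(void)>`, `function` renamed to `func`) matches the lambdas further down, which now return a float taken from the buffer they just wrote. Returning a value that depends on the measured work is the usual way to keep the compiler from discarding the benchmarked call; a self-contained sketch of the same pattern, not the test's actual harness:

```cpp
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <functional>
#include <vector>

// Minimal sketch of the benchmark pattern above: the callable returns a float
// that depends on its work, and the harness accumulates that value so the
// optimizer cannot drop the calls as dead code.
static void bench(int64_t iterations, const std::function<float(void)> & func) {
    volatile float sink = 0.0f; // keeps the returned values observable
    const auto t0 = std::chrono::steady_clock::now();
    for (int64_t i = 0; i < iterations; ++i) {
        sink += func();
    }
    const auto t1 = std::chrono::steady_clock::now();
    const double us = std::chrono::duration<double, std::micro>(t1 - t0).count();
    printf("total: %.1f us, avg: %.3f us/iter (sink=%f)\n", us, us/iterations, (float) sink);
}

int main() {
    std::vector<float> data(4096, 1.5f), out(4096);
    bench(1000, [&]() -> float {
        for (size_t i = 0; i < data.size(); ++i) {
            out[i] = data[i] * 0.5f; // stand-in for a quantize/dequantize call
        }
        return out[0]; // value tied to the work, as in the quantize_fn lambdas
    });
    return 0;
}
```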
@@ -245,15 +244,15 @@ int main(int argc, char * argv[]) {
 
     std::vector<uint8_t> test_data1_v(largest*4 + MAX_ALIGNMENT*2);
     std::vector<uint8_t> test_data2_v(largest*4 + MAX_ALIGNMENT*2);
-    std::vector<uint8_t> test_q1_v(largest*4 + MAX_ALIGNMENT*2);
-    std::vector<uint8_t> test_q2_v(largest*4 + MAX_ALIGNMENT*2);
-    std::vector<uint8_t> test_out_v(largest*4 + MAX_ALIGNMENT*2);
+    std::vector<uint8_t> test_q1_v (largest*4 + MAX_ALIGNMENT*2);
+    std::vector<uint8_t> test_q2_v (largest*4 + MAX_ALIGNMENT*2);
+    std::vector<uint8_t> test_out_v (largest*4 + MAX_ALIGNMENT*2);
 
     float * test_data1 = (float *) align_with_offset(test_data1_v.data(), params.alignment_offset);
     float * test_data2 = (float *) align_with_offset(test_data2_v.data(), params.alignment_offset);
     float * test_q1 = (float *) align_with_offset(test_q1_v.data(), params.alignment_offset);
     float * test_q2 = (float *) align_with_offset(test_q2_v.data(), params.alignment_offset);
     float * test_out = (float *) align_with_offset(test_out_v.data(), params.alignment_offset);
 
     generate_data(0, largest, test_data1);
     generate_data(1, largest, test_data2);
@@ -283,7 +282,7 @@ int main(int argc, char * argv[]) {
             printf("  quantize_row_q_reference\n");
             for (size_t size : params.test_sizes) {
                 printf("    %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024));
-                auto quantize_fn = [&](void ) {
+                auto quantize_fn = [&](void) -> float {
                     qfns.from_float_reference(test_data1, test_q1, size);
                     return test_q1[0];
                 };
@@ -297,7 +296,7 @@ int main(int argc, char * argv[]) {
             printf("  quantize_row_q\n");
             for (size_t size : params.test_sizes) {
                 printf("    %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024));
-                auto quantize_fn = [&](void ) {
+                auto quantize_fn = [&](void) -> float {
                     qfns.from_float(test_data1, test_q1, size);
                     return test_q1[0];
                 };
@@ -312,7 +311,7 @@ int main(int argc, char * argv[]) {
             qfns.from_float(test_data1, test_q1, largest);
             for (size_t size : params.test_sizes) {
                 printf("    %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024));
-                auto quantize_fn = [&](void ) {
+                auto quantize_fn = [&](void) -> float {
                     qfns.to_float(test_q1, test_out, size);
                     return test_out[0];
                 };
@@ -326,7 +325,7 @@ int main(int argc, char * argv[]) {
             printf("  quantize_row_q_dot\n");
             for (size_t size : params.test_sizes) {
                 printf("    %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024));
-                auto quantize_fn = [&](void ) {
+                auto quantize_fn = [&](void) -> float {
                     auto vdot = ggml_internal_get_type_traits(qfns.vec_dot_type);
                     vdot.from_float(test_data1, test_q1, size);
                     return test_q1[0];
@@ -343,7 +342,7 @@ int main(int argc, char * argv[]) {
             qfns.from_float(test_data2, test_q2, largest);
             for (size_t size : params.test_sizes) {
                 printf("    %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024));
-                auto quantize_fn = [&](void ) {
+                auto quantize_fn = [&](void) -> float {
                     float result;
                     qfns.vec_dot(size, &result, test_q1, test_q2);
                     return result;
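For reference, a standalone sketch of the vec_dot path the last lambda benchmarks, wired up through the same `ggml_internal_get_type_traits` API; the choice of `GGML_TYPE_Q5_0` and the buffer sizing via `ggml_blck_size`/`ggml_type_size` are illustrative assumptions, not taken from this hunk:

```cpp
#include "ggml.h"
#include <cstdint>
#include <cstdio>
#include <vector>

// Sketch: quantize two float buffers with a type's from_float, then call its
// vec_dot, mirroring the quantize_fn lambda above outside the test harness.
int main(void) {
    const ggml_type type = GGML_TYPE_Q5_0;
    const int n = 4096; // must be a multiple of the type's block size

    std::vector<float> a(n, 0.25f), b(n, -0.5f);

    const auto qfns   = ggml_internal_get_type_traits(type);
    const size_t qrow = (size_t) n / ggml_blck_size(type) * ggml_type_size(type);

    std::vector<uint8_t> qa(qrow), qb(qrow);
    qfns.from_float(a.data(), qa.data(), n);
    qfns.from_float(b.data(), qb.data(), n);

    float result = 0.0f;
    qfns.vec_dot(n, &result, qa.data(), qb.data());
    printf("dot = %f\n", result);
    return 0;
}
```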