test-backend-ops : add cpy from f32 -> all types test

Mirror of https://github.com/ggerganov/llama.cpp.git, synced 2025-01-13 04:00:16 +00:00
Commit: f1380d7897
Parent: 54d254bbed
ggml-cuda.cu (25 changed lines)
@@ -9316,6 +9316,30 @@ static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, const ggml_ten
                     return false;
                 }
             } break;
+        case GGML_OP_CPY:
+            {
+                ggml_type src0_type = op->src[0]->type;
+                ggml_type src1_type = op->src[1]->type;
+                if (src0_type == GGML_TYPE_F32 && src1_type == GGML_TYPE_F32) {
+                    return true;
+                }
+                if (src0_type == GGML_TYPE_F32 && src1_type == GGML_TYPE_F16) {
+                    return true;
+                }
+                if (src0_type == GGML_TYPE_F32 && src1_type == GGML_TYPE_Q8_0) {
+                    return true;
+                }
+                if (src0_type == GGML_TYPE_F32 && src1_type == GGML_TYPE_Q4_0) {
+                    return true;
+                }
+                if (src0_type == GGML_TYPE_F32 && src1_type == GGML_TYPE_Q4_1) {
+                    return true;
+                }
+                if (src0_type == GGML_TYPE_F16 && src1_type == GGML_TYPE_F16) {
+                    return true;
+                }
+                return false;
+            } break;
         case GGML_OP_NONE:
         case GGML_OP_RESHAPE:
         case GGML_OP_VIEW:
@@ -9331,7 +9355,6 @@ static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, const ggml_ten
         case GGML_OP_SCALE:
         case GGML_OP_SQR:
         case GGML_OP_CLAMP:
-        case GGML_OP_CPY:
         case GGML_OP_CONT:
         case GGML_OP_DIAG_MASK_INF:
         case GGML_OP_SOFT_MAX:
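The new GGML_OP_CPY branch is what test-backend-ops (and the backend scheduler) consult via ggml_backend_supports_op() before handing a copy node to the CUDA backend. Below is a minimal sketch of such a query, not part of the commit, assuming a CUDA-enabled build and the public ggml.h / ggml-backend.h / ggml-cuda.h headers; the shapes and device index are illustrative only.

// sketch: ask the CUDA backend whether it can execute an F32 -> Q4_0 copy
#include "ggml.h"
#include "ggml-backend.h"
#include "ggml-cuda.h"
#include <cstdio>

int main() {
    // no_alloc = true: only tensor metadata is needed to describe the op
    struct ggml_init_params params = { /*mem_size*/ 16*1024*1024, /*mem_buffer*/ NULL, /*no_alloc*/ true };
    struct ggml_context * ctx = ggml_init(params);

    // ne[0] = 256 keeps the row length a multiple of the Q4_0 block size (32)
    struct ggml_tensor * src = ggml_new_tensor_2d(ctx, GGML_TYPE_F32,  256, 10);
    struct ggml_tensor * dst = ggml_new_tensor_2d(ctx, GGML_TYPE_Q4_0, 256, 10);

    // ggml_cpy builds a GGML_OP_CPY node with src[0] = src and src[1] = dst
    struct ggml_tensor * cpy = ggml_cpy(ctx, src, dst);

    ggml_backend_t cuda = ggml_backend_cuda_init(0 /* device */);
    if (cuda != NULL) {
        printf("cpy f32 -> q4_0 supported: %s\n", ggml_backend_supports_op(cuda, cpy) ? "yes" : "no");
        ggml_backend_free(cuda);
    }
    ggml_free(ctx);
    return 0;
}

With the branch above in place, an F32 -> Q4_0 copy reports as supported, while a combination that is not listed (for example Q4_0 -> F32) falls through to return false.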
tests/test-backend-ops.cpp

@@ -70,23 +70,27 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
     std::vector<uint8_t> buf(ggml_nbytes(t));
     ggml_backend_tensor_get(t, buf.data(), 0, ggml_nbytes(t));
 
+    ggml_type_traits_t tt = ggml_internal_get_type_traits(t->type);
+
     // access elements by index to avoid gaps in views
     for (int64_t i3 = 0; i3 < t->ne[3]; i3++) {
         for (int64_t i2 = 0; i2 < t->ne[2]; i2++) {
             for (int64_t i1 = 0; i1 < t->ne[1]; i1++) {
-                for (int64_t i0 = 0; i0 < t->ne[0]; i0++) {
+                for (int64_t i0 = 0; i0 < t->ne[0]; i0 += ggml_blck_size(t->type)) {
                     size_t i = i3*t->nb[3] + i2*t->nb[2] + i1*t->nb[1] + i0*t->nb[0];
-                    float v;
                     if (t->type == GGML_TYPE_F16) {
-                        v = (float) ggml_fp16_to_fp32(*(ggml_fp16_t*)&buf[i]);
+                        tv.push_back(ggml_fp16_to_fp32(*(ggml_fp16_t*)&buf[i]));
                     } else if (t->type == GGML_TYPE_F32) {
-                        v = *(float *) &buf[i];
+                        tv.push_back(*(float *) &buf[i]);
                     } else if (t->type == GGML_TYPE_I32) {
-                        v = *(int32_t *) &buf[i];
+                        tv.push_back((float)*(int32_t *) &buf[i]);
+                    } else if (ggml_is_quantized(t->type)) {
+                        std::vector<float> vq(ggml_blck_size(t->type));
+                        tt.to_float(&buf[i], vq.data(), ggml_blck_size(t->type));
+                        tv.insert(tv.end(), vq.begin(), vq.end());
                     } else {
                         GGML_ASSERT(false);
                     }
-                    tv.push_back(v);
                 }
             }
         }
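tensor_to_float() now advances one block at a time and expands quantized blocks through the type-traits to_float hook, so quantized outputs can be compared element by element against the reference backend. The stand-alone sketch below, not from the commit, shows the same traits round trip for a single Q8_0 block; the input values are arbitrary.

// sketch: quantize one block of floats to Q8_0 and dequantize it back via the type traits
#include "ggml.h"
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
    const ggml_type type = GGML_TYPE_Q8_0;
    const int blck = ggml_blck_size(type);            // 32 elements per Q8_0 block
    ggml_type_traits_t tt = ggml_internal_get_type_traits(type);

    std::vector<float> src(blck);
    for (int i = 0; i < blck; i++) {
        src[i] = 0.1f*(i - blck/2);                   // arbitrary test values
    }

    std::vector<uint8_t> q(ggml_type_size(type));     // storage for one quantized block
    tt.from_float(src.data(), q.data(), blck);        // quantize

    std::vector<float> dst(blck);
    tt.to_float(q.data(), dst.data(), blck);          // dequantize, as tensor_to_float does

    for (int i = 0; i < blck; i++) {
        printf("%2d: %8.4f -> %8.4f\n", i, src[i], dst[i]);
    }
    return 0;
}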
@@ -320,7 +324,7 @@ struct test_case {
             for (size_t i = 0; i < f1.size(); i++) {
                 // check for nans
                 if (std::isnan(f1[i]) || std::isnan(f2[i])) {
-                    printf("[%s] NaN at index %zu ", ggml_op_desc(t1), i);
+                    printf("[%s] NaN at index %zu (%f %f) ", ggml_op_desc(t1), i, f1[i], f2[i]);
                     ud->ok = false;
                     return true;
                 }
@@ -1253,7 +1257,11 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
     test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 10, 10, 10}, {1, 1, 1, 2}));
 
     test_cases.emplace_back(new test_dup());
-    test_cases.emplace_back(new test_cpy());
+
+    for (ggml_type type : all_types) {
+        test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, type, {256, 100, 100, 1}));
+    }
+
     test_cases.emplace_back(new test_cont());
 
     auto add_test_bin_bcast = [&](ggml_type type, std::array<int64_t, 4> ne, std::array<int, 4> nr) {
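Each test_cpy case registered by the loop boils down to: fill an F32 source, copy it into a tensor of the target type, run the graph, and compare the (dequantized) result against the reference. A minimal CPU-only sketch of that flow for a single F32 -> F16 copy, not part of the test harness, assuming only the core ggml API and illustrative sizes:

// sketch: compute an F32 -> F16 copy with ggml on the CPU and read back one element
#include "ggml.h"
#include <cstdio>

int main() {
    struct ggml_init_params params = { /*mem_size*/ 64*1024*1024, /*mem_buffer*/ NULL, /*no_alloc*/ false };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * src = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 256);
    struct ggml_tensor * dst = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, 256);
    for (int i = 0; i < 256; i++) {
        ggml_set_f32_1d(src, i, 0.25f*i);
    }

    // the copy node converts src to F16, writes it into dst and returns a view of dst
    struct ggml_tensor * cpy = ggml_cpy(ctx, src, dst);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, cpy);
    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads*/ 1);

    printf("dst[10] = %f (expected %f)\n", ggml_get_f32_1d(cpy, 10), 0.25f*10);
    ggml_free(ctx);
    return 0;
}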