ggml : improve ggml_is_contiguous logic (#7856)

* ggml : improve ggml_is_contiguous logic

ggml-ci

* ggml : support more contiguous cases

ggml-ci
This commit is contained in:
Georgi Gerganov 2024-06-12 15:24:20 +03:00 committed by GitHub
parent 704a35b183
commit bfaa676b08
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

75
ggml.c
View File

@ -3212,35 +3212,42 @@ GGML_CALL bool ggml_is_transposed(const struct ggml_tensor * tensor) {
return tensor->nb[0] > tensor->nb[1]; return tensor->nb[0] > tensor->nb[1];
} }
GGML_CALL bool ggml_is_contiguous(const struct ggml_tensor * tensor) { static bool ggml_is_contiguous_n(const struct ggml_tensor * tensor, int n) {
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); size_t next_nb = ggml_type_size(tensor->type);
if (tensor->ne[0] != ggml_blck_size(tensor->type) && tensor->nb[0] != next_nb) {
return false;
}
next_nb *= tensor->ne[0]/ggml_blck_size(tensor->type);
for (int i = 1; i < GGML_MAX_DIMS; i++) {
if (tensor->ne[i] != 1) {
if (i > n) {
if (tensor->nb[i] != next_nb) {
return false;
}
next_nb *= tensor->ne[i];
} else {
// this dimension does not need to be contiguous
next_nb = tensor->ne[i]*tensor->nb[i];
}
}
}
return true;
}
return GGML_CALL bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
tensor->nb[0] == ggml_type_size(tensor->type) && return ggml_is_contiguous_0(tensor);
tensor->nb[1] == (tensor->nb[0]*tensor->ne[0])/ggml_blck_size(tensor->type) &&
tensor->nb[2] == tensor->nb[1]*tensor->ne[1] &&
tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
} }
GGML_CALL bool ggml_is_contiguous_0(const struct ggml_tensor * tensor) { GGML_CALL bool ggml_is_contiguous_0(const struct ggml_tensor * tensor) {
return ggml_is_contiguous(tensor); return ggml_is_contiguous_n(tensor, 0);
} }
GGML_CALL bool ggml_is_contiguous_1(const struct ggml_tensor * tensor) { GGML_CALL bool ggml_is_contiguous_1(const struct ggml_tensor * tensor) {
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); return ggml_is_contiguous_n(tensor, 1);
return
tensor->nb[0] == ggml_type_size(tensor->type) &&
tensor->nb[2] == tensor->nb[1]*tensor->ne[1] &&
tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
} }
GGML_CALL bool ggml_is_contiguous_2(const struct ggml_tensor * tensor) { GGML_CALL bool ggml_is_contiguous_2(const struct ggml_tensor * tensor) {
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); return ggml_is_contiguous_n(tensor, 2);
return
tensor->nb[0] == ggml_type_size(tensor->type) &&
tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
} }
GGML_CALL bool ggml_is_permuted(const struct ggml_tensor * tensor) { GGML_CALL bool ggml_is_permuted(const struct ggml_tensor * tensor) {
@ -3272,20 +3279,20 @@ bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
return return
(t0->ne[0] == t1->ne[0] ) && (t0->ne[0] == t1->ne[0]) &&
(t0->ne[1] == t1->ne[1] ) && (t0->ne[1] == t1->ne[1]) &&
(t0->ne[2] == t1->ne[2] ) && (t0->ne[2] == t1->ne[2]) &&
(t0->ne[3] == t1->ne[3] ); (t0->ne[3] == t1->ne[3]);
} }
bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1) { bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
return return
(t0->nb[0] == t1->nb[0] ) && (t0->nb[0] == t1->nb[0]) &&
(t0->nb[1] == t1->nb[1] ) && (t0->nb[1] == t1->nb[1]) &&
(t0->nb[2] == t1->nb[2] ) && (t0->nb[2] == t1->nb[2]) &&
(t0->nb[3] == t1->nb[3] ); (t0->nb[3] == t1->nb[3]);
} }
// check if t1 can be represented as a repetition of t0 // check if t1 can be represented as a repetition of t0
@ -4078,32 +4085,26 @@ float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i) {
switch (tensor->type) { switch (tensor->type) {
case GGML_TYPE_I8: case GGML_TYPE_I8:
{ {
GGML_ASSERT(tensor->nb[0] == sizeof(int8_t));
return ((int8_t *)(tensor->data))[i]; return ((int8_t *)(tensor->data))[i];
} }
case GGML_TYPE_I16: case GGML_TYPE_I16:
{ {
GGML_ASSERT(tensor->nb[0] == sizeof(int16_t));
return ((int16_t *)(tensor->data))[i]; return ((int16_t *)(tensor->data))[i];
} }
case GGML_TYPE_I32: case GGML_TYPE_I32:
{ {
GGML_ASSERT(tensor->nb[0] == sizeof(int32_t));
return ((int32_t *)(tensor->data))[i]; return ((int32_t *)(tensor->data))[i];
} }
case GGML_TYPE_F16: case GGML_TYPE_F16:
{ {
GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
return GGML_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]); return GGML_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]);
} }
case GGML_TYPE_BF16: case GGML_TYPE_BF16:
{ {
GGML_ASSERT(tensor->nb[0] == sizeof(ggml_bf16_t));
return GGML_BF16_TO_FP32(((ggml_bf16_t *)(tensor->data))[i]); return GGML_BF16_TO_FP32(((ggml_bf16_t *)(tensor->data))[i]);
} }
case GGML_TYPE_F32: case GGML_TYPE_F32:
{ {
GGML_ASSERT(tensor->nb[0] == sizeof(float));
return ((float *)(tensor->data))[i]; return ((float *)(tensor->data))[i];
} }
default: default:
@ -4125,32 +4126,26 @@ void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value) {
switch (tensor->type) { switch (tensor->type) {
case GGML_TYPE_I8: case GGML_TYPE_I8:
{ {
GGML_ASSERT(tensor->nb[0] == sizeof(int8_t));
((int8_t *)(tensor->data))[i] = value; ((int8_t *)(tensor->data))[i] = value;
} break; } break;
case GGML_TYPE_I16: case GGML_TYPE_I16:
{ {
GGML_ASSERT(tensor->nb[0] == sizeof(int16_t));
((int16_t *)(tensor->data))[i] = value; ((int16_t *)(tensor->data))[i] = value;
} break; } break;
case GGML_TYPE_I32: case GGML_TYPE_I32:
{ {
GGML_ASSERT(tensor->nb[0] == sizeof(int32_t));
((int32_t *)(tensor->data))[i] = value; ((int32_t *)(tensor->data))[i] = value;
} break; } break;
case GGML_TYPE_F16: case GGML_TYPE_F16:
{ {
GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
((ggml_fp16_t *)(tensor->data))[i] = GGML_FP32_TO_FP16(value); ((ggml_fp16_t *)(tensor->data))[i] = GGML_FP32_TO_FP16(value);
} break; } break;
case GGML_TYPE_BF16: case GGML_TYPE_BF16:
{ {
GGML_ASSERT(tensor->nb[0] == sizeof(ggml_bf16_t));
((ggml_bf16_t *)(tensor->data))[i] = GGML_FP32_TO_BF16(value); ((ggml_bf16_t *)(tensor->data))[i] = GGML_FP32_TO_BF16(value);
} break; } break;
case GGML_TYPE_F32: case GGML_TYPE_F32:
{ {
GGML_ASSERT(tensor->nb[0] == sizeof(float));
((float *)(tensor->data))[i] = value; ((float *)(tensor->data))[i] = value;
} break; } break;
default: default:
@ -7343,7 +7338,7 @@ struct ggml_tensor * ggml_add_rel_pos_inplace(
return ggml_add_rel_pos_impl(ctx, a, pw, ph, true); return ggml_add_rel_pos_impl(ctx, a, pw, ph, true);
} }
// gmml_unary // ggml_unary
static struct ggml_tensor * ggml_unary_impl( static struct ggml_tensor * ggml_unary_impl(
struct ggml_context * ctx, struct ggml_context * ctx,