ggml : sync (custom ops) (#2537)

ggml-ci
This commit is contained in:
Georgi Gerganov 2023-08-07 13:20:09 +03:00 committed by GitHub
parent 3d9a551816
commit 99d29c0094
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 417 additions and 115 deletions

387
ggml.c
View File

@ -195,8 +195,8 @@ typedef void * thread_ret_t;
#define GGML_ALIGNED_MALLOC(size) _aligned_malloc(size, GGML_MEM_ALIGN) #define GGML_ALIGNED_MALLOC(size) _aligned_malloc(size, GGML_MEM_ALIGN)
#define GGML_ALIGNED_FREE(ptr) _aligned_free(ptr) #define GGML_ALIGNED_FREE(ptr) _aligned_free(ptr)
#else #else
inline static void* ggml_aligned_malloc(size_t size) { inline static void * ggml_aligned_malloc(size_t size) {
void* aligned_memory = NULL; void * aligned_memory = NULL;
#ifdef GGML_USE_METAL #ifdef GGML_USE_METAL
int result = posix_memalign(&aligned_memory, getpagesize(), size); int result = posix_memalign(&aligned_memory, getpagesize(), size);
#else #else
@ -3811,7 +3811,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
"CROSS_ENTROPY_LOSS_BACK", "CROSS_ENTROPY_LOSS_BACK",
}; };
static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59"); static_assert(GGML_OP_COUNT == 62, "GGML_OP_COUNT != 62");
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"none", "none",
@ -3883,7 +3883,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"cross_entropy_loss_back(x,y)", "cross_entropy_loss_back(x,y)",
}; };
static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59"); static_assert(GGML_OP_COUNT == 62, "GGML_OP_COUNT != 62");
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2"); static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
@ -4253,7 +4253,7 @@ static inline bool ggml_is_padded_1d(const struct ggml_tensor * tensor) {
tensor->nb[3] == tensor->nb[2]*tensor->ne[2]; tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
} }
static inline bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) { bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
return return
@ -6890,7 +6890,7 @@ GGML_API struct ggml_tensor * ggml_conv_1d(
ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0), ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0),
a->ne[2], 1, 1, a->ne[2], 1, 1,
}; };
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne); struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
int32_t params[] = { s0, p0, d0 }; int32_t params[] = { s0, p0, d0 };
ggml_set_op_params(result, &params, sizeof(params)); ggml_set_op_params(result, &params, sizeof(params));
@ -6905,10 +6905,10 @@ GGML_API struct ggml_tensor * ggml_conv_1d(
// ggml_conv_2d // ggml_conv_2d
struct ggml_tensor* ggml_conv_2d( struct ggml_tensor * ggml_conv_2d(
struct ggml_context* ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
struct ggml_tensor * b, struct ggml_tensor * b,
int s0, int s0,
int s1, int s1,
int p0, int p0,
@ -6929,7 +6929,7 @@ struct ggml_tensor* ggml_conv_2d(
ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1), ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1),
a->ne[3], b->ne[3], a->ne[3], b->ne[3],
}; };
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
int32_t params[] = { s0, s1, p0, p1, d0, d1 }; int32_t params[] = { s0, s1, p0, p1, d0, d1 };
ggml_set_op_params(result, &params, sizeof(params)); ggml_set_op_params(result, &params, sizeof(params));
@ -6945,7 +6945,7 @@ struct ggml_tensor* ggml_conv_2d(
// ggml_conv_1d_ph // ggml_conv_1d_ph
struct ggml_tensor* ggml_conv_1d_ph( struct ggml_tensor * ggml_conv_1d_ph(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
struct ggml_tensor * b, struct ggml_tensor * b,
@ -6963,7 +6963,7 @@ static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, int p) {
// ggml_pool_1d // ggml_pool_1d
struct ggml_tensor* ggml_pool_1d( struct ggml_tensor * ggml_pool_1d(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
enum ggml_op_pool op, enum ggml_op_pool op,
@ -6982,7 +6982,7 @@ struct ggml_tensor* ggml_pool_1d(
ggml_calc_pool_output_size(a->ne[0], k0, s0, p0), ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
a->ne[1], a->ne[1],
}; };
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne); struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
int32_t params[] = { op, k0, s0, p0 }; int32_t params[] = { op, k0, s0, p0 };
ggml_set_op_params(result, &params, sizeof(params)); ggml_set_op_params(result, &params, sizeof(params));
@ -6996,7 +6996,7 @@ struct ggml_tensor* ggml_pool_1d(
// ggml_pool_2d // ggml_pool_2d
struct ggml_tensor* ggml_pool_2d( struct ggml_tensor * ggml_pool_2d(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
enum ggml_op_pool op, enum ggml_op_pool op,
@ -7019,7 +7019,7 @@ struct ggml_tensor* ggml_pool_2d(
ggml_calc_pool_output_size(a->ne[1], k1, s1, p1), ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
a->ne[2], a->ne[2],
}; };
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne); struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
int32_t params[] = { op, k0, k1, s0, s1, p0, p1 }; int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
ggml_set_op_params(result, &params, sizeof(params)); ggml_set_op_params(result, &params, sizeof(params));
@ -7349,7 +7349,7 @@ struct ggml_tensor * ggml_map_binary_inplace_f32(
return ggml_map_binary_impl_f32(ctx, a, b, fun, true); return ggml_map_binary_impl_f32(ctx, a, b, fun, true);
} }
// ggml_map_custom1 // ggml_map_custom1_f32
static struct ggml_tensor * ggml_map_custom1_impl_f32( static struct ggml_tensor * ggml_map_custom1_impl_f32(
struct ggml_context * ctx, struct ggml_context * ctx,
@ -7366,7 +7366,7 @@ static struct ggml_tensor * ggml_map_custom1_impl_f32(
ggml_set_op_params(result, (const void *) &fun, sizeof(fun)); ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
result->op = GGML_OP_MAP_CUSTOM1; result->op = GGML_OP_MAP_CUSTOM1_F32;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a; result->src[0] = a;
@ -7387,7 +7387,7 @@ struct ggml_tensor * ggml_map_custom1_inplace_f32(
return ggml_map_custom1_impl_f32(ctx, a, fun, true); return ggml_map_custom1_impl_f32(ctx, a, fun, true);
} }
// ggml_map_custom2 // ggml_map_custom2_f32
static struct ggml_tensor * ggml_map_custom2_impl_f32( static struct ggml_tensor * ggml_map_custom2_impl_f32(
struct ggml_context * ctx, struct ggml_context * ctx,
@ -7405,7 +7405,7 @@ static struct ggml_tensor * ggml_map_custom2_impl_f32(
ggml_set_op_params(result, (const void *) &fun, sizeof(fun)); ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
result->op = GGML_OP_MAP_CUSTOM2; result->op = GGML_OP_MAP_CUSTOM2_F32;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a; result->src[0] = a;
result->src[1] = b; result->src[1] = b;
@ -7429,7 +7429,7 @@ struct ggml_tensor * ggml_map_custom2_inplace_f32(
return ggml_map_custom2_impl_f32(ctx, a, b, fun, true); return ggml_map_custom2_impl_f32(ctx, a, b, fun, true);
} }
// ggml_map_custom3 // ggml_map_custom3_f32
static struct ggml_tensor * ggml_map_custom3_impl_f32( static struct ggml_tensor * ggml_map_custom3_impl_f32(
struct ggml_context * ctx, struct ggml_context * ctx,
@ -7448,7 +7448,7 @@ static struct ggml_tensor * ggml_map_custom3_impl_f32(
ggml_set_op_params(result, (const void *) &fun, sizeof(fun)); ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
result->op = GGML_OP_MAP_CUSTOM3; result->op = GGML_OP_MAP_CUSTOM3_F32;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a; result->src[0] = a;
result->src[1] = b; result->src[1] = b;
@ -7475,6 +7475,190 @@ struct ggml_tensor * ggml_map_custom3_inplace_f32(
return ggml_map_custom3_impl_f32(ctx, a, b, c, fun, true); return ggml_map_custom3_impl_f32(ctx, a, b, c, fun, true);
} }
// ggml_map_custom1

// Parameter record of a GGML_OP_MAP_CUSTOM1 node.
// ggml_map_custom1_impl() copies one of these into the result tensor with
// ggml_set_op_params(); the forward pass and the graph planner read it back
// from the tensor's op_params.
struct ggml_map_custom1_op_params {
// user callback executed by the forward pass
ggml_custom1_op_t fun;
// requested number of tasks: GGML_N_TASKS_MAX or a positive count
int n_tasks;
// opaque pointer passed back to fun unchanged
void * userdata;
};
static struct ggml_tensor * ggml_map_custom1_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
const ggml_custom1_op_t fun,
int n_tasks,
void * userdata,
bool inplace) {
GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
bool is_node = false;
if (!inplace && a->grad) {
is_node = true;
}
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
struct ggml_map_custom1_op_params params = {
/*.fun =*/ fun,
/*.n_tasks =*/ n_tasks,
/*.userdata =*/ userdata
};
ggml_set_op_params(result, (const void *) &params, sizeof(params));
result->op = GGML_OP_MAP_CUSTOM1;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
return result;
}
// Applies the custom one-input operator `fun` to `a`, writing into a new tensor
// duplicated from `a` (out-of-place variant).
struct ggml_tensor * ggml_map_custom1(
        struct ggml_context      * ctx,
        struct ggml_tensor       * a,
        const  ggml_custom1_op_t   fun,
        int                        n_tasks,
        void                     * userdata) {
    const bool inplace = false;

    return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, inplace);
}
// Applies the custom one-input operator `fun` to `a`; the result is a view of
// `a` (in-place variant).
struct ggml_tensor * ggml_map_custom1_inplace(
        struct ggml_context      * ctx,
        struct ggml_tensor       * a,
        const  ggml_custom1_op_t   fun,
        int                        n_tasks,
        void                     * userdata) {
    const bool inplace = true;

    return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, inplace);
}
// ggml_map_custom2

// Parameter record of a GGML_OP_MAP_CUSTOM2 node.
// ggml_map_custom2_impl() copies one of these into the result tensor with
// ggml_set_op_params(); the forward pass and the graph planner read it back
// from the tensor's op_params.
struct ggml_map_custom2_op_params {
// user callback executed by the forward pass
ggml_custom2_op_t fun;
// requested number of tasks: GGML_N_TASKS_MAX or a positive count
int n_tasks;
// opaque pointer passed back to fun unchanged
void * userdata;
};
static struct ggml_tensor * ggml_map_custom2_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
const ggml_custom2_op_t fun,
int n_tasks,
void * userdata,
bool inplace) {
GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
bool is_node = false;
if (!inplace && (a->grad || b->grad)) {
is_node = true;
}
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
struct ggml_map_custom2_op_params params = {
/*.fun =*/ fun,
/*.n_tasks =*/ n_tasks,
/*.userdata =*/ userdata
};
ggml_set_op_params(result, (const void *) &params, sizeof(params));
result->op = GGML_OP_MAP_CUSTOM2;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
result->src[1] = b;
return result;
}
// Applies the custom two-input operator `fun` to `a` and `b`, writing into a new
// tensor duplicated from `a` (out-of-place variant).
struct ggml_tensor * ggml_map_custom2(
        struct ggml_context      * ctx,
        struct ggml_tensor       * a,
        struct ggml_tensor       * b,
        const  ggml_custom2_op_t   fun,
        int                        n_tasks,
        void                     * userdata) {
    const bool inplace = false;

    return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, inplace);
}
// Applies the custom two-input operator `fun` to `a` and `b`; the result is a
// view of `a` (in-place variant).
struct ggml_tensor * ggml_map_custom2_inplace(
        struct ggml_context      * ctx,
        struct ggml_tensor       * a,
        struct ggml_tensor       * b,
        const  ggml_custom2_op_t   fun,
        int                        n_tasks,
        void                     * userdata) {
    const bool inplace = true;

    return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, inplace);
}
// ggml_map_custom3

// Parameter record of a GGML_OP_MAP_CUSTOM3 node.
// ggml_map_custom3_impl() copies one of these into the result tensor with
// ggml_set_op_params(); the forward pass and the graph planner read it back
// from the tensor's op_params.
struct ggml_map_custom3_op_params {
// user callback executed by the forward pass
ggml_custom3_op_t fun;
// requested number of tasks: GGML_N_TASKS_MAX or a positive count
int n_tasks;
// opaque pointer passed back to fun unchanged
void * userdata;
};
static struct ggml_tensor * ggml_map_custom3_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * c,
const ggml_custom3_op_t fun,
int n_tasks,
void * userdata,
bool inplace) {
GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
bool is_node = false;
if (!inplace && (a->grad || b->grad || c->grad)) {
is_node = true;
}
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
struct ggml_map_custom3_op_params params = {
/*.fun =*/ fun,
/*.n_tasks =*/ n_tasks,
/*.userdata =*/ userdata
};
ggml_set_op_params(result, (const void *) &params, sizeof(params));
result->op = GGML_OP_MAP_CUSTOM3;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
result->src[1] = b;
result->src[2] = c;
return result;
}
// Applies the custom three-input operator `fun` to `a`, `b` and `c`, writing
// into a new tensor duplicated from `a` (out-of-place variant).
struct ggml_tensor * ggml_map_custom3(
        struct ggml_context      * ctx,
        struct ggml_tensor       * a,
        struct ggml_tensor       * b,
        struct ggml_tensor       * c,
        const  ggml_custom3_op_t   fun,
        int                        n_tasks,
        void                     * userdata) {
    const bool inplace = false;

    return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, inplace);
}
// Applies the custom three-input operator `fun` to `a`, `b` and `c`; the result
// is a view of `a` (in-place variant).
struct ggml_tensor * ggml_map_custom3_inplace(
        struct ggml_context      * ctx,
        struct ggml_tensor       * a,
        struct ggml_tensor       * b,
        struct ggml_tensor       * c,
        const  ggml_custom3_op_t   fun,
        int                        n_tasks,
        void                     * userdata) {
    const bool inplace = true;

    return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, inplace);
}
// ggml_cross_entropy_loss // ggml_cross_entropy_loss
struct ggml_tensor * ggml_cross_entropy_loss( struct ggml_tensor * ggml_cross_entropy_loss(
@ -9283,8 +9467,8 @@ static void ggml_compute_forward_sum_rows_f32(
for (int64_t i3 = 0; i3 < ne03; i3++) { for (int64_t i3 = 0; i3 < ne03; i3++) {
for (int64_t i2 = 0; i2 < ne02; i2++) { for (int64_t i2 = 0; i2 < ne02; i2++) {
for (int64_t i1 = 0; i1 < ne01; i1++) { for (int64_t i1 = 0; i1 < ne01; i1++) {
float* src_row = (float *) ((char *) src0->data + i1*nb01 + i2*nb02 + i3*nb03); float * src_row = (float *) ((char *) src0->data + i1*nb01 + i2*nb02 + i3*nb03);
float* dst_row = (float *) ((char *) dst->data + i1*nb1 + i2*nb2 + i3*nb3); float * dst_row = (float *) ((char *) dst->data + i1*nb1 + i2*nb2 + i3*nb3);
float row_sum = 0; float row_sum = 0;
ggml_vec_sum_f32(ne00, &row_sum, src_row); ggml_vec_sum_f32(ne00, &row_sum, src_row);
dst_row[0] = row_sum; dst_row[0] = row_sum;
@ -12894,7 +13078,7 @@ static void ggml_compute_forward_pool_1d(
const struct ggml_tensor * src0, const struct ggml_tensor * src0,
struct ggml_tensor * dst) { struct ggml_tensor * dst) {
const int32_t* opts = (const int32_t*)dst->op_params; const int32_t * opts = (const int32_t *)dst->op_params;
enum ggml_op_pool op = opts[0]; enum ggml_op_pool op = opts[0];
const int k0 = opts[1]; const int k0 = opts[1];
const int s0 = opts[2]; const int s0 = opts[2];
@ -14227,24 +14411,6 @@ static void ggml_compute_forward_map_custom1_f32(
fun(dst, a); fun(dst, a);
} }
// Type dispatcher for the f32 custom1 callback: only GGML_TYPE_F32 sources are
// supported, any other source type trips the assertion.
static void ggml_compute_forward_map_custom1(
        const struct ggml_compute_params * params,
        const struct ggml_tensor * a,
        struct ggml_tensor * dst,
        const ggml_custom1_op_f32_t fun) {
    if (a->type != GGML_TYPE_F32) {
        GGML_ASSERT(false);
        return;
    }

    ggml_compute_forward_map_custom1_f32(params, a, dst, fun);
}
// ggml_compute_forward_map_custom2 // ggml_compute_forward_map_custom2
static void ggml_compute_forward_map_custom2_f32( static void ggml_compute_forward_map_custom2_f32(
@ -14263,24 +14429,6 @@ static void ggml_compute_forward_map_custom2_f32(
} }
// Type dispatcher for the f32 custom2 callback: only GGML_TYPE_F32 sources are
// supported (decided on `a`), any other source type trips the assertion.
static void ggml_compute_forward_map_custom2(
        const struct ggml_compute_params * params,
        const struct ggml_tensor * a,
        const struct ggml_tensor * b,
        struct ggml_tensor * dst,
        const ggml_custom2_op_f32_t fun) {
    if (a->type != GGML_TYPE_F32) {
        GGML_ASSERT(false);
        return;
    }

    ggml_compute_forward_map_custom2_f32(params, a, b, dst, fun);
}
// ggml_compute_forward_map_custom3 // ggml_compute_forward_map_custom3
static void ggml_compute_forward_map_custom3_f32( static void ggml_compute_forward_map_custom3_f32(
@ -14299,24 +14447,52 @@ static void ggml_compute_forward_map_custom3_f32(
fun(dst, a, b, c); fun(dst, a, b, c);
} }
// ggml_compute_forward_map_custom1
static void ggml_compute_forward_map_custom1(
const struct ggml_compute_params * params,
const struct ggml_tensor * a,
struct ggml_tensor * dst) {
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}
struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) dst->op_params;
p->fun(dst, a, params->ith, params->nth, p->userdata);
}
// ggml_compute_forward_map_custom2
static void ggml_compute_forward_map_custom2(
const struct ggml_compute_params * params,
const struct ggml_tensor * a,
const struct ggml_tensor * b,
struct ggml_tensor * dst) {
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}
struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) dst->op_params;
p->fun(dst, a, b, params->ith, params->nth, p->userdata);
}
// ggml_compute_forward_map_custom3
static void ggml_compute_forward_map_custom3( static void ggml_compute_forward_map_custom3(
const struct ggml_compute_params * params, const struct ggml_compute_params * params,
const struct ggml_tensor * a, const struct ggml_tensor * a,
const struct ggml_tensor * b, const struct ggml_tensor * b,
const struct ggml_tensor * c, const struct ggml_tensor * c,
struct ggml_tensor * dst, struct ggml_tensor * dst) {
const ggml_custom3_op_f32_t fun) { if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
switch (a->type) { return;
case GGML_TYPE_F32:
{
ggml_compute_forward_map_custom3_f32(params, a, b, c, dst, fun);
} break;
default:
{
GGML_ASSERT(false);
} break;
} }
struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) dst->op_params;
p->fun(dst, a, b, c, params->ith, params->nth, p->userdata);
} }
// ggml_compute_forward_cross_entropy_loss // ggml_compute_forward_cross_entropy_loss
@ -14838,25 +15014,40 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
ggml_compute_forward_map_binary(params, tensor->src[0], tensor->src[1], tensor, fun); ggml_compute_forward_map_binary(params, tensor->src[0], tensor->src[1], tensor, fun);
} }
break; break;
case GGML_OP_MAP_CUSTOM1: case GGML_OP_MAP_CUSTOM1_F32:
{ {
ggml_custom1_op_f32_t fun; ggml_custom1_op_f32_t fun;
memcpy(&fun, tensor->op_params, sizeof(fun)); memcpy(&fun, tensor->op_params, sizeof(fun));
ggml_compute_forward_map_custom1(params, tensor->src[0], tensor, fun); ggml_compute_forward_map_custom1_f32(params, tensor->src[0], tensor, fun);
}
break;
case GGML_OP_MAP_CUSTOM2_F32:
{
ggml_custom2_op_f32_t fun;
memcpy(&fun, tensor->op_params, sizeof(fun));
ggml_compute_forward_map_custom2_f32(params, tensor->src[0], tensor->src[1], tensor, fun);
}
break;
case GGML_OP_MAP_CUSTOM3_F32:
{
ggml_custom3_op_f32_t fun;
memcpy(&fun, tensor->op_params, sizeof(fun));
ggml_compute_forward_map_custom3_f32(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor, fun);
}
break;
case GGML_OP_MAP_CUSTOM1:
{
ggml_compute_forward_map_custom1(params, tensor->src[0], tensor);
} }
break; break;
case GGML_OP_MAP_CUSTOM2: case GGML_OP_MAP_CUSTOM2:
{ {
ggml_custom2_op_f32_t fun; ggml_compute_forward_map_custom2(params, tensor->src[0], tensor->src[1], tensor);
memcpy(&fun, tensor->op_params, sizeof(fun));
ggml_compute_forward_map_custom2(params, tensor->src[0], tensor->src[1], tensor, fun);
} }
break; break;
case GGML_OP_MAP_CUSTOM3: case GGML_OP_MAP_CUSTOM3:
{ {
ggml_custom3_op_f32_t fun; ggml_compute_forward_map_custom3(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor);
memcpy(&fun, tensor->op_params, sizeof(fun));
ggml_compute_forward_map_custom3(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor, fun);
} }
break; break;
case GGML_OP_CROSS_ENTROPY_LOSS: case GGML_OP_CROSS_ENTROPY_LOSS:
@ -15664,6 +15855,9 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
} break; } break;
case GGML_OP_MAP_UNARY: case GGML_OP_MAP_UNARY:
case GGML_OP_MAP_BINARY: case GGML_OP_MAP_BINARY:
case GGML_OP_MAP_CUSTOM1_F32:
case GGML_OP_MAP_CUSTOM2_F32:
case GGML_OP_MAP_CUSTOM3_F32:
case GGML_OP_MAP_CUSTOM1: case GGML_OP_MAP_CUSTOM1:
case GGML_OP_MAP_CUSTOM2: case GGML_OP_MAP_CUSTOM2:
case GGML_OP_MAP_CUSTOM3: case GGML_OP_MAP_CUSTOM3:
@ -16449,12 +16643,39 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
case GGML_OP_WIN_UNPART: case GGML_OP_WIN_UNPART:
case GGML_OP_MAP_UNARY: case GGML_OP_MAP_UNARY:
case GGML_OP_MAP_BINARY: case GGML_OP_MAP_BINARY:
case GGML_OP_MAP_CUSTOM1: case GGML_OP_MAP_CUSTOM1_F32:
case GGML_OP_MAP_CUSTOM2: case GGML_OP_MAP_CUSTOM2_F32:
case GGML_OP_MAP_CUSTOM3: case GGML_OP_MAP_CUSTOM3_F32:
{ {
n_tasks = 1; n_tasks = 1;
} break; } break;
case GGML_OP_MAP_CUSTOM1:
{
struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) node->op_params;
if (p->n_tasks == GGML_N_TASKS_MAX) {
n_tasks = n_threads;
} else {
n_tasks = MIN(p->n_tasks, n_threads);
}
} break;
case GGML_OP_MAP_CUSTOM2:
{
struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) node->op_params;
if (p->n_tasks == GGML_N_TASKS_MAX) {
n_tasks = n_threads;
} else {
n_tasks = MIN(p->n_tasks, n_threads);
}
} break;
case GGML_OP_MAP_CUSTOM3:
{
struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) node->op_params;
if (p->n_tasks == GGML_N_TASKS_MAX) {
n_tasks = n_threads;
} else {
n_tasks = MIN(p->n_tasks, n_threads);
}
} break;
case GGML_OP_CROSS_ENTROPY_LOSS: case GGML_OP_CROSS_ENTROPY_LOSS:
{ {
n_tasks = n_threads; n_tasks = n_threads;

145
ggml.h
View File

@ -183,6 +183,15 @@
# define GGML_API # define GGML_API
#endif #endif
// GGML_DEPRECATED(func, hint) wraps a function declaration `func` and marks it
// as deprecated, with `hint` as the message shown to the user:
//   - when __GNUC__ is defined:  func __attribute__((deprecated(hint)))
//   - when _MSC_VER is defined:  __declspec(deprecated(hint)) func
//   - otherwise:                 expands to plain `func` (no deprecation marker)
//
// TODO: support for clang
// NOTE(review): clang may already define __GNUC__ and take the first branch - confirm
#ifdef __GNUC__
# define GGML_DEPRECATED(func, hint) func __attribute__((deprecated(hint)))
#elif defined(_MSC_VER)
# define GGML_DEPRECATED(func, hint) __declspec(deprecated(hint)) func
#else
# define GGML_DEPRECATED(func, hint) func
#endif
#include <stdint.h> #include <stdint.h>
#include <stddef.h> #include <stddef.h>
#include <stdbool.h> #include <stdbool.h>
@ -374,6 +383,10 @@ extern "C" {
GGML_OP_MAP_UNARY, GGML_OP_MAP_UNARY,
GGML_OP_MAP_BINARY, GGML_OP_MAP_BINARY,
GGML_OP_MAP_CUSTOM1_F32,
GGML_OP_MAP_CUSTOM2_F32,
GGML_OP_MAP_CUSTOM3_F32,
GGML_OP_MAP_CUSTOM1, GGML_OP_MAP_CUSTOM1,
GGML_OP_MAP_CUSTOM2, GGML_OP_MAP_CUSTOM2,
GGML_OP_MAP_CUSTOM3, GGML_OP_MAP_CUSTOM3,
@ -570,6 +583,8 @@ extern "C" {
GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor); GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);
GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor); GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor);
GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
// use this to compute the memory overhead of a tensor // use this to compute the memory overhead of a tensor
GGML_API size_t ggml_tensor_overhead(void); GGML_API size_t ggml_tensor_overhead(void);
@ -1240,7 +1255,7 @@ extern "C" {
// conv_1d with padding = half // conv_1d with padding = half
// alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d) // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
GGML_API struct ggml_tensor* ggml_conv_1d_ph( GGML_API struct ggml_tensor * ggml_conv_1d_ph(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
struct ggml_tensor * b, struct ggml_tensor * b,
@ -1253,7 +1268,7 @@ extern "C" {
GGML_OP_POOL_COUNT, GGML_OP_POOL_COUNT,
}; };
GGML_API struct ggml_tensor* ggml_pool_1d( GGML_API struct ggml_tensor * ggml_pool_1d(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
enum ggml_op_pool op, enum ggml_op_pool op,
@ -1261,7 +1276,7 @@ extern "C" {
int s0, // stride int s0, // stride
int p0); // padding int p0); // padding
GGML_API struct ggml_tensor* ggml_pool_2d( GGML_API struct ggml_tensor * ggml_pool_2d(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
enum ggml_op_pool op, enum ggml_op_pool op,
@ -1315,15 +1330,6 @@ extern "C" {
int h0, int h0,
int w); int w);
// custom operators
typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
GGML_API struct ggml_tensor * ggml_unary( GGML_API struct ggml_tensor * ggml_unary(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
@ -1334,63 +1340,138 @@ extern "C" {
struct ggml_tensor * a, struct ggml_tensor * a,
enum ggml_unary_op op); enum ggml_unary_op op);
GGML_API struct ggml_tensor * ggml_map_unary_f32( // custom operators
typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_f32(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
ggml_unary_op_f32_t fun); ggml_unary_op_f32_t fun),
"use ggml_map_custom1 instead");
GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32( GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
ggml_unary_op_f32_t fun); ggml_unary_op_f32_t fun),
"use ggml_map_custom1_inplace instead");
GGML_API struct ggml_tensor * ggml_map_binary_f32( GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_f32(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
struct ggml_tensor * b, struct ggml_tensor * b,
ggml_binary_op_f32_t fun); ggml_binary_op_f32_t fun),
"use ggml_map_custom2 instead");
GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32( GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
struct ggml_tensor * b, struct ggml_tensor * b,
ggml_binary_op_f32_t fun); ggml_binary_op_f32_t fun),
"use ggml_map_custom2_inplace instead");
GGML_API struct ggml_tensor * ggml_map_custom1_f32( GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_f32(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
ggml_custom1_op_f32_t fun); ggml_custom1_op_f32_t fun),
"use ggml_map_custom1 instead");
GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32( GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
ggml_custom1_op_f32_t fun); ggml_custom1_op_f32_t fun),
"use ggml_map_custom1_inplace instead");
GGML_API struct ggml_tensor * ggml_map_custom2_f32( GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_f32(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
struct ggml_tensor * b, struct ggml_tensor * b,
ggml_custom2_op_f32_t fun); ggml_custom2_op_f32_t fun),
"use ggml_map_custom2 instead");
GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32( GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
struct ggml_tensor * b, struct ggml_tensor * b,
ggml_custom2_op_f32_t fun); ggml_custom2_op_f32_t fun),
"use ggml_map_custom2_inplace instead");
GGML_API struct ggml_tensor * ggml_map_custom3_f32( GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_f32(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
struct ggml_tensor * b, struct ggml_tensor * b,
struct ggml_tensor * c, struct ggml_tensor * c,
ggml_custom3_op_f32_t fun); ggml_custom3_op_f32_t fun),
"use ggml_map_custom3 instead");
GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32( GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
struct ggml_tensor * b, struct ggml_tensor * b,
struct ggml_tensor * c, struct ggml_tensor * c,
ggml_custom3_op_f32_t fun); ggml_custom3_op_f32_t fun),
"use ggml_map_custom3_inplace instead");
// custom operators v2
//
// Callback types for user-defined operators with 1, 2 or 3 source tensors.
// The forward pass calls the callback as fun(dst, <sources...>, ith, nth, userdata),
// where ith/nth are taken from the compute params of the calling task
// (presumably the task index and the number of tasks - confirm) and userdata is
// the pointer that was given when the node was created.
typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata);
typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);
// special n_tasks value: the graph planner uses the number of threads as the task count
#define GGML_N_TASKS_MAX -1
// For all ggml_map_custom* functions below:
//   fun      - callback to run in the forward pass
//   n_tasks  - GGML_N_TASKS_MAX or a value > 0 (asserted); a positive value is
//              capped at the number of threads when the graph is planned
//   userdata - opaque pointer handed to fun
// The plain variants return a new tensor duplicated from a; the _inplace
// variants return a view of a.

// one source tensor
GGML_API struct ggml_tensor * ggml_map_custom1(
struct ggml_context * ctx,
struct ggml_tensor * a,
ggml_custom1_op_t fun,
int n_tasks,
void * userdata);
// one source tensor, result is a view of a
GGML_API struct ggml_tensor * ggml_map_custom1_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
ggml_custom1_op_t fun,
int n_tasks,
void * userdata);
// two source tensors
GGML_API struct ggml_tensor * ggml_map_custom2(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
ggml_custom2_op_t fun,
int n_tasks,
void * userdata);
// two source tensors, result is a view of a
GGML_API struct ggml_tensor * ggml_map_custom2_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
ggml_custom2_op_t fun,
int n_tasks,
void * userdata);
// three source tensors
GGML_API struct ggml_tensor * ggml_map_custom3(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * c,
ggml_custom3_op_t fun,
int n_tasks,
void * userdata);
// three source tensors, result is a view of a
GGML_API struct ggml_tensor * ggml_map_custom3_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * c,
ggml_custom3_op_t fun,
int n_tasks,
void * userdata);
// loss function // loss function