ggml : add n_as argument to ggml_mul_mat_id

This commit is contained in:
slaren 2023-12-09 12:42:25 +01:00
parent 7372b62271
commit ee8fb399aa
6 changed files with 17 additions and 14 deletions

View File

@@ -8244,6 +8244,8 @@ static void ggml_cuda_mul_mat_id(const ggml_tensor * src0, const ggml_tensor * s
const struct ggml_tensor * ids = src0; const struct ggml_tensor * ids = src0;
const int32_t id = dst->op_params[0]; const int32_t id = dst->op_params[0];
const int32_t n_as = dst->op_params[1];
const char * ids_dev = (const char *)((const ggml_tensor_extra_gpu *)ids->extra)->data_device[g_main_device]; const char * ids_dev = (const char *)((const ggml_tensor_extra_gpu *)ids->extra)->data_device[g_main_device];
std::vector<char> ids_host(ggml_nbytes(ids)); std::vector<char> ids_host(ggml_nbytes(ids));
@@ -8272,7 +8274,7 @@ static void ggml_cuda_mul_mat_id(const ggml_tensor * src0, const ggml_tensor * s
const int32_t row_id = *(const int32_t *) (ids_host.data() + i01*ids->nb[1] + id*ids->nb[0]); const int32_t row_id = *(const int32_t *) (ids_host.data() + i01*ids->nb[1] + id*ids->nb[0]);
GGML_ASSERT(row_id >= 0 && row_id < ids->ne[0]); GGML_ASSERT(row_id >= 0 && row_id < n_as);
const struct ggml_tensor * src0_row = dst->src[row_id + 2]; const struct ggml_tensor * src0_row = dst->src[row_id + 2];

View File

@@ -1460,7 +1460,7 @@ void ggml_metal_graph_compute(
GGML_ASSERT(src0t == GGML_TYPE_I32); GGML_ASSERT(src0t == GGML_TYPE_I32);
const int n_as = ne00; const int n_as = ((int32_t *) dst->op_params)[1];
// TODO: make this more general // TODO: make this more general
GGML_ASSERT(n_as <= 8); GGML_ASSERT(n_as <= 8);

14
ggml.c
View File

@@ -4076,12 +4076,11 @@ struct ggml_tensor * ggml_mul_mat(
struct ggml_tensor * ggml_mul_mat_id( struct ggml_tensor * ggml_mul_mat_id(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * as[], struct ggml_tensor * as[],
int n_as,
struct ggml_tensor * ids, struct ggml_tensor * ids,
int id, int id,
struct ggml_tensor * b) { struct ggml_tensor * b) {
int64_t n_as = ids->ne[0];
GGML_ASSERT(ids->type == GGML_TYPE_I32); GGML_ASSERT(ids->type == GGML_TYPE_I32);
GGML_ASSERT(ids->ne[2] == 1 && ids->ne[3] == 1); GGML_ASSERT(ids->ne[2] == 1 && ids->ne[3] == 1);
GGML_ASSERT(ids->ne[1] == b->ne[1]); GGML_ASSERT(ids->ne[1] == b->ne[1]);
@@ -4099,6 +4098,7 @@ struct ggml_tensor * ggml_mul_mat_id(
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, MAX(as[0]->n_dims, b->n_dims), ne); struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, MAX(as[0]->n_dims, b->n_dims), ne);
ggml_set_op_params_i32(result, 0, id); ggml_set_op_params_i32(result, 0, id);
ggml_set_op_params_i32(result, 1, n_as);
result->op = GGML_OP_MUL_MAT_ID; result->op = GGML_OP_MUL_MAT_ID;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -4106,8 +4106,7 @@ struct ggml_tensor * ggml_mul_mat_id(
result->src[1] = b; result->src[1] = b;
// TODO: n_as is the selected experts, but it should be the total number of experts // TODO: n_as is the selected experts, but it should be the total number of experts
//for (int64_t i = 0; i < n_as; i++) { for (int i = 0; i < n_as; i++) {
for (int64_t i = 0; i < 8; i++) {
struct ggml_tensor * a = as[i]; struct ggml_tensor * a = as[i];
GGML_ASSERT(ggml_are_same_shape(as[0], a)); GGML_ASSERT(ggml_are_same_shape(as[0], a));
GGML_ASSERT(ggml_can_mul_mat(a, b)); GGML_ASSERT(ggml_can_mul_mat(a, b));
@@ -9757,14 +9756,13 @@ static void ggml_compute_forward_mul_mat_id(
} }
const struct ggml_tensor * ids = src0; const struct ggml_tensor * ids = src0;
const int id = ggml_get_op_params_i32(dst, 0); const int id = ggml_get_op_params_i32(dst, 0);
const int n_as = ggml_get_op_params_i32(dst, 1);
for (int64_t i01 = 0; i01 < ids->ne[1]; i01++) { for (int64_t i01 = 0; i01 < ids->ne[1]; i01++) {
const int32_t row_id = *(const int32_t *) ((const char *) ids->data + i01*ids->nb[1] + id*ids->nb[0]); const int32_t row_id = *(const int32_t *) ((const char *) ids->data + i01*ids->nb[1] + id*ids->nb[0]);
// TODO: this assert seems wrong? GGML_ASSERT(row_id >= 0 && row_id < n_as);
//printf("row_id = %d, ids->ne[0] = %d, id = %d\n", row_id, ids->ne[0], id);
//GGML_ASSERT(row_id >= 0 && row_id < ids->ne[0]);
const struct ggml_tensor * src0_row = dst->src[row_id + 2]; const struct ggml_tensor * src0_row = dst->src[row_id + 2];
ggml_compute_forward_mul_mat(params, src0_row, src1, dst, i01, 1); ggml_compute_forward_mul_mat(params, src0_row, src1, dst, i01, 1);

1
ggml.h
View File

@@ -1052,6 +1052,7 @@ extern "C" {
GGML_API struct ggml_tensor * ggml_mul_mat_id( GGML_API struct ggml_tensor * ggml_mul_mat_id(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * as[], struct ggml_tensor * as[],
int n_as,
struct ggml_tensor * ids, struct ggml_tensor * ids,
int id, int id,
struct ggml_tensor * b); struct ggml_tensor * b);

View File

@@ -4270,11 +4270,11 @@ struct llm_build_context {
ggml_tensor ** ffn_down_exp = (ggml_tensor **) model.layers[il].ffn_down_exp; ggml_tensor ** ffn_down_exp = (ggml_tensor **) model.layers[il].ffn_down_exp;
cur_expert = ggml_mul(ctx0, cur_expert = ggml_mul(ctx0,
ggml_mul_mat_id(ctx0, ffn_up_exp, selected_experts, i, cur), ggml_mul_mat_id(ctx0, ffn_up_exp, n_experts, selected_experts, i, cur),
ggml_silu(ctx0, ggml_silu(ctx0,
ggml_mul_mat_id(ctx0, ffn_gate_exp, selected_experts, i, cur))); // [n_tokens, n_embd] ggml_mul_mat_id(ctx0, ffn_gate_exp, n_experts, selected_experts, i, cur))); // [n_tokens, n_embd]
cur_expert = ggml_mul_mat_id(ctx0, ffn_down_exp, selected_experts, i, cur_expert); // [n_tokens, n_embd] cur_expert = ggml_mul_mat_id(ctx0, ffn_down_exp, n_experts, selected_experts, i, cur_expert); // [n_tokens, n_embd]
cur_expert = ggml_mul(ctx0, cur_expert, cur_expert = ggml_mul(ctx0, cur_expert,
ggml_view_2d(ctx0, weights, 1, n_tokens, weights->nb[1], i*weights->nb[0])); ggml_view_2d(ctx0, weights, 1, n_tokens, weights->nb[1], i*weights->nb[0]));

View File

@@ -343,6 +343,8 @@ struct test_case {
ud->ok = false; ud->ok = false;
} }
return true; return true;
GGML_UNUSED(index);
}; };
ggml_backend_compare_graph_backend(backend1, backend2, gf, callback, &ud); ggml_backend_compare_graph_backend(backend1, backend2, gf, callback, &ud);
@@ -803,7 +805,7 @@ struct test_mul_mat_id : public test_case {
} }
ggml_tensor * ids = ggml_new_tensor_2d(ctx, GGML_TYPE_I32, n_mats, n); ggml_tensor * ids = ggml_new_tensor_2d(ctx, GGML_TYPE_I32, n_mats, n);
ggml_tensor * b = ggml_new_tensor_2d(ctx, type_b, k, n); ggml_tensor * b = ggml_new_tensor_2d(ctx, type_b, k, n);
ggml_tensor * out = ggml_mul_mat_id(ctx, mats.data(), ids, id, b); ggml_tensor * out = ggml_mul_mat_id(ctx, mats.data(), n_mats, ids, id, b);
return out; return out;
} }