mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-26 03:14:35 +00:00
add google magika inference example (ggml/748)
* add magika inference example * ggml : fix unaligned accesses in custom ops * ggml : fix FP32 GELU for values that exceed the FP16 range * use ggml_pool_1d * add README * Update README.md * pad inputs if the files are too small * cleanup ggml-ci
This commit is contained in:
parent
5f70671856
commit
2774b0c974
48
ggml.c
48
ggml.c
@ -1608,10 +1608,16 @@ inline static void ggml_vec_gelu_f16(const int n, ggml_fp16_t * y, const ggml_fp
|
|||||||
inline static void ggml_vec_gelu_f32(const int n, float * y, const float * x) {
|
inline static void ggml_vec_gelu_f32(const int n, float * y, const float * x) {
|
||||||
uint16_t t;
|
uint16_t t;
|
||||||
for (int i = 0; i < n; ++i) {
|
for (int i = 0; i < n; ++i) {
|
||||||
|
if (x[i] <= -10.0f) {
|
||||||
|
y[i] = 0.0f;
|
||||||
|
} else if (x[i] >= 10.0f) {
|
||||||
|
y[i] = x[i];
|
||||||
|
} else {
|
||||||
ggml_fp16_t fp16 = GGML_FP32_TO_FP16(x[i]);
|
ggml_fp16_t fp16 = GGML_FP32_TO_FP16(x[i]);
|
||||||
memcpy(&t, &fp16, sizeof(uint16_t));
|
memcpy(&t, &fp16, sizeof(uint16_t));
|
||||||
y[i] = GGML_FP16_TO_FP32(ggml_table_gelu_f16[t]);
|
y[i] = GGML_FP16_TO_FP32(ggml_table_gelu_f16[t]);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
inline static void ggml_vec_gelu_f32(const int n, float * y, const float * x) {
|
inline static void ggml_vec_gelu_f32(const int n, float * y, const float * x) {
|
||||||
@ -5780,11 +5786,13 @@ struct ggml_tensor * ggml_pool_1d(
|
|||||||
is_node = true;
|
is_node = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
const int64_t ne[2] = {
|
const int64_t ne[4] = {
|
||||||
ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
|
ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
|
||||||
a->ne[1],
|
a->ne[1],
|
||||||
|
a->ne[2],
|
||||||
|
a->ne[3],
|
||||||
};
|
};
|
||||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
|
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
||||||
|
|
||||||
int32_t params[] = { op, k0, s0, p0 };
|
int32_t params[] = { op, k0, s0, p0 };
|
||||||
ggml_set_op_params(result, params, sizeof(params));
|
ggml_set_op_params(result, params, sizeof(params));
|
||||||
@ -15081,9 +15089,10 @@ static void ggml_compute_forward_map_custom1(
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) dst->op_params;
|
struct ggml_map_custom1_op_params p;
|
||||||
|
memcpy(&p, dst->op_params, sizeof(p));
|
||||||
|
|
||||||
p->fun(dst, a, params->ith, params->nth, p->userdata);
|
p.fun(dst, a, params->ith, params->nth, p.userdata);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ggml_compute_forward_map_custom2
|
// ggml_compute_forward_map_custom2
|
||||||
@ -15099,9 +15108,10 @@ static void ggml_compute_forward_map_custom2(
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) dst->op_params;
|
struct ggml_map_custom2_op_params p;
|
||||||
|
memcpy(&p, dst->op_params, sizeof(p));
|
||||||
|
|
||||||
p->fun(dst, a, b, params->ith, params->nth, p->userdata);
|
p.fun(dst, a, b, params->ith, params->nth, p.userdata);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ggml_compute_forward_map_custom3
|
// ggml_compute_forward_map_custom3
|
||||||
@ -15118,9 +15128,10 @@ static void ggml_compute_forward_map_custom3(
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) dst->op_params;
|
struct ggml_map_custom3_op_params p;
|
||||||
|
memcpy(&p, dst->op_params, sizeof(p));
|
||||||
|
|
||||||
p->fun(dst, a, b, c, params->ith, params->nth, p->userdata);
|
p.fun(dst, a, b, c, params->ith, params->nth, p.userdata);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ggml_compute_forward_cross_entropy_loss
|
// ggml_compute_forward_cross_entropy_loss
|
||||||
@ -17386,29 +17397,32 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
|||||||
} break;
|
} break;
|
||||||
case GGML_OP_MAP_CUSTOM1:
|
case GGML_OP_MAP_CUSTOM1:
|
||||||
{
|
{
|
||||||
struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) node->op_params;
|
struct ggml_map_custom1_op_params p;
|
||||||
if (p->n_tasks == GGML_N_TASKS_MAX) {
|
memcpy(&p, node->op_params, sizeof(p));
|
||||||
|
if (p.n_tasks == GGML_N_TASKS_MAX) {
|
||||||
n_tasks = n_threads;
|
n_tasks = n_threads;
|
||||||
} else {
|
} else {
|
||||||
n_tasks = MIN(p->n_tasks, n_threads);
|
n_tasks = MIN(p.n_tasks, n_threads);
|
||||||
}
|
}
|
||||||
} break;
|
} break;
|
||||||
case GGML_OP_MAP_CUSTOM2:
|
case GGML_OP_MAP_CUSTOM2:
|
||||||
{
|
{
|
||||||
struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) node->op_params;
|
struct ggml_map_custom2_op_params p;
|
||||||
if (p->n_tasks == GGML_N_TASKS_MAX) {
|
memcpy(&p, node->op_params, sizeof(p));
|
||||||
|
if (p.n_tasks == GGML_N_TASKS_MAX) {
|
||||||
n_tasks = n_threads;
|
n_tasks = n_threads;
|
||||||
} else {
|
} else {
|
||||||
n_tasks = MIN(p->n_tasks, n_threads);
|
n_tasks = MIN(p.n_tasks, n_threads);
|
||||||
}
|
}
|
||||||
} break;
|
} break;
|
||||||
case GGML_OP_MAP_CUSTOM3:
|
case GGML_OP_MAP_CUSTOM3:
|
||||||
{
|
{
|
||||||
struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) node->op_params;
|
struct ggml_map_custom3_op_params p;
|
||||||
if (p->n_tasks == GGML_N_TASKS_MAX) {
|
memcpy(&p, node->op_params, sizeof(p));
|
||||||
|
if (p.n_tasks == GGML_N_TASKS_MAX) {
|
||||||
n_tasks = n_threads;
|
n_tasks = n_threads;
|
||||||
} else {
|
} else {
|
||||||
n_tasks = MIN(p->n_tasks, n_threads);
|
n_tasks = MIN(p.n_tasks, n_threads);
|
||||||
}
|
}
|
||||||
} break;
|
} break;
|
||||||
case GGML_OP_CROSS_ENTROPY_LOSS:
|
case GGML_OP_CROSS_ENTROPY_LOSS:
|
||||||
|
Loading…
Reference in New Issue
Block a user