ggml : sync (mem align to header + conv_transpose_2d fixes + ggml_alloc) (#2852)

* ggml : sync (mem align to header + conv_transpose_2d fixes)

ggml-ci

* ggml-alloc : minor fix

* ggml-alloc : sync more fixes

parent 92b1bbd2ec
commit 35feac6560
ggml-alloc.c

@@ -268,7 +268,7 @@ struct ggml_allocr * ggml_allocr_new(void * data, size_t size, size_t alignment)
         /*.parse_seq = */ {0},
         /*.parse_seq_len = */ 0,
 #ifdef GGML_ALLOCATOR_DEBUG
-        /*.allocated_tensors = */ = {0},
+        /*.allocated_tensors = */ {0},
 #endif
     };

@@ -297,7 +297,7 @@ struct ggml_allocr * ggml_allocr_new_measure(size_t alignment) {
         /*.parse_seq = */ {0},
         /*.parse_seq_len = */ 0,
 #ifdef GGML_ALLOCATOR_DEBUG
-        /*.allocated_tensors = */ = {0},
+        /*.allocated_tensors = */ {0},
 #endif
     };

@@ -556,7 +556,7 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
                 struct ggml_tensor * view_src = get_view_source(parent);
                 struct hash_node * view_src_hn = hash_get(ht, view_src);
                 view_src_hn->n_views -= 1;
-                AT_PRINTF("view_src %s\n", view_src->name);
+                AT_PRINTF("view_src %s: %d children, %d views\n", view_src->name, view_src_hn->n_children, view_src_hn->n_views);
                 if (view_src_hn->n_views == 0 && view_src_hn->n_children == 0 && view_src->data != node->data) {
                     ggml_allocator_free_tensor(alloc, view_src);
                 }
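For orientation: the allocator reference-counts each tensor — n_children counts the ops that consume it, n_views counts the views aliasing its buffer — and the enriched debug print now reports both counters at the moment a view is released. A schematic of the release rule the condition above tests, using the hash_node fields shown in the hunk (the surrounding bookkeeping is omitted):

    struct hash_node { int n_children; int n_views; };

    // A view source can be freed once no op consumes it, no view aliases it,
    // and it does not share its buffer with the node currently being placed.
    static int can_free_view_src(const struct hash_node * hn,
                                 const void * view_src_data, const void * node_data) {
        return hn->n_views == 0 && hn->n_children == 0 && view_src_data != node_data;
    }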
22
ggml.c
22
ggml.c
@@ -157,12 +157,6 @@ typedef void * thread_ret_t;
 //#define GGML_SOFT_MAX_ACCELERATE
 #endif

-#if UINTPTR_MAX == 0xFFFFFFFF
-    #define GGML_MEM_ALIGN 4
-#else
-    #define GGML_MEM_ALIGN 16
-#endif
-
 //
 // logging
 //
@@ -7098,11 +7092,13 @@ struct ggml_tensor * ggml_conv_transpose_2d_p0(
     };

     struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

+    ggml_set_op_params_i32(result, 0, stride);
+
     result->op = GGML_OP_CONV_TRANSPOSE_2D;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
     result->src[1] = b;
-    result->src[2] = ggml_new_i32(ctx, stride);

     return result;
 }
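For context: this hunk moves the stride out of a separate i32 tensor in src[2] and into the tensor's inline op_params buffer (GGML_MAX_OP_PARAMS bytes, see the ggml.h hunk below), so no extra context allocation is needed for a small scalar. A minimal sketch of the pattern, using the ggml.c accessors that appear in this diff (they are internal static helpers, not public API):

    // Hypothetical wrappers for illustration; the diff stores the stride
    // at op-param slot 0 and reads it back in the compute kernel.
    static void set_stride(struct ggml_tensor * t, int32_t s) {
        ggml_set_op_params_i32(t, 0, s);
    }
    static int32_t get_stride(const struct ggml_tensor * t) {
        return ggml_get_op_params_i32(t, 0);
    }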
@@ -13498,7 +13494,6 @@ static void ggml_compute_forward_conv_transpose_2d(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        const struct ggml_tensor * opt0,
         struct ggml_tensor * dst) {
     GGML_ASSERT(src0->type == GGML_TYPE_F16);
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
@@ -13558,7 +13553,7 @@ static void ggml_compute_forward_conv_transpose_2d(
         return;
     }

-    const int32_t stride = ((const int32_t*)(opt0->data))[0];
+    const int32_t stride = ggml_get_op_params_i32(dst, 0);

     // total patches in dst
     const int np = ne2;
@@ -13571,7 +13566,7 @@ static void ggml_compute_forward_conv_transpose_2d(
     const int ip1 = MIN(ip0 + dp, np);

     ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0;
-    ggml_fp16_t * const wdata_src = (ggml_fp16_t *) params->wdata + nk;
+    ggml_fp16_t * const wdata_src = wdata + nk;

     for (int i2 = ip0; i2 < ip1; i2++) { // Cout
         float * dst_data = (float *)((char *) dst->data + i2*nb2);
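The wdata_src change is a pure simplification: wdata already has type ggml_fp16_t *, so wdata + nk steps over nk half-precision elements, exactly like the old cast form, and the casts in the dot-product call below become redundant for the same reason. A tiny sketch of the equivalence (the typedef is a placeholder for ggml's real half type):

    #include <assert.h>
    #include <stdint.h>

    typedef uint16_t ggml_fp16_t;  // placeholder for ggml's half type

    void check(void * wdata_raw, size_t nk) {
        ggml_fp16_t * const wdata = (ggml_fp16_t *) wdata_raw;
        // both expressions advance by nk elements of ggml_fp16_t
        assert(wdata + nk == (ggml_fp16_t *) wdata_raw + nk);
    }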
@@ -13583,9 +13578,8 @@ static void ggml_compute_forward_conv_transpose_2d(
                 for (int i00 = 0; i00 < ne00; i00++) {
                     float v = 0;
                     ggml_vec_dot_f16(ne03, &v,
-                            (ggml_fp16_t *) wdata_src + i1n,
-                            (ggml_fp16_t *) wdata_kernel + i01*ne00*ne03 + i00*ne03);
+                            wdata_src + i1n,
+                            wdata_kernel + i01*ne00*ne03 + i00*ne03);

                     dst_data[(i11*stride + i01)*ne0 + i10*stride + i00] += v;
                 }
             }
@@ -15732,7 +15726,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
             } break;
         case GGML_OP_CONV_TRANSPOSE_2D:
             {
-                ggml_compute_forward_conv_transpose_2d(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor);
+                ggml_compute_forward_conv_transpose_2d(params, tensor->src[0], tensor->src[1], tensor);
             } break;
         case GGML_OP_POOL_1D:
             {
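Taken together, the conv_transpose_2d changes leave the public entry point unchanged: callers still pass the stride to ggml_conv_transpose_2d_p0, which now stashes it in op_params instead of an extra src tensor. A hedged usage sketch, assuming the ggml API around this commit (shapes are illustrative: the kernel is [KW, KH, Cout, Cin] in F16, the input [W, H, Cin] in F32):

    #include "ggml.h"

    int main(void) {
        struct ggml_init_params ip = { /*.mem_size =*/ 64*1024*1024, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false };
        struct ggml_context * ctx = ggml_init(ip);

        struct ggml_tensor * k = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, 3, 3, 8, 4); // 3x3 kernel, 8 out, 4 in
        struct ggml_tensor * x = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 16, 16, 4); // 16x16 input, 4 channels
        struct ggml_tensor * y = ggml_conv_transpose_2d_p0(ctx, k, x, /*stride=*/ 2);

        struct ggml_cgraph gf = ggml_build_forward(y);
        ggml_graph_compute_with_ctx(ctx, &gf, /*n_threads=*/ 4);

        ggml_free(ctx);
        return 0;
    }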

ggml.h
@@ -130,13 +130,16 @@
 // The data of the tensor is accessed via the "data" pointer. For example:
 //
 // {
-//    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 2, 3);
+//    const int nx = 2;
+//    const int ny = 3;
 //
-//    // a[2, 1] = 1.0f;
-//    *(float *) ((char *) a->data + 2*a->nb[1] + 1*a->nb[0]) = 1.0f;
+//    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, nx, ny);
 //
-//    // a[0, 2] = 2.0f;
-//    *(float *) ((char *) a->data + 0*a->nb[1] + 2*a->nb[0]) = 2.0f;
+//    for (int y = 0; y < ny; y++) {
+//        for (int x = 0; x < nx; x++) {
+//            *(float *) ((char *) a->data + y*a->nb[1] + x*a->nb[0]) = x + y;
+//        }
+//    }
 //
 //    ...
 //
 // }
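The updated comment's example compiles essentially as-is; a runnable version for reference (a sketch, assuming ggml.h is on the include path):

    #include "ggml.h"
    #include <stdio.h>

    int main(void) {
        struct ggml_init_params ip = { /*.mem_size =*/ 1024*1024, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false };
        struct ggml_context * ctx = ggml_init(ip);

        const int nx = 2;
        const int ny = 3;

        struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, nx, ny);

        // nb[0] is the byte stride between columns, nb[1] between rows
        for (int y = 0; y < ny; y++) {
            for (int x = 0; x < nx; x++) {
                *(float *) ((char *) a->data + y*a->nb[1] + x*a->nb[0]) = x + y;
            }
        }

        printf("a[1,1] = %.1f\n", *(float *) ((char *) a->data + 1*a->nb[1] + 1*a->nb[0])); // prints 2.0
        ggml_free(ctx);
        return 0;
    }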
@@ -211,6 +214,11 @@
 #define GGML_MAX_OP_PARAMS 32
 #define GGML_DEFAULT_N_THREADS 4

+#if UINTPTR_MAX == 0xFFFFFFFF
+    #define GGML_MEM_ALIGN 4
+#else
+    #define GGML_MEM_ALIGN 16
+#endif

 #define GGML_EXIT_SUCCESS 0
 #define GGML_EXIT_ABORTED 1
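Moving GGML_MEM_ALIGN into the public header lets callers match ggml's alignment when preparing their own buffers, e.g. for ggml_init_params.mem_buffer. A sketch under that assumption (aligned_alloc is C11 and requires the size to be a multiple of the alignment; the helper name is illustrative):

    #include "ggml.h"
    #include <stdlib.h>

    // Round the requested size up to a multiple of GGML_MEM_ALIGN and
    // return a buffer aligned the same way ggml aligns its own allocations.
    static void * make_aligned_buffer(size_t size) {
        const size_t padded = (size + GGML_MEM_ALIGN - 1) & ~((size_t) GGML_MEM_ALIGN - 1);
        return aligned_alloc(GGML_MEM_ALIGN, padded);
    }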