ggml : hide ggml_object, ggml_cgraph, ggml_hash_set (#9408)

* ggml : hide ggml_object, ggml_cgraph, ggml_hash_set

ggml-ci

* ggml : add ggml-impl.h to backends

* ggml : fix compiler warnings

ggml-ci

* ggml : add assert upon adding nodes
This commit is contained in:
Georgi Gerganov 2024-09-12 14:23:49 +03:00 committed by GitHub
parent c9c8575a1a
commit d6a04f872d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
18 changed files with 170 additions and 129 deletions

View File

@ -183,7 +183,7 @@ int main(int argc, char ** argv) {
ggml_graph_compute_helper(work_buffer, gf, benchmark_params.n_threads); ggml_graph_compute_helper(work_buffer, gf, benchmark_params.n_threads);
TENSOR_DUMP(gf->nodes[0]); TENSOR_DUMP(ggml_graph_node(gf, 0));
printf("\n------ Test 2 - Matrix Mult via %s code\n", ggml_type_name(qtype)); printf("\n------ Test 2 - Matrix Mult via %s code\n", ggml_type_name(qtype));
@ -224,7 +224,7 @@ int main(int argc, char ** argv) {
// Let's use the F32 result from above as a reference for the quantized multiplication // Let's use the F32 result from above as a reference for the quantized multiplication
float sum_of_F32_reference = tensor_sum_elements(gf->nodes[0]); float sum_of_F32_reference = tensor_sum_elements(ggml_graph_node(gf, 0));
printf("Iteration;NThreads; SizeX; SizeY; SizeZ; Required_FLOPS; Elapsed_u_Seconds; gigaFLOPS\n"); printf("Iteration;NThreads; SizeX; SizeY; SizeZ; Required_FLOPS; Elapsed_u_Seconds; gigaFLOPS\n");
printf("=====================================================================================\n"); printf("=====================================================================================\n");
@ -252,7 +252,7 @@ int main(int argc, char ** argv) {
// Check that the matrix multiplication result is in the right ballpark // Check that the matrix multiplication result is in the right ballpark
// We cannot use the exact value from the F32 multiplication because the quantizuation will be slightly different // We cannot use the exact value from the F32 multiplication because the quantizuation will be slightly different
float sum_of_Q4_result = tensor_sum_elements(gf31->nodes[0]); float sum_of_Q4_result = tensor_sum_elements(ggml_graph_node(gf31, 0));
float delta = std::abs(sum_of_Q4_result - sum_of_F32_reference); float delta = std::abs(sum_of_Q4_result - sum_of_F32_reference);
float allowed_delta = (sum_of_F32_reference) / 1000 / 1000; // Let's accept an epsilon of 10^-6 float allowed_delta = (sum_of_F32_reference) / 1000 / 1000; // Let's accept an epsilon of 10^-6

View File

@ -226,8 +226,8 @@ static ggml_status compute_piter(
result.eigenvectors.resize(params.n_batch); result.eigenvectors.resize(params.n_batch);
result.distances.resize(params.n_batch); result.distances.resize(params.n_batch);
// get output nodes // get output nodes
for (int i = 0; i < gf->n_nodes; ++i) { for (int i = 0; i < ggml_graph_n_nodes(gf); ++i) {
auto node = gf->nodes[i]; auto node = ggml_graph_node(gf, i);
int iter = -1; int iter = -1;
// find b_tensor (without copying data from device) // find b_tensor (without copying data from device)
if ((iter = extract_i("b_tensor_norm_", node->name)) > -1) { if ((iter = extract_i("b_tensor_norm_", node->name)) > -1) {

View File

@ -370,7 +370,7 @@ struct lora_merge_ctx {
// write data to output file // write data to output file
{ {
auto result = gf->nodes[gf->n_nodes - 1]; auto * result = ggml_graph_node(gf, -1);
size_t len = ggml_nbytes(result); size_t len = ggml_nbytes(result);
if (read_buf.size() < len) { if (read_buf.size() < len) {
read_buf.resize(len); read_buf.resize(len);

View File

@ -2449,7 +2449,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
ggml_backend_graph_compute(ctx->backend, gf); ggml_backend_graph_compute(ctx->backend, gf);
// the last node is the embedding tensor // the last node is the embedding tensor
struct ggml_tensor * embeddings = gf->nodes[gf->n_nodes - 1]; struct ggml_tensor * embeddings = ggml_graph_node(gf, -1);
// copy the embeddings to the location passed by the user // copy the embeddings to the location passed by the user
ggml_backend_tensor_get(embeddings, vec, 0, ggml_nbytes(embeddings)); ggml_backend_tensor_get(embeddings, vec, 0, ggml_nbytes(embeddings));

View File

@ -184,7 +184,7 @@ static bool clip_llava_handle_patches(clip_ctx * ctx_clip, std::vector<float *>
// ggml_tensor_printf(flatten,"flatten",__LINE__,false,false); // ggml_tensor_printf(flatten,"flatten",__LINE__,false,false);
ggml_build_forward_expand(gf, flatten); ggml_build_forward_expand(gf, flatten);
ggml_graph_compute_with_ctx(model.ctx, gf, 1); ggml_graph_compute_with_ctx(model.ctx, gf, 1);
struct ggml_tensor* result = gf->nodes[gf->n_nodes - 1]; struct ggml_tensor* result = ggml_graph_node(gf, -1);
memcpy(image_embd_out, image_embd_v[0], clip_embd_nbytes(ctx_clip)); // main image as global context memcpy(image_embd_out, image_embd_v[0], clip_embd_nbytes(ctx_clip)); // main image as global context
// append without newline tokens (default behavior in llava_arch when not using unpad ): // append without newline tokens (default behavior in llava_arch when not using unpad ):

View File

@ -358,6 +358,7 @@ extern "C" {
struct ggml_object; struct ggml_object;
struct ggml_context; struct ggml_context;
struct ggml_cgraph;
// NOTE: always add types at the end of the enum to keep backward compatibility // NOTE: always add types at the end of the enum to keep backward compatibility
enum ggml_type { enum ggml_type {
@ -575,20 +576,6 @@ extern "C" {
GGML_TENSOR_FLAG_PARAM = 4, GGML_TENSOR_FLAG_PARAM = 4,
}; };
// ggml object
// Bookkeeping record for one object: an offset, a size, a type tag and a link
// to another record of the same kind.
// NOTE(review): presumably these records are chained inside a ggml_context
// memory pool - confirm against the context implementation.
struct ggml_object {
// offset of the object - presumably relative to the owning buffer (TODO confirm)
size_t offs;
// size of the object - presumably in bytes (TODO confirm)
size_t size;
// link to another ggml_object record
struct ggml_object * next;
// kind of object this record describes
enum ggml_object_type type;
// 4 explicit padding bytes following `type`
char padding[4];
};
// size of a single ggml_object record, as given by sizeof
static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
// n-dimensional tensor // n-dimensional tensor
struct ggml_tensor { struct ggml_tensor {
enum ggml_type type; enum ggml_type type;
@ -671,35 +658,6 @@ extern "C" {
void * abort_callback_data; void * abort_callback_data;
}; };
// Evaluation-order selector; stored in the `order` field of struct ggml_cgraph.
enum ggml_cgraph_eval_order {
// default value (0)
GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0,
GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT,
// number of real order values above (evaluates to 2); not itself an order
GGML_CGRAPH_EVAL_ORDER_COUNT
};
typedef uint32_t ggml_bitset_t;
// Set of tensor pointers backed by a key array plus a bitset that marks which
// key slots are occupied.
struct ggml_hash_set {
// NOTE(review): presumably the number of slots in `keys` - confirm
size_t size;
ggml_bitset_t * used; // whether or not the keys are in use i.e. set
struct ggml_tensor ** keys; // actual tensors in the set, keys[i] is only defined if ggml_bitset_get(used, i)
};
// computation graph
// Holds arrays of tensor pointers (nodes, grads, leafs) together with their
// counts, a hash set of visited tensors and the evaluation order.
struct ggml_cgraph {
// NOTE(review): presumably the allocated capacity of the arrays below - confirm
int size;
// number of entries currently used in `nodes`
int n_nodes;
// number of entries currently used in `leafs`
int n_leafs;
struct ggml_tensor ** nodes;
// NOTE(review): presumably gradient tensors parallel to `nodes`, and may be
// absent when the graph is created without grads - confirm
struct ggml_tensor ** grads;
struct ggml_tensor ** leafs;
// tensors already visited - presumably used while building the graph (confirm)
struct ggml_hash_set visited_hash_set;
enum ggml_cgraph_eval_order order;
};
// scratch buffer // scratch buffer
struct ggml_scratch { struct ggml_scratch {
size_t offs; size_t offs;
@ -2017,8 +1975,6 @@ extern "C" {
typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata); typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata); typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);
#define GGML_N_TASKS_MAX -1
GGML_API struct ggml_tensor * ggml_map_custom1( GGML_API struct ggml_tensor * ggml_map_custom1(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
@ -2088,7 +2044,6 @@ extern "C" {
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * tensor); struct ggml_tensor * tensor);
GGML_API void ggml_build_forward_expand (struct ggml_cgraph * cgraph, struct ggml_tensor * tensor); GGML_API void ggml_build_forward_expand (struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
GGML_API void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep); GGML_API void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep);
@ -2096,11 +2051,17 @@ extern "C" {
GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
GGML_API struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads); GGML_API struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads);
GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph); GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
GGML_API struct ggml_cgraph ggml_graph_view (struct ggml_cgraph * cgraph, int i0, int i1);
GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst); GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst);
GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // zero grads GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // zero grads
GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph); GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph);
GGML_API int ggml_graph_size (struct ggml_cgraph * cgraph);
GGML_API struct ggml_tensor * ggml_graph_node (struct ggml_cgraph * cgraph, int i); // if i < 0, returns nodes[n_nodes + i]
GGML_API struct ggml_tensor ** ggml_graph_nodes (struct ggml_cgraph * cgraph);
GGML_API int ggml_graph_n_nodes(struct ggml_cgraph * cgraph);
GGML_API void ggml_graph_add_node(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
GGML_API size_t ggml_graph_overhead(void); GGML_API size_t ggml_graph_overhead(void);
GGML_API size_t ggml_graph_overhead_custom(size_t size, bool grads); GGML_API size_t ggml_graph_overhead_custom(size_t size, bool grads);

View File

@ -1,3 +1,4 @@
#include "ggml-impl.h"
#include "ggml-blas.h" #include "ggml-blas.h"
#include "ggml-backend-impl.h" #include "ggml-backend-impl.h"

View File

@ -30,6 +30,7 @@
#include <cstring> #include <cstring>
#include <mutex> #include <mutex>
#include "ggml-impl.h"
#include "ggml-backend-impl.h" #include "ggml-backend-impl.h"
#include "ggml-cann/aclnn_ops.h" #include "ggml-cann/aclnn_ops.h"
#include "ggml-cann/common.h" #include "ggml-cann/common.h"

View File

@ -1,5 +1,5 @@
#include "ggml-cuda.h" #include "ggml-cuda.h"
#include "ggml.h" #include "ggml-impl.h"
#include "ggml-backend-impl.h" #include "ggml-backend-impl.h"
#include "ggml-cuda/common.cuh" #include "ggml-cuda/common.cuh"

View File

@ -629,8 +629,16 @@ inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x) #define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
#endif #endif
// Evaluation-order selector; stored in the `order` field of struct ggml_cgraph.
enum ggml_cgraph_eval_order {
// default value (0)
GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0,
GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT,
// number of real order values above (evaluates to 2); not itself an order
GGML_CGRAPH_EVAL_ORDER_COUNT
};
// bitset // bitset
typedef uint32_t ggml_bitset_t;
static_assert(sizeof(ggml_bitset_t) == 4, "bitset_t constants must be updated"); static_assert(sizeof(ggml_bitset_t) == 4, "bitset_t constants must be updated");
#define BITSET_SHR 5 // log2(sizeof(ggml_bitset_t)*8) #define BITSET_SHR 5 // log2(sizeof(ggml_bitset_t)*8)
#define BITSET_MASK (sizeof(ggml_bitset_t)*8 - 1) #define BITSET_MASK (sizeof(ggml_bitset_t)*8 - 1)
@ -656,6 +664,12 @@ static inline void ggml_bitset_clear(ggml_bitset_t * bitset, size_t i) {
#define GGML_HASHSET_FULL ((size_t)-1) #define GGML_HASHSET_FULL ((size_t)-1)
#define GGML_HASHSET_ALREADY_EXISTS ((size_t)-2) #define GGML_HASHSET_ALREADY_EXISTS ((size_t)-2)
// Set of tensor pointers backed by a key array plus a bitset that marks which
// key slots are occupied.
struct ggml_hash_set {
// NOTE(review): presumably the number of slots in `keys` - confirm
size_t size;
ggml_bitset_t * used; // whether or not the keys are in use i.e. set
struct ggml_tensor ** keys; // actual tensors in the set, keys[i] is only defined if ggml_bitset_get(used, i)
};
struct ggml_hash_set ggml_hash_set_new(size_t size); struct ggml_hash_set ggml_hash_set_new(size_t size);
void ggml_hash_set_free(struct ggml_hash_set * hash_set); void ggml_hash_set_free(struct ggml_hash_set * hash_set);
@ -745,6 +759,24 @@ static size_t ggml_hash_find_or_insert(struct ggml_hash_set * hash_set, struct g
GGML_ABORT("fatal error"); GGML_ABORT("fatal error");
} }
// computation graph
// Holds arrays of tensor pointers (nodes, grads, leafs) together with their
// counts, a hash set of visited tensors and the evaluation order.
struct ggml_cgraph {
// capacity for `nodes`: ggml_graph_add_node asserts size > n_nodes before appending
int size;
// number of entries currently used in `nodes`
int n_nodes;
// number of entries currently used in `leafs`
int n_leafs;
struct ggml_tensor ** nodes;
// NOTE(review): presumably gradient tensors parallel to `nodes`, and may be
// absent when the graph is created without grads - confirm
struct ggml_tensor ** grads;
struct ggml_tensor ** leafs;
// tensors already visited - presumably used while building the graph (confirm)
struct ggml_hash_set visited_hash_set;
enum ggml_cgraph_eval_order order;
};
struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph, int i0, int i1);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View File

@ -1,4 +1,4 @@
#include "ggml.h" #include "ggml-impl.h"
#include "ggml-backend.h" #include "ggml-backend.h"
#include "ggml-backend-impl.h" #include "ggml-backend-impl.h"
#include "ggml-kompute.h" #include "ggml-kompute.h"

View File

@ -1,7 +1,7 @@
#import "ggml-metal.h" #import "ggml-metal.h"
#import "ggml-impl.h"
#import "ggml-backend-impl.h" #import "ggml-backend-impl.h"
#import "ggml.h"
#import <Foundation/Foundation.h> #import <Foundation/Foundation.h>

View File

@ -1,5 +1,5 @@
#include "ggml-rpc.h" #include "ggml-rpc.h"
#include "ggml.h" #include "ggml-impl.h"
#include "ggml-backend-impl.h" #include "ggml-backend-impl.h"
#include <cinttypes> #include <cinttypes>

View File

@ -33,7 +33,7 @@
#include <sycl/half_type.hpp> #include <sycl/half_type.hpp>
#include "ggml-sycl.h" #include "ggml-sycl.h"
#include "ggml.h" #include "ggml-impl.h"
#include "ggml-backend-impl.h" #include "ggml-backend-impl.h"
#include "ggml-sycl/backend.hpp" #include "ggml-sycl/backend.hpp"

View File

@ -21,7 +21,7 @@
#include <memory> #include <memory>
#include <mutex> #include <mutex>
#include "ggml.h" #include "ggml-impl.h"
#include "ggml-backend-impl.h" #include "ggml-backend-impl.h"
#include "ggml-vulkan-shaders.hpp" #include "ggml-vulkan-shaders.hpp"

View File

@ -287,6 +287,7 @@ void ggml_abort(const char * file, int line, const char * fmt, ...) {
#define GGML_DEBUG 0 #define GGML_DEBUG 0
#define GGML_GELU_FP16 #define GGML_GELU_FP16
#define GGML_GELU_QUICK_FP16 #define GGML_GELU_QUICK_FP16
#define GGML_N_TASKS_MAX (-1)
#define GGML_SOFT_MAX_UNROLL 4 #define GGML_SOFT_MAX_UNROLL 4
#define GGML_VEC_DOT_UNROLL 2 #define GGML_VEC_DOT_UNROLL 2
@ -1124,17 +1125,17 @@ ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type) {
{ \ { \
int offset = GGML_F32_ARR >> 1; \ int offset = GGML_F32_ARR >> 1; \
for (int i = 0; i < offset; ++i) { \ for (int i = 0; i < offset; ++i) { \
x[i] = vaddq_f32(x[i], x[offset+i]); \ (x)[i] = vaddq_f32((x)[i], (x)[offset+i]); \
} \ } \
offset >>= 1; \ offset >>= 1; \
for (int i = 0; i < offset; ++i) { \ for (int i = 0; i < offset; ++i) { \
x[i] = vaddq_f32(x[i], x[offset+i]); \ (x)[i] = vaddq_f32((x)[i], (x)[offset+i]); \
} \ } \
offset >>= 1; \ offset >>= 1; \
for (int i = 0; i < offset; ++i) { \ for (int i = 0; i < offset; ++i) { \
x[i] = vaddq_f32(x[i], x[offset+i]); \ (x)[i] = vaddq_f32((x)[i], (x)[offset+i]); \
} \ } \
res = GGML_F32x4_REDUCE_ONE(x[0]); \ (res) = GGML_F32x4_REDUCE_ONE((x)[0]); \
} }
#define GGML_F32_VEC GGML_F32x4 #define GGML_F32_VEC GGML_F32x4
@ -1165,26 +1166,26 @@ ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type) {
do { \ do { \
int offset = GGML_F16_ARR >> 1; \ int offset = GGML_F16_ARR >> 1; \
for (int i = 0; i < offset; ++i) { \ for (int i = 0; i < offset; ++i) { \
x[i] = vaddq_f16(x[i], x[offset+i]); \ (x)[i] = vaddq_f16((x)[i], (x)[offset+i]); \
} \ } \
offset >>= 1; \ offset >>= 1; \
for (int i = 0; i < offset; ++i) { \ for (int i = 0; i < offset; ++i) { \
x[i] = vaddq_f16(x[i], x[offset+i]); \ (x)[i] = vaddq_f16((x)[i], (x)[offset+i]); \
} \ } \
offset >>= 1; \ offset >>= 1; \
for (int i = 0; i < offset; ++i) { \ for (int i = 0; i < offset; ++i) { \
x[i] = vaddq_f16(x[i], x[offset+i]); \ (x)[i] = vaddq_f16((x)[i], (x)[offset+i]); \
} \ } \
const float32x4_t t0 = vcvt_f32_f16(vget_low_f16 (x[0])); \ const float32x4_t t0 = vcvt_f32_f16(vget_low_f16 ((x)[0])); \
const float32x4_t t1 = vcvt_f32_f16(vget_high_f16(x[0])); \ const float32x4_t t1 = vcvt_f32_f16(vget_high_f16((x)[0])); \
res = (ggml_float) vaddvq_f32(vaddq_f32(t0, t1)); \ (res) = (ggml_float) vaddvq_f32(vaddq_f32(t0, t1)); \
} while (0) } while (0)
#define GGML_F16_VEC GGML_F16x8 #define GGML_F16_VEC GGML_F16x8
#define GGML_F16_VEC_ZERO GGML_F16x8_ZERO #define GGML_F16_VEC_ZERO GGML_F16x8_ZERO
#define GGML_F16_VEC_SET1 GGML_F16x8_SET1 #define GGML_F16_VEC_SET1 GGML_F16x8_SET1
#define GGML_F16_VEC_LOAD(p, i) GGML_F16x8_LOAD(p) #define GGML_F16_VEC_LOAD(p, i) GGML_F16x8_LOAD(p)
#define GGML_F16_VEC_STORE(p, r, i) GGML_F16x8_STORE((ggml_fp16_internal_t *)(p), r[i]) #define GGML_F16_VEC_STORE(p, r, i) GGML_F16x8_STORE((ggml_fp16_internal_t *)(p), (r)[i])
#define GGML_F16_VEC_FMA GGML_F16x8_FMA #define GGML_F16_VEC_FMA GGML_F16x8_FMA
#define GGML_F16_VEC_ADD GGML_F16x8_ADD #define GGML_F16_VEC_ADD GGML_F16x8_ADD
#define GGML_F16_VEC_MUL GGML_F16x8_MUL #define GGML_F16_VEC_MUL GGML_F16x8_MUL
@ -1893,6 +1894,23 @@ static inline void __lsx_f16x4_store(ggml_fp16_t * x, __m128 y) {
#define GGML_F16_ARR (GGML_F16_STEP/GGML_F16_EPR) #define GGML_F16_ARR (GGML_F16_STEP/GGML_F16_EPR)
#endif #endif
//
// ggml object
//
// Bookkeeping record for one object: an offset, a size, a type tag and a link
// to another record of the same kind.
// NOTE(review): presumably these records are chained inside a ggml_context
// memory pool - confirm against the context implementation.
struct ggml_object {
// offset of the object - presumably relative to the owning buffer (TODO confirm)
size_t offs;
// size of the object - presumably in bytes (TODO confirm)
size_t size;
// link to another ggml_object record
struct ggml_object * next;
// kind of object this record describes
enum ggml_object_type type;
// 4 explicit padding bytes following `type`
char padding[4];
};
// size of a single ggml_object record, as given by sizeof
static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
// //
// ggml context // ggml context
// //
@ -19161,6 +19179,34 @@ void ggml_graph_clear(struct ggml_cgraph * cgraph) {
ggml_hash_set_reset(&cgraph->visited_hash_set); ggml_hash_set_reset(&cgraph->visited_hash_set);
} }
// Accessor: returns the `size` field of the graph.
int ggml_graph_size(struct ggml_cgraph * cgraph) {
    const int graph_size = cgraph->size;
    return graph_size;
}
// Returns the i-th node of the graph.
// A non-negative i indexes from the front and must be below n_nodes.
// A negative i indexes from the back (-1 is the last node) and must not reach
// before the first node. Either violation trips GGML_ASSERT.
struct ggml_tensor * ggml_graph_node(struct ggml_cgraph * cgraph, int i) {
    if (i >= 0) {
        // forward lookup
        GGML_ASSERT(i < cgraph->n_nodes);
        return cgraph->nodes[i];
    }

    // backward lookup: nodes[n_nodes + i]
    GGML_ASSERT(cgraph->n_nodes + i >= 0);
    return cgraph->nodes[cgraph->n_nodes + i];
}
// Accessor: returns the graph's `nodes` array pointer as-is (no copy).
struct ggml_tensor ** ggml_graph_nodes(struct ggml_cgraph * cgraph) {
    struct ggml_tensor ** node_array = cgraph->nodes;
    return node_array;
}
// Accessor: returns the `n_nodes` field of the graph.
int ggml_graph_n_nodes(struct ggml_cgraph * cgraph) {
    const int node_count = cgraph->n_nodes;
    return node_count;
}
// Appends `tensor` to the end of the graph's node array and bumps n_nodes.
// Trips GGML_ASSERT when the graph has no free node slot left.
void ggml_graph_add_node(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) {
    GGML_ASSERT(cgraph->size > cgraph->n_nodes);
    // store into the first free slot, then advance the count
    cgraph->nodes[cgraph->n_nodes++] = tensor;
}
// Android's libc implementation "bionic" does not support setting affinity // Android's libc implementation "bionic" does not support setting affinity
#if defined(__gnu_linux__) #if defined(__gnu_linux__)
static void set_numa_thread_affinity(int thread_n) { static void set_numa_thread_affinity(int thread_n) {

View File

@ -9877,8 +9877,8 @@ struct llm_build_context {
struct ggml_cgraph * append_pooling(struct ggml_cgraph * gf) { struct ggml_cgraph * append_pooling(struct ggml_cgraph * gf) {
// find result_norm tensor for input // find result_norm tensor for input
struct ggml_tensor * inp = nullptr; struct ggml_tensor * inp = nullptr;
for (int i = gf->n_nodes - 1; i >= 0; --i) { for (int i = ggml_graph_n_nodes(gf) - 1; i >= 0; --i) {
inp = gf->nodes[i]; inp = ggml_graph_node(gf, i);
if (strcmp(inp->name, "result_norm") == 0 || strcmp(inp->name, "result_embd") == 0) { if (strcmp(inp->name, "result_norm") == 0 || strcmp(inp->name, "result_embd") == 0) {
break; break;
} else { } else {
@ -16207,8 +16207,8 @@ static int llama_decode_internal(
ggml_cgraph * gf = llama_build_graph(lctx, ubatch, false); ggml_cgraph * gf = llama_build_graph(lctx, ubatch, false);
// the output is always the last tensor in the graph // the output is always the last tensor in the graph
struct ggml_tensor * res = gf->nodes[gf->n_nodes - 1]; struct ggml_tensor * res = ggml_graph_node(gf, -1);
struct ggml_tensor * embd = gf->nodes[gf->n_nodes - 2]; struct ggml_tensor * embd = ggml_graph_node(gf, -2);
if (lctx.n_outputs == 0) { if (lctx.n_outputs == 0) {
// no output // no output
@ -16217,9 +16217,9 @@ static int llama_decode_internal(
} else if (cparams.embeddings) { } else if (cparams.embeddings) {
res = nullptr; // do not extract logits for embedding case res = nullptr; // do not extract logits for embedding case
embd = nullptr; embd = nullptr;
for (int i = gf->n_nodes - 1; i >= 0; --i) { for (int i = ggml_graph_n_nodes(gf) - 1; i >= 0; --i) {
if (strcmp(gf->nodes[i]->name, "result_embd_pooled") == 0) { if (strcmp(ggml_graph_node(gf, i)->name, "result_embd_pooled") == 0) {
embd = gf->nodes[i]; embd = ggml_graph_node(gf, i);
break; break;
} }
} }
@ -16436,15 +16436,15 @@ static int llama_encode_internal(
// there are two cases here // there are two cases here
if (llama_model_has_decoder(&lctx.model)) { if (llama_model_has_decoder(&lctx.model)) {
// first case is an encoder-decoder T5 model where embeddings are passed to decoder // first case is an encoder-decoder T5 model where embeddings are passed to decoder
embd = gf->nodes[gf->n_nodes - 1]; embd = ggml_graph_node(gf, -1);
GGML_ASSERT(strcmp(embd->name, "result_norm") == 0 && "missing result_output tensor"); GGML_ASSERT(strcmp(embd->name, "result_norm") == 0 && "missing result_output tensor");
} else { } else {
// second case is an encoder-only T5 model // second case is an encoder-only T5 model
if (cparams.embeddings) { if (cparams.embeddings) {
// only output embeddings if required // only output embeddings if required
embd = gf->nodes[gf->n_nodes - 1]; embd = ggml_graph_node(gf, -1);
if (strcmp(embd->name, "result_embd_pooled") != 0) { if (strcmp(embd->name, "result_embd_pooled") != 0) {
embd = gf->nodes[gf->n_nodes - 2]; embd = ggml_graph_node(gf, -2);
} }
GGML_ASSERT(strcmp(embd->name, "result_embd_pooled") == 0 && "missing embeddings tensor"); GGML_ASSERT(strcmp(embd->name, "result_embd_pooled") == 0 && "missing embeddings tensor");
} }
@ -18492,7 +18492,7 @@ struct llama_context * llama_new_context_with_model(
// note: the number of splits during measure is higher than during inference due to the kv shift // note: the number of splits during measure is higher than during inference due to the kv shift
int n_splits = ggml_backend_sched_get_n_splits(ctx->sched); int n_splits = ggml_backend_sched_get_n_splits(ctx->sched);
LLAMA_LOG_INFO("%s: graph nodes = %d\n", __func__, gf->n_nodes); LLAMA_LOG_INFO("%s: graph nodes = %d\n", __func__, ggml_graph_n_nodes(gf));
LLAMA_LOG_INFO("%s: graph splits = %d\n", __func__, n_splits); LLAMA_LOG_INFO("%s: graph splits = %d\n", __func__, n_splits);
} }
} }

View File

@ -519,7 +519,7 @@ struct test_case {
// add sentinels as graph nodes so that they are checked in the callback // add sentinels as graph nodes so that they are checked in the callback
for (ggml_tensor * sentinel : sentinels) { for (ggml_tensor * sentinel : sentinels) {
gf->nodes[gf->n_nodes++] = sentinel; ggml_graph_add_node(gf, sentinel);
} }
// randomize tensors // randomize tensors
@ -679,9 +679,9 @@ struct test_case {
// duplicate the op // duplicate the op
size_t target_size = ggml_backend_is_cpu(backend) ? 1ULL << 33 : 1ULL << 35; // 8 GB CPU, 32 GB GPU size_t target_size = ggml_backend_is_cpu(backend) ? 1ULL << 33 : 1ULL << 35; // 8 GB CPU, 32 GB GPU
int n_runs = std::min((size_t)gf->size - gf->n_nodes, target_size / op_size(out)) + 1; int n_runs = std::min((size_t) ggml_graph_size(gf) - ggml_graph_n_nodes(gf), target_size / op_size(out)) + 1;
for (int i = 1; i < n_runs; i++) { for (int i = 1; i < n_runs; i++) {
gf->nodes[gf->n_nodes++] = out; ggml_graph_add_node(gf, out);
} }
// calculate memory // calculate memory
@ -696,11 +696,11 @@ struct test_case {
} }
return size; return size;
}; };
for (int i = 0; i < gf->n_nodes; i++) { for (int i = 0; i < ggml_graph_n_nodes(gf); ++i) {
if (ggml_is_view_op(gf->nodes[i]->op) || gf->nodes[i] == out) { if (ggml_is_view_op(ggml_graph_node(gf, i)->op) || ggml_graph_node(gf, i) == out) {
continue; continue;
} }
mem += tensor_op_size(gf->nodes[i]); mem += tensor_op_size(ggml_graph_node(gf, i));
} }
// run // run
@ -804,7 +804,7 @@ struct test_case {
ggml_graph_cpy(gf, gb); ggml_graph_cpy(gf, gb);
ggml_build_backward_expand(ctx, gf, gb, false); ggml_build_backward_expand(ctx, gf, gb, false);
if (expect.size() != 1 || expect[0] != 0.0f) { if (expect.size() != 1 || expect[0] != 0.0f) {
GGML_ASSERT(gb->n_nodes > gf->n_nodes); GGML_ASSERT(ggml_graph_n_nodes(gb) > ggml_graph_n_nodes(gf));
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) { for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
GGML_ASSERT(!(t->flags & GGML_TENSOR_FLAG_PARAM) || t->grad->op != GGML_OP_NONE); GGML_ASSERT(!(t->flags & GGML_TENSOR_FLAG_PARAM) || t->grad->op != GGML_OP_NONE);
} }