mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-08 09:41:45 +00:00
profiler: add support for different outputs
This commit is contained in:
parent
d203400cc9
commit
6cea9d8df2
@ -191,7 +191,7 @@ struct ggml_cgraph {
|
|||||||
struct ggml_tensor ** grads;
|
struct ggml_tensor ** grads;
|
||||||
struct ggml_tensor ** leafs;
|
struct ggml_tensor ** leafs;
|
||||||
|
|
||||||
struct ggml_profile_data ** prof;
|
struct ggml_profile_data * prof;
|
||||||
|
|
||||||
struct ggml_hash_set visited_hash_set;
|
struct ggml_hash_set visited_hash_set;
|
||||||
|
|
||||||
|
@ -2,42 +2,63 @@
|
|||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include <string>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
|
|
||||||
#ifdef GGML_GRAPH_PROFILER
|
#ifdef GGML_GRAPH_PROFILER
|
||||||
|
|
||||||
extern "C" void ggml_profile_graph_init(struct ggml_cgraph *cg, int n_threads)
|
struct ggml_profile_output {
|
||||||
|
const char * prefix;
|
||||||
|
FILE * stream;
|
||||||
|
};
|
||||||
|
|
||||||
|
extern "C" void ggml_graph_profile_init(struct ggml_cgraph *cg, int n_threads)
|
||||||
{
|
{
|
||||||
if (!getenv("GGML_GRAPH_PROFILE")) { return; }
|
// TODO: make this a param
|
||||||
|
const char *env = getenv("GGML_GRAPH_PROFILE");
|
||||||
|
if (!env) { return; }
|
||||||
|
|
||||||
// The number of threads may change between passes (pp vs tg).
|
// The number of threads may change between passes (pp vs tg).
|
||||||
// Allocate for max_n_threads for simplicity for now.
|
// Allocate for max_n_threads for simplicity for now.
|
||||||
// TODO: use aligned allocator
|
// TODO: use aligned allocator
|
||||||
|
|
||||||
size_t node_size = sizeof(struct ggml_profile_data) * GGML_MAX_N_THREADS;
|
size_t node_size = sizeof(struct ggml_profile_timing) * GGML_MAX_N_THREADS;
|
||||||
size_t pvec_size = sizeof(std::intptr_t) * cg->n_nodes;
|
size_t pvec_size = sizeof(std::intptr_t) * cg->n_nodes;
|
||||||
size_t data_size = node_size * cg->n_nodes;
|
size_t time_size = node_size * cg->n_nodes;
|
||||||
size_t t_size = pvec_size + data_size;
|
size_t t_size = pvec_size + time_size + sizeof(ggml_profile_output) + sizeof(ggml_profile_data);
|
||||||
|
|
||||||
cg->prof = (struct ggml_profile_data **) malloc(t_size);
|
uint8_t * ptr = (uint8_t *) malloc(t_size);
|
||||||
if (!cg->prof) {
|
if (!ptr) {
|
||||||
fprintf(stderr, "ggml-profile: failed to allocate profiling data : n_threads %d n_nodes %d\n", n_threads, cg->n_nodes);
|
fprintf(stderr, "ggml-profile: failed to allocate profiling data : n_threads %d n_nodes %d\n", n_threads, cg->n_nodes);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
memset(ptr, 0, t_size);
|
||||||
|
|
||||||
memset(cg->prof, 0, t_size);
|
// init all pointers
|
||||||
|
cg->prof = (ggml_profile_data *) ptr; ptr += sizeof(ggml_profile_data);
|
||||||
// init pre-thread pointers
|
cg->prof->output = (ggml_profile_output *) ptr; ptr += sizeof(ggml_profile_output);
|
||||||
uint8_t * data = (uint8_t *) cg->prof + pvec_size;
|
cg->prof->timing = (ggml_profile_timing **) ptr; ptr += pvec_size;
|
||||||
for (int i=0; i < cg->n_nodes; i++) {
|
for (int i=0; i < cg->n_nodes; i++) {
|
||||||
cg->prof[i] = (struct ggml_profile_data *) data; data += node_size;
|
cg->prof->timing[i] = (struct ggml_profile_timing *) ptr; ptr += node_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// init the output
|
||||||
|
ggml_profile_output *out = cg->prof->output;
|
||||||
|
if (!strcmp("stderr", env) || !strcmp("1", env)) {
|
||||||
|
out->prefix = "ggml-profile:";
|
||||||
|
out->stream = stderr;
|
||||||
|
} else {
|
||||||
|
out->prefix = "";
|
||||||
|
out->stream = fopen(env, "w");
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void ggml_profile_graph_start(struct ggml_cgraph *cg, int n_threads)
|
extern "C" void ggml_graph_profile_start(struct ggml_cgraph *cg, int n_threads)
|
||||||
{
|
{
|
||||||
if (!cg->prof) { ggml_profile_graph_init(cg, n_threads); }
|
if (!cg->prof) { ggml_graph_profile_init(cg, n_threads); }
|
||||||
if (!cg->prof) { return; }
|
if (!cg->prof) { return; }
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -89,13 +110,14 @@ static inline void ggml_profile_format_op_types(char *str, struct ggml_tensor *t
|
|||||||
p += sprintf(p, "%3s", ggml_type_name(t->type));
|
p += sprintf(p, "%3s", ggml_type_name(t->type));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
extern "C" void ggml_graph_profile_finish(struct ggml_cgraph *cg, int n_threads)
|
||||||
extern "C" void ggml_profile_graph_finish(struct ggml_cgraph *cg, int n_threads)
|
|
||||||
{
|
{
|
||||||
if (!cg->prof) { return; }
|
if (!cg->prof) { return; }
|
||||||
|
|
||||||
fprintf(stderr, "ggml-profile: | node idx | op name | proc (nsec) | sync (nsec) | total (nsec) | op dims | op types | tensor name |\n");
|
ggml_profile_output *out = cg->prof->output;
|
||||||
fprintf(stderr, "ggml-profile: | -------: | :------ | ----------: | ----------: | -----------: | ------: | -------: | ----------: |\n");
|
|
||||||
|
fprintf(out->stream, "%s| node idx | op name | proc (nsec) | sync (nsec) | total (nsec) | op dims | op types | tensor name |\n", out->prefix);
|
||||||
|
fprintf(out->stream, "%s| -------: | :------ | ----------: | ----------: | -----------: | ------: | -------: | ----------: |\n", out->prefix);
|
||||||
|
|
||||||
char dims[64 * GGML_MAX_SRC];
|
char dims[64 * GGML_MAX_SRC];
|
||||||
char types[16 * GGML_MAX_SRC];
|
char types[16 * GGML_MAX_SRC];
|
||||||
@ -107,39 +129,48 @@ extern "C" void ggml_profile_graph_finish(struct ggml_cgraph *cg, int n_threads)
|
|||||||
|
|
||||||
// add up per thread counters and reset them
|
// add up per thread counters and reset them
|
||||||
for (int t=0; t < n_threads; t++) {
|
for (int t=0; t < n_threads; t++) {
|
||||||
p_nsec += cg->prof[i][t].nsec[GGML_PROF_OP_SYNC] - cg->prof[i][t].nsec[GGML_PROF_OP_START];
|
ggml_profile_timing &timing = cg->prof->timing[i][t];
|
||||||
s_nsec += cg->prof[i][t].nsec[GGML_PROF_OP_END] - cg->prof[i][t].nsec[GGML_PROF_OP_SYNC];
|
|
||||||
t_nsec += cg->prof[i][t].nsec[GGML_PROF_OP_END] - cg->prof[i][t].nsec[GGML_PROF_OP_START];
|
|
||||||
|
|
||||||
cg->prof[i][t].nsec[GGML_PROF_OP_START] = 0;
|
p_nsec += timing.nsec[GGML_PROF_OP_SYNC] - timing.nsec[GGML_PROF_OP_START];
|
||||||
cg->prof[i][t].nsec[GGML_PROF_OP_SYNC] = 0;
|
s_nsec += timing.nsec[GGML_PROF_OP_END] - timing.nsec[GGML_PROF_OP_SYNC];
|
||||||
cg->prof[i][t].nsec[GGML_PROF_OP_END] = 0;
|
t_nsec += timing.nsec[GGML_PROF_OP_END] - timing.nsec[GGML_PROF_OP_START];
|
||||||
|
|
||||||
|
timing.nsec[GGML_PROF_OP_START] = 0;
|
||||||
|
timing.nsec[GGML_PROF_OP_SYNC] = 0;
|
||||||
|
timing.nsec[GGML_PROF_OP_END] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
ggml_profile_format_op_dims(dims, cg->nodes[i]);
|
ggml_profile_format_op_dims(dims, cg->nodes[i]);
|
||||||
ggml_profile_format_op_types(types, cg->nodes[i]);
|
ggml_profile_format_op_types(types, cg->nodes[i]);
|
||||||
|
|
||||||
fprintf(stderr, "ggml-profile: | %04d | %10s | %10lu | %10lu | %10lu | %46s | %22s | %20s |\n",
|
fprintf(out->stream, "%s| %04d | %10s | %10lu | %10lu | %10lu | %46s | %22s | %20s |\n", out->prefix,
|
||||||
i, ggml_op_name(cg->nodes[i]->op),
|
i, ggml_op_name(cg->nodes[i]->op),
|
||||||
(unsigned long) p_nsec, (unsigned long) s_nsec, (unsigned long) t_nsec,
|
(unsigned long) p_nsec, (unsigned long) s_nsec, (unsigned long) t_nsec,
|
||||||
dims, types, cg->nodes[i]->name);
|
dims, types, cg->nodes[i]->name);
|
||||||
}
|
}
|
||||||
fprintf(stderr, "ggml-profile: \n"); // empty line to split tables
|
fprintf(out->stream, "%s \n", out->prefix); // empty line to split tables
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void ggml_profile_graph_free(struct ggml_cgraph *cg)
|
extern "C" void ggml_graph_profile_free(struct ggml_cgraph *cg)
|
||||||
{
|
{
|
||||||
if (!cg->prof) { return; }
|
if (!cg->prof) { return; }
|
||||||
|
|
||||||
|
ggml_profile_output *out = cg->prof->output;
|
||||||
|
if (out->stream != stderr) {
|
||||||
|
fclose(out->stream);
|
||||||
|
}
|
||||||
|
|
||||||
free(cg->prof); cg->prof = nullptr;
|
free(cg->prof); cg->prof = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void ggml_profile_op_event(const struct ggml_cgraph *cg, enum ggml_profile_event e, int node_n, int ith)
|
extern "C" void ggml_graph_profile_event(const struct ggml_cgraph *cg, enum ggml_profile_event e, int node_n, int ith)
|
||||||
{
|
{
|
||||||
if (!cg->prof) { return; }
|
if (!cg->prof) { return; }
|
||||||
|
|
||||||
using clock = std::chrono::high_resolution_clock;
|
using clock = std::chrono::high_resolution_clock;
|
||||||
cg->prof[node_n][ith].nsec[e] = std::chrono::nanoseconds(clock::now().time_since_epoch()).count();
|
|
||||||
|
ggml_profile_timing &timing = cg->prof->timing[node_n][ith];
|
||||||
|
timing.nsec[e] = std::chrono::nanoseconds(clock::now().time_since_epoch()).count();
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // GGML_GRAPH_PROFILER
|
#endif // GGML_GRAPH_PROFILER
|
||||||
|
@ -8,45 +8,66 @@
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// op profile data (per op / per thread)
|
// op profile events & timing (per op / per thread)
|
||||||
enum ggml_profile_event {
|
enum ggml_profile_event {
|
||||||
GGML_PROF_OP_START,
|
GGML_PROF_OP_START,
|
||||||
GGML_PROF_OP_SYNC,
|
GGML_PROF_OP_SYNC,
|
||||||
GGML_PROF_OP_END
|
GGML_PROF_OP_END
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ggml_profile_data {
|
struct ggml_profile_timing {
|
||||||
uint64_t nsec[GGML_PROF_OP_END + 1]; // event times in nsec
|
uint64_t nsec[GGML_PROF_OP_END + 1]; // event times in nsec
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct ggml_profile_output;
|
||||||
|
|
||||||
|
struct ggml_profile_data {
|
||||||
|
struct ggml_profile_output *output;
|
||||||
|
struct ggml_profile_timing ** timing; // per op / per thread timing
|
||||||
|
};
|
||||||
|
|
||||||
|
// check if profiling is enabled for this graph
|
||||||
|
static inline bool ggml_graph_profile_enabled(const struct ggml_cgraph *cg)
|
||||||
|
{
|
||||||
|
return cg->prof != NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
// get pointer to the timing data for specific node / thread
|
||||||
|
// can be used by the backends to populate data collected internally
|
||||||
|
static inline struct ggml_profile_timing * ggml_graph_profile_timing(const struct ggml_cgraph *cg, int node_n, int ith)
|
||||||
|
{
|
||||||
|
if (!cg->prof) { return NULL; }
|
||||||
|
return &cg->prof->timing[node_n][ith];
|
||||||
|
}
|
||||||
|
|
||||||
#ifndef GGML_GRAPH_PROFILER
|
#ifndef GGML_GRAPH_PROFILER
|
||||||
|
|
||||||
// Stub out all profiler functions
|
// Stub out all profiler functions
|
||||||
|
|
||||||
static inline void ggml_profile_graph_init(struct ggml_cgraph *cg, int n_threads)
|
static inline void ggml_graph_profile_init(struct ggml_cgraph *cg, int n_threads)
|
||||||
{
|
{
|
||||||
GGML_UNUSED(cg);
|
GGML_UNUSED(cg);
|
||||||
GGML_UNUSED(n_threads);
|
GGML_UNUSED(n_threads);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void ggml_profile_graph_start(struct ggml_cgraph *cg, int n_threads)
|
static inline void ggml_graph_profile_start(struct ggml_cgraph *cg, int n_threads)
|
||||||
{
|
{
|
||||||
GGML_UNUSED(cg);
|
GGML_UNUSED(cg);
|
||||||
GGML_UNUSED(n_threads);
|
GGML_UNUSED(n_threads);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void ggml_profile_graph_finish(struct ggml_cgraph *cg, int n_threads)
|
static inline void ggml_graph_profile_finish(struct ggml_cgraph *cg, int n_threads)
|
||||||
{
|
{
|
||||||
GGML_UNUSED(cg);
|
GGML_UNUSED(cg);
|
||||||
GGML_UNUSED(n_threads);
|
GGML_UNUSED(n_threads);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void ggml_profile_graph_free(struct ggml_cgraph *cg)
|
static inline void ggml_graph_profile_free(struct ggml_cgraph *cg)
|
||||||
{
|
{
|
||||||
GGML_UNUSED(cg);
|
GGML_UNUSED(cg);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void ggml_profile_op_event(const struct ggml_cgraph *cg, enum ggml_profile_event e, int node_n, int ith)
|
static inline void ggml_graph_profile_event(const struct ggml_cgraph *cg, enum ggml_profile_event e, int node_n, int ith)
|
||||||
{
|
{
|
||||||
GGML_UNUSED(cg);
|
GGML_UNUSED(cg);
|
||||||
GGML_UNUSED(e);
|
GGML_UNUSED(e);
|
||||||
@ -56,11 +77,11 @@ static inline void ggml_profile_op_event(const struct ggml_cgraph *cg, enum ggml
|
|||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
void ggml_profile_graph_init(struct ggml_cgraph *cg, int n_threads);
|
void ggml_graph_profile_init(struct ggml_cgraph *cg, int n_threads);
|
||||||
void ggml_profile_graph_start(struct ggml_cgraph *cg, int n_threads);
|
void ggml_graph_profile_start(struct ggml_cgraph *cg, int n_threads);
|
||||||
void ggml_profile_graph_finish(struct ggml_cgraph *cg, int n_threads);
|
void ggml_graph_profile_finish(struct ggml_cgraph *cg, int n_threads);
|
||||||
void ggml_profile_graph_free(struct ggml_cgraph *cg);
|
void ggml_graph_profile_free(struct ggml_cgraph *cg);
|
||||||
void ggml_profile_op_event(const struct ggml_cgraph *cg, enum ggml_profile_event e, int node_n, int ith);
|
void ggml_graph_profile_event(const struct ggml_cgraph *cg, enum ggml_profile_event e, int node_n, int ith);
|
||||||
|
|
||||||
#endif // GGML_GRAPH_PROFILER
|
#endif // GGML_GRAPH_PROFILER
|
||||||
|
|
||||||
|
@ -19876,7 +19876,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|||||||
for (int node_n = 0; node_n < cgraph->n_nodes && !tp->abort; node_n++) {
|
for (int node_n = 0; node_n < cgraph->n_nodes && !tp->abort; node_n++) {
|
||||||
struct ggml_tensor * node = cgraph->nodes[node_n];
|
struct ggml_tensor * node = cgraph->nodes[node_n];
|
||||||
|
|
||||||
ggml_profile_op_event(cgraph, GGML_PROF_OP_START, node_n, state->ith);
|
ggml_graph_profile_event(cgraph, GGML_PROF_OP_START, node_n, state->ith);
|
||||||
|
|
||||||
ggml_compute_forward(¶ms, node);
|
ggml_compute_forward(¶ms, node);
|
||||||
|
|
||||||
@ -19886,11 +19886,16 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|||||||
tp->ec = GGML_STATUS_ABORTED;
|
tp->ec = GGML_STATUS_ABORTED;
|
||||||
}
|
}
|
||||||
|
|
||||||
ggml_profile_op_event(cgraph, GGML_PROF_OP_SYNC, node_n, state->ith);
|
ggml_graph_profile_event(cgraph, GGML_PROF_OP_SYNC, node_n, state->ith);
|
||||||
|
|
||||||
ggml_barrier(state->threadpool);
|
ggml_barrier(state->threadpool);
|
||||||
|
|
||||||
ggml_profile_op_event(cgraph, GGML_PROF_OP_END, node_n, state->ith);
|
ggml_graph_profile_event(cgraph, GGML_PROF_OP_END, node_n, state->ith);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ggml_graph_profile_enabled(cgraph)) {
|
||||||
|
// need another barrier to flush the last timing update
|
||||||
|
ggml_barrier(state->threadpool);
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@ -20163,7 +20168,7 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
|
|||||||
threadpool->ec = GGML_STATUS_SUCCESS;
|
threadpool->ec = GGML_STATUS_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
ggml_profile_graph_start(cgraph, n_threads);
|
ggml_graph_profile_start(cgraph, n_threads);
|
||||||
|
|
||||||
#ifdef GGML_USE_OPENMP
|
#ifdef GGML_USE_OPENMP
|
||||||
if (n_threads > 1) {
|
if (n_threads > 1) {
|
||||||
@ -20195,6 +20200,8 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
|
|||||||
ggml_graph_compute_thread(&threadpool->workers[0]);
|
ggml_graph_compute_thread(&threadpool->workers[0]);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
ggml_graph_profile_finish(cgraph, n_threads);
|
||||||
|
|
||||||
// don't leave affinity set on the main thread
|
// don't leave affinity set on the main thread
|
||||||
clear_numa_thread_affinity();
|
clear_numa_thread_affinity();
|
||||||
|
|
||||||
@ -20204,8 +20211,6 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
|
|||||||
ggml_threadpool_free(threadpool);
|
ggml_threadpool_free(threadpool);
|
||||||
}
|
}
|
||||||
|
|
||||||
ggml_profile_graph_finish(cgraph, n_threads);
|
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user