ggml : better PERF prints + support "LLAMA_PERF=1 make"

This commit is contained in:
Georgi Gerganov 2023-04-23 18:15:39 +03:00
parent 53c8434398
commit e4422e299c
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735
3 changed files with 9 additions and 3 deletions

View File

@ -117,6 +117,10 @@ ifdef LLAMA_GPROF
CFLAGS += -pg CFLAGS += -pg
CXXFLAGS += -pg CXXFLAGS += -pg
endif endif
ifdef LLAMA_PERF
CFLAGS += -DGGML_PERF
CXXFLAGS += -DGGML_PERF
endif
ifneq ($(filter aarch64%,$(UNAME_M)),) ifneq ($(filter aarch64%,$(UNAME_M)),)
CFLAGS += -mcpu=native CFLAGS += -mcpu=native
CXXFLAGS += -mcpu=native CXXFLAGS += -mcpu=native

4
ggml.c
View File

@ -11239,7 +11239,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
perf_total_per_op_us[node->op] += node->perf_time_us; perf_total_per_op_us[node->op] += node->perf_time_us;
GGML_PRINT(" - %3d: [ %" PRId64 ", %" PRId64 ", %" PRId64 "] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n", GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n",
i, i,
node->ne[0], node->ne[1], node->ne[2], node->ne[0], node->ne[1], node->ne[2],
GGML_OP_LABEL[node->op], node->is_param ? "x" : node->grad ? "g" : " ", node->perf_runs, GGML_OP_LABEL[node->op], node->is_param ? "x" : node->grad ? "g" : " ", node->perf_runs,
@ -11253,7 +11253,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
for (int i = 0; i < cgraph->n_leafs; i++) { for (int i = 0; i < cgraph->n_leafs; i++) {
struct ggml_tensor * node = cgraph->leafs[i]; struct ggml_tensor * node = cgraph->leafs[i];
GGML_PRINT(" - %3d: [ %" PRId64 ", %" PRId64 "] %8s\n", GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s\n",
i, i,
node->ne[0], node->ne[1], node->ne[0], node->ne[1],
GGML_OP_LABEL[node->op]); GGML_OP_LABEL[node->op]);

View File

@ -1250,9 +1250,11 @@ static bool llama_eval_internal(
ggml_build_forward_expand(&gf, inpL); ggml_build_forward_expand(&gf, inpL);
ggml_graph_compute (ctx0, &gf); ggml_graph_compute (ctx0, &gf);
#ifdef GGML_PERF
// print timing information per ggml operation (for debugging purposes) // print timing information per ggml operation (for debugging purposes)
// requires GGML_PERF to be defined // requires GGML_PERF to be defined
//ggml_graph_print(&gf); ggml_graph_print(&gf);
#endif
// plot the computation graph in dot format (for debugging purposes) // plot the computation graph in dot format (for debugging purposes)
//if (n_past%100 == 0) { //if (n_past%100 == 0) {