mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 19:50:17 +00:00
ggml : better PERF prints + support "LLAMA_PERF=1 make"
This commit is contained in:
parent
53c8434398
commit
e4422e299c
4
Makefile
4
Makefile
@@ -117,6 +117,10 @@ ifdef LLAMA_GPROF
|
|||||||
CFLAGS += -pg
|
CFLAGS += -pg
|
||||||
CXXFLAGS += -pg
|
CXXFLAGS += -pg
|
||||||
endif
|
endif
|
||||||
|
ifdef LLAMA_PERF
|
||||||
|
CFLAGS += -DGGML_PERF
|
||||||
|
CXXFLAGS += -DGGML_PERF
|
||||||
|
endif
|
||||||
ifneq ($(filter aarch64%,$(UNAME_M)),)
|
ifneq ($(filter aarch64%,$(UNAME_M)),)
|
||||||
CFLAGS += -mcpu=native
|
CFLAGS += -mcpu=native
|
||||||
CXXFLAGS += -mcpu=native
|
CXXFLAGS += -mcpu=native
|
||||||
|
4
ggml.c
4
ggml.c
@@ -11239,7 +11239,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
|||||||
|
|
||||||
perf_total_per_op_us[node->op] += node->perf_time_us;
|
perf_total_per_op_us[node->op] += node->perf_time_us;
|
||||||
|
|
||||||
GGML_PRINT(" - %3d: [ %" PRId64 ", %" PRId64 ", %" PRId64 "] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n",
|
GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n",
|
||||||
i,
|
i,
|
||||||
node->ne[0], node->ne[1], node->ne[2],
|
node->ne[0], node->ne[1], node->ne[2],
|
||||||
GGML_OP_LABEL[node->op], node->is_param ? "x" : node->grad ? "g" : " ", node->perf_runs,
|
GGML_OP_LABEL[node->op], node->is_param ? "x" : node->grad ? "g" : " ", node->perf_runs,
|
||||||
@@ -11253,7 +11253,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
|||||||
for (int i = 0; i < cgraph->n_leafs; i++) {
|
for (int i = 0; i < cgraph->n_leafs; i++) {
|
||||||
struct ggml_tensor * node = cgraph->leafs[i];
|
struct ggml_tensor * node = cgraph->leafs[i];
|
||||||
|
|
||||||
GGML_PRINT(" - %3d: [ %" PRId64 ", %" PRId64 "] %8s\n",
|
GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s\n",
|
||||||
i,
|
i,
|
||||||
node->ne[0], node->ne[1],
|
node->ne[0], node->ne[1],
|
||||||
GGML_OP_LABEL[node->op]);
|
GGML_OP_LABEL[node->op]);
|
||||||
|
@@ -1250,9 +1250,11 @@ static bool llama_eval_internal(
|
|||||||
ggml_build_forward_expand(&gf, inpL);
|
ggml_build_forward_expand(&gf, inpL);
|
||||||
ggml_graph_compute (ctx0, &gf);
|
ggml_graph_compute (ctx0, &gf);
|
||||||
|
|
||||||
|
#ifdef GGML_PERF
|
||||||
// print timing information per ggml operation (for debugging purposes)
|
// print timing information per ggml operation (for debugging purposes)
|
||||||
// requires GGML_PERF to be defined
|
// requires GGML_PERF to be defined
|
||||||
//ggml_graph_print(&gf);
|
ggml_graph_print(&gf);
|
||||||
|
#endif
|
||||||
|
|
||||||
// plot the computation graph in dot format (for debugging purposes)
|
// plot the computation graph in dot format (for debugging purposes)
|
||||||
//if (n_past%100 == 0) {
|
//if (n_past%100 == 0) {
|
||||||
|
Loading…
Reference in New Issue
Block a user