mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-27 03:44:35 +00:00
llama : profiling the attention compute
This commit is contained in:
parent
22c69a2794
commit
2471d56a2e
18
llama.cpp
18
llama.cpp
@@ -5815,6 +5815,24 @@ static struct ggml_cgraph * llama_build_graph(
|
|||||||
GGML_ASSERT(false);
|
GGML_ASSERT(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if 1
|
||||||
|
for (int i = 0; i < result->n_nodes; ++i) {
|
||||||
|
struct ggml_tensor * node = result->nodes[i];
|
||||||
|
if (getenv("SKIP_KQ_ALL")) {
|
||||||
|
if (
|
||||||
|
strcmp(node->name, "KQ") == 0 ||
|
||||||
|
strcmp(node->name, "KQ_scaled") == 0 ||
|
||||||
|
strcmp(node->name, "KQ_masked") == 0 ||
|
||||||
|
strcmp(node->name, "KQ_soft_max") == 0 ||
|
||||||
|
strcmp(node->name, "KQV") == 0 ||
|
||||||
|
false) {
|
||||||
|
//printf("skipping %s\n", dst->name);
|
||||||
|
node->op = GGML_OP_NONE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user