llama : profiling the attention compute

This commit is contained in:
Georgi Gerganov 2023-10-22 09:22:54 +03:00
parent 22c69a2794
commit 2471d56a2e
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735

View File

@ -5815,6 +5815,24 @@ static struct ggml_cgraph * llama_build_graph(
GGML_ASSERT(false);
}
#if 1
for (int i = 0; i < result->n_nodes; ++i) {
struct ggml_tensor * node = result->nodes[i];
if (getenv("SKIP_KQ_ALL")) {
if (
strcmp(node->name, "KQ") == 0 ||
strcmp(node->name, "KQ_scaled") == 0 ||
strcmp(node->name, "KQ_masked") == 0 ||
strcmp(node->name, "KQ_soft_max") == 0 ||
strcmp(node->name, "KQV") == 0 ||
false) {
//printf("skipping %s\n", dst->name);
node->op = GGML_OP_NONE;
}
}
}
#endif
return result;
}