Update CUDA graph on scale change plus clear nodes/params (#9550)

* Avoid using saved CUDA graph if scale changes and reset nodes/params on update Fixes https://github.com/ggerganov/llama.cpp/issues/9451 * clear before resize
2024-12-24 10:24:35 +00:00 · 2024-09-21 01:41:07 +01:00 · 2024-09-21 01:41:07 +01:00 · 41f477879f
commit 41f477879f
parent e948a7da7a
2 changed files with 10 additions and 0 deletions
--- a/ggml/src/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda.cu
@ -2478,6 +2478,7 @@ static void set_ggml_graph_node_properties(ggml_tensor * node, ggml_graph_node_p
    for (int i = 0; i < GGML_MAX_SRC; i++) {
        graph_node_properties->src_address[i] = node->src[i] ? node->src[i]->data : nullptr;
    }
    memcpy(graph_node_properties->op_params, node->op_params, GGML_MAX_OP_PARAMS);
 }
 static bool ggml_graph_node_has_matching_properties(ggml_tensor * node, ggml_graph_node_properties * graph_node_properties) {
@ -2509,6 +2510,12 @@ static bool ggml_graph_node_has_matching_properties(ggml_tensor * node, ggml_gra
            return false;
        }
    }
    if (node->op == GGML_OP_SCALE &&
        memcmp(graph_node_properties->op_params, node->op_params, GGML_MAX_OP_PARAMS) != 0) {
        return false;
    }
    return true;
 }
@ -2720,7 +2727,9 @@ GGML_CALL static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t
            // First call with null argument gets number of nodes in graph
            CUDA_CHECK(cudaGraphGetNodes(cuda_ctx->cuda_graph->graph, nullptr, &cuda_ctx->cuda_graph->num_nodes));
            // Subsequent call with non-null argument gets nodes
            cuda_ctx->cuda_graph->nodes.clear();
            cuda_ctx->cuda_graph->nodes.resize(cuda_ctx->cuda_graph->num_nodes);
            cuda_ctx->cuda_graph->params.clear();
            cuda_ctx->cuda_graph->params.resize(cuda_ctx->cuda_graph->num_nodes);
            if (cuda_ctx->cuda_graph->num_nodes > 0) {
                CUDA_CHECK(cudaGraphGetNodes(cuda_ctx->cuda_graph->graph, cuda_ctx->cuda_graph->nodes.data(), &cuda_ctx->cuda_graph->num_nodes));
--- a/ggml/src/ggml-cuda/common.cuh
+++ b/ggml/src/ggml-cuda/common.cuh
@ -569,6 +569,7 @@ struct ggml_graph_node_properties {
    int64_t ne[GGML_MAX_DIMS];
    size_t nb[GGML_MAX_DIMS];
    void * src_address[GGML_MAX_SRC];
    int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)];
 };
 struct ggml_cuda_graph {