From bb590f14826ccfd0f182d005a3fe6333da14017f Mon Sep 17 00:00:00 2001 From: mqy Date: Thu, 15 Jun 2023 08:28:39 +0800 Subject: [PATCH] Workaround to set node->backend --- ggml-opencl.cpp | 4 ++-- ggml.c | 13 +++++++++++++ tests/test-ggml-tune.c | 1 - 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/ggml-opencl.cpp b/ggml-opencl.cpp index b2300a104..c9151a8e4 100644 --- a/ggml-opencl.cpp +++ b/ggml-opencl.cpp @@ -1599,8 +1599,8 @@ bool ggml_cl_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tens // TODO: find the optimal values for these if ((src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) && src1->type == GGML_TYPE_F32 && - dst->type == GGML_TYPE_F32 && - ((ne0 >= 32 && ne1 >= 32 && ne10 >= 32) || src0->backend == GGML_BACKEND_GPU)) { + dst->type == GGML_TYPE_F32 /*&& + ((ne0 >= 32 && ne1 >= 32 && ne10 >= 32) || src0->backend == GGML_BACKEND_GPU)*/) { return true; } diff --git a/ggml.c b/ggml.c index b75f33b88..b734f1a0c 100644 --- a/ggml.c +++ b/ggml.c @@ -15938,6 +15938,18 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) struct ggml_task_stage *stages = node->task_profile.stages; + // Workaround to set node->backend. + for (int j = 0; j < 3; j++) { + if (node->backend == GGML_BACKEND_CPU && + (stages[j].backend & GGML_TASK_BACKEND_GPU)) { + if (ggml_cpu_has_cublas() || ggml_cpu_has_clblast()) { + node->backend = GGML_BACKEND_GPU; + } else { + GGML_ASSERT(false); + } + } + } + // compute stage n_tasks. int n_tasks = stages[1].parallel ? 
n_threads : 1; @@ -16008,6 +16020,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) if (comp_backend == GGML_TASK_BACKEND_GPU_CL) { #if defined(GGML_USE_CLBLAST) + GGML_ASSERT(ggml_cl_can_mul_mat(node->src0, node->src1, node)); cur = ggml_cl_mul_mat_get_wsize(node->src0, node->src1, node); #else GGML_ASSERT(false); diff --git a/tests/test-ggml-tune.c b/tests/test-ggml-tune.c index e0a6950d9..913d25ff5 100644 --- a/tests/test-ggml-tune.c +++ b/tests/test-ggml-tune.c @@ -85,7 +85,6 @@ ggml_task_profiles_mock_qxx_provider(struct ggml_tensor *node, struct ggml_task_profile *profiles) { UNUSED(node); profiles[0].stages[0].backend = GGML_TASK_BACKEND_CPU; - profiles[0].stages[0].backend = GGML_TASK_BACKEND_CPU; profiles[0].stages[1].backend = GGML_TASK_BACKEND_CPU; profiles[1].stages[0].backend = GGML_TASK_BACKEND_CPU; profiles[1].stages[1].backend = GGML_TASK_BACKEND_CPU_BLAS;