mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-11 03:01:45 +00:00
Workaround to set node->backend
This commit is contained in:
parent
9106232260
commit
bb590f1482
@ -1599,8 +1599,8 @@ bool ggml_cl_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tens
|
|||||||
// TODO: find the optimal values for these
|
// TODO: find the optimal values for these
|
||||||
if ((src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) &&
|
if ((src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) &&
|
||||||
src1->type == GGML_TYPE_F32 &&
|
src1->type == GGML_TYPE_F32 &&
|
||||||
dst->type == GGML_TYPE_F32 &&
|
dst->type == GGML_TYPE_F32 /*&&
|
||||||
((ne0 >= 32 && ne1 >= 32 && ne10 >= 32) || src0->backend == GGML_BACKEND_GPU)) {
|
((ne0 >= 32 && ne1 >= 32 && ne10 >= 32) || src0->backend == GGML_BACKEND_GPU)*/) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
13
ggml.c
13
ggml.c
@ -15938,6 +15938,18 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
|
|||||||
|
|
||||||
struct ggml_task_stage *stages = node->task_profile.stages;
|
struct ggml_task_stage *stages = node->task_profile.stages;
|
||||||
|
|
||||||
|
// Workaround to set node->backend.
|
||||||
|
for (int j = 0; j < 3; j++) {
|
||||||
|
if (node->backend == GGML_BACKEND_CPU &&
|
||||||
|
(stages[j].backend & GGML_TASK_BACKEND_GPU)) {
|
||||||
|
if (ggml_cpu_has_cublas() || ggml_cpu_has_clblast()) {
|
||||||
|
node->backend = GGML_BACKEND_GPU;
|
||||||
|
} else {
|
||||||
|
GGML_ASSERT(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// compute stage n_tasks.
|
// compute stage n_tasks.
|
||||||
int n_tasks = stages[1].parallel ? n_threads : 1;
|
int n_tasks = stages[1].parallel ? n_threads : 1;
|
||||||
|
|
||||||
@ -16008,6 +16020,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
|
|||||||
|
|
||||||
if (comp_backend == GGML_TASK_BACKEND_GPU_CL) {
|
if (comp_backend == GGML_TASK_BACKEND_GPU_CL) {
|
||||||
#if defined(GGML_USE_CLBLAST)
|
#if defined(GGML_USE_CLBLAST)
|
||||||
|
GGML_ASSERT(ggml_cl_can_mul_mat(node->src0, node->src1, node));
|
||||||
cur = ggml_cl_mul_mat_get_wsize(node->src0, node->src1, node);
|
cur = ggml_cl_mul_mat_get_wsize(node->src0, node->src1, node);
|
||||||
#else
|
#else
|
||||||
GGML_ASSERT(false);
|
GGML_ASSERT(false);
|
||||||
|
@ -85,7 +85,6 @@ ggml_task_profiles_mock_qxx_provider(struct ggml_tensor *node,
|
|||||||
struct ggml_task_profile *profiles) {
|
struct ggml_task_profile *profiles) {
|
||||||
UNUSED(node);
|
UNUSED(node);
|
||||||
profiles[0].stages[0].backend = GGML_TASK_BACKEND_CPU;
|
profiles[0].stages[0].backend = GGML_TASK_BACKEND_CPU;
|
||||||
profiles[0].stages[0].backend = GGML_TASK_BACKEND_CPU;
|
|
||||||
profiles[0].stages[1].backend = GGML_TASK_BACKEND_CPU;
|
profiles[0].stages[1].backend = GGML_TASK_BACKEND_CPU;
|
||||||
profiles[1].stages[0].backend = GGML_TASK_BACKEND_CPU;
|
profiles[1].stages[0].backend = GGML_TASK_BACKEND_CPU;
|
||||||
profiles[1].stages[1].backend = GGML_TASK_BACKEND_CPU_BLAS;
|
profiles[1].stages[1].backend = GGML_TASK_BACKEND_CPU_BLAS;
|
||||||
|
Loading…
Reference in New Issue
Block a user