From 0ec4dab8649c00519df9f04838e28ca2cafb0e57 Mon Sep 17 00:00:00 2001 From: mqy Date: Sun, 18 Jun 2023 14:59:44 +0800 Subject: [PATCH] fixed missing break and assertion in switch; try to fix cuda link error --- ggml-cuda.h | 2 +- ggml-opencl.cpp | 2 +- ggml-opencl.h | 2 +- ggml-threading.c | 8 ++++---- ggml.c | 11 ++++++++--- 5 files changed, 15 insertions(+), 10 deletions(-) diff --git a/ggml-cuda.h b/ggml-cuda.h index 75ea94392..70bd65e22 100644 --- a/ggml-cuda.h +++ b/ggml-cuda.h @@ -15,8 +15,8 @@ struct ggml_tensor_extra_gpu { void ggml_init_cublas(void); void ggml_cuda_set_tensor_split(const float * tensor_split); +bool ggml_cuda_is_gpu_offloading(struct ggml_tensor * tensor); void ggml_cuda_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst); -bool ggml_cuda_is_gpu_offloading(const struct ggml_tensor * src0); size_t ggml_cuda_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst); void ggml_cuda_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize); diff --git a/ggml-opencl.cpp b/ggml-opencl.cpp index 28098793d..3ed9d1adb 100644 --- a/ggml-opencl.cpp +++ b/ggml-opencl.cpp @@ -1589,7 +1589,7 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor * } } -bool ggml_cl_is_gpu_offloading(struct ggml_tensor * tensor) { +bool ggml_cl_is_gpu_offloading(const struct ggml_tensor * tensor) { GGML_ASSERT(tensor); return (tensor->src0 && tensor->src0->backend == GGML_BACKEND_GPU) || (tensor->src1 && tensor->src1->backend == GGML_BACKEND_GPU); diff --git a/ggml-opencl.h b/ggml-opencl.h index 1de12f55a..6d815bbf0 100644 --- a/ggml-opencl.h +++ b/ggml-opencl.h @@ -8,8 +8,8 @@ extern "C" { void ggml_cl_init(void); +bool ggml_cl_is_gpu_offloading(const struct ggml_tensor * tensor); void ggml_cl_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst); 
-bool ggml_cl_is_gpu_offloading(struct ggml_tensor * tensor); size_t ggml_cl_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst); void ggml_cl_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize); diff --git a/ggml-threading.c b/ggml-threading.c index dada9f3fe..882639666 100644 --- a/ggml-threading.c +++ b/ggml-threading.c @@ -142,7 +142,7 @@ static int sched_yield(void) { #endif struct ggml_perf_stats { - int runs; + atomic_int runs; // total cycles atomic_int cycles; @@ -211,9 +211,9 @@ static inline void ggml_spin_unlock(volatile atomic_flag *obj) { static inline void ggml_perf_collect(struct ggml_perf_stats *st, int64_t c0, int64_t t0) { - st->runs++; - st->cycles += (ggml_cycles() - c0); - st->time_us += (ggml_time_us() - t0); + atomic_fetch_add(&st->runs, 1); + atomic_fetch_add(&st->cycles, (int)(ggml_cycles() - c0)); + atomic_fetch_add(&st->time_us, (int)(ggml_time_us() - t0)); } // A worker thread goes cond waiting. 
diff --git a/ggml.c b/ggml.c index 62750b20b..5a9e0b33e 100644 --- a/ggml.c +++ b/ggml.c @@ -15468,7 +15468,7 @@ struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cg // ---- task profiles ---- // Check the type and memeory layout for mul_mat on blas(CPU BLAS) -static bool ggml_mul_mat_check_type_mem(struct ggml_tensor *tensor) { +static bool ggml_mul_mat_check_type_mem(const struct ggml_tensor *tensor) { enum ggml_type src0_t = tensor->src0->type; enum ggml_type src1_t = tensor->src1->type; @@ -15669,6 +15669,8 @@ int ggml_get_task_profiles( p[0].stages[0].valid = true; p[0].stages[1].valid = true; p[0].stages[1].parallel = true; + } else { + GGML_ASSERT(false); } } break; case GGML_OP_SCALE: { @@ -15717,7 +15719,7 @@ int ggml_get_task_profiles( case GGML_OP_FLASH_FF: { p[0].stages[1].valid = true; p[0].stages[1].parallel = true; - } + } break; case GGML_OP_FLASH_ATTN_BACK: { p[0].stages[0].valid = true; p[0].stages[1].valid = true; @@ -15727,11 +15729,12 @@ int ggml_get_task_profiles( case GGML_OP_MAP_BINARY: { p[0].stages[1].valid = true; } break; - case GGML_OP_CROSS_ENTROPY_LOSS: + case GGML_OP_CROSS_ENTROPY_LOSS: { p[0].stages[0].valid = true; p[0].stages[1].valid = true; p[0].stages[1].parallel = true; p[0].stages[2].valid = true; + } break; case GGML_OP_CROSS_ENTROPY_LOSS_BACK: { p[0].stages[1].valid = true; p[0].stages[1].parallel = true; @@ -15764,6 +15767,8 @@ int ggml_get_task_profiles( p[i].stages[0].parallel = true; p[i].stages[1].valid = true; p[i].stages[1].wait = true; + } else { + GGML_ASSERT(false); } ++n_profiles; }