ggml : add error handling to graph_compute (whisper/1714)

This commit is contained in:
Finn Voorhees 2024-01-03 08:39:43 -05:00 committed by Georgi Gerganov
parent c1d7cb28d3
commit 1bf681f90e
6 changed files with 18 additions and 11 deletions

View File

@ -90,7 +90,7 @@ extern "C" {
void (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan); void (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
// compute graph without a plan // compute graph without a plan
void (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph); bool (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
// check if the backend supports an operation // check if the backend supports an operation
bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op); bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);

View File

@ -195,11 +195,14 @@ void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_
ggml_backend_synchronize(backend); ggml_backend_synchronize(backend);
} }
void ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) { bool ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
backend->iface.graph_compute(backend, cgraph); if (!backend->iface.graph_compute(backend, cgraph)) {
return false;
}
// TODO: optional sync // TODO: optional sync
ggml_backend_synchronize(backend); ggml_backend_synchronize(backend);
return true;
} }
bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) { bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
@ -597,7 +600,7 @@ static void ggml_backend_cpu_graph_plan_compute(ggml_backend_t backend, ggml_bac
GGML_UNUSED(backend); GGML_UNUSED(backend);
} }
static void ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) { static bool ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context; struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
struct ggml_cplan cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads); struct ggml_cplan cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads);
@ -611,6 +614,7 @@ static void ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_c
cplan.work_data = cpu_ctx->work_data; cplan.work_data = cpu_ctx->work_data;
ggml_graph_compute(cgraph, &cplan); ggml_graph_compute(cgraph, &cplan);
return true;
} }
static bool ggml_backend_cpu_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) { static bool ggml_backend_cpu_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {

View File

@ -58,7 +58,7 @@ extern "C" {
GGML_API void ggml_backend_graph_plan_free (ggml_backend_t backend, ggml_backend_graph_plan_t plan); GGML_API void ggml_backend_graph_plan_free (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
GGML_API void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan); GGML_API void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
GGML_API void ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph); GGML_API bool ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);
GGML_API bool ggml_backend_supports_op (ggml_backend_t backend, const struct ggml_tensor * op); GGML_API bool ggml_backend_supports_op (ggml_backend_t backend, const struct ggml_tensor * op);
// tensor copy between different backends // tensor copy between different backends

View File

@ -9910,7 +9910,7 @@ static void ggml_backend_cuda_graph_plan_compute(ggml_backend_t backend, ggml_ba
UNUSED(plan); UNUSED(plan);
} }
static void ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) { static bool ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
ggml_backend_context_cuda * cuda_ctx = (ggml_backend_context_cuda *)backend->context; ggml_backend_context_cuda * cuda_ctx = (ggml_backend_context_cuda *)backend->context;
ggml_cuda_set_main_device(cuda_ctx->device); ggml_cuda_set_main_device(cuda_ctx->device);
@ -9967,6 +9967,8 @@ static void ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph
} }
UNUSED(backend); UNUSED(backend);
return true;
} }
static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, const ggml_tensor * op) { static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, const ggml_tensor * op) {

View File

@ -87,7 +87,7 @@ int * ggml_metal_get_concur_list(struct ggml_metal_context * ctx);
// same as ggml_graph_compute but uses Metal // same as ggml_graph_compute but uses Metal
// creates gf->n_threads command buffers in parallel // creates gf->n_threads command buffers in parallel
void ggml_metal_graph_compute(struct ggml_metal_context * ctx, struct ggml_cgraph * gf); bool ggml_metal_graph_compute(struct ggml_metal_context * ctx, struct ggml_cgraph * gf);
// //
// backend API // backend API

View File

@ -977,7 +977,7 @@ static bool ggml_metal_supports_op(const struct ggml_tensor * op) {
return false; return false;
} }
} }
void ggml_metal_graph_compute( bool ggml_metal_graph_compute(
struct ggml_metal_context * ctx, struct ggml_metal_context * ctx,
struct ggml_cgraph * gf) { struct ggml_cgraph * gf) {
@autoreleasepool { @autoreleasepool {
@ -2405,10 +2405,11 @@ void ggml_metal_graph_compute(
MTLCommandBufferStatus status = (MTLCommandBufferStatus) [ctx->command_buffers[i] status]; MTLCommandBufferStatus status = (MTLCommandBufferStatus) [ctx->command_buffers[i] status];
if (status != MTLCommandBufferStatusCompleted) { if (status != MTLCommandBufferStatusCompleted) {
GGML_METAL_LOG_INFO("%s: command buffer %d failed with status %lu\n", __func__, i, status); GGML_METAL_LOG_INFO("%s: command buffer %d failed with status %lu\n", __func__, i, status);
GGML_ASSERT(false); return false;
} }
} }
return true;
} }
} }
@ -2688,10 +2689,10 @@ static ggml_backend_buffer_type_t ggml_backend_metal_get_default_buffer_type(ggm
UNUSED(backend); UNUSED(backend);
} }
static void ggml_backend_metal_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) { static bool ggml_backend_metal_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
struct ggml_metal_context * metal_ctx = (struct ggml_metal_context *)backend->context; struct ggml_metal_context * metal_ctx = (struct ggml_metal_context *)backend->context;
ggml_metal_graph_compute(metal_ctx, cgraph); return ggml_metal_graph_compute(metal_ctx, cgraph);
} }
static bool ggml_backend_metal_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) { static bool ggml_backend_metal_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {