This commit is contained in:
Michael Podvitskiy 2024-09-22 13:40:46 +05:00 committed by GitHub
commit fdd143a581
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -16612,7 +16612,7 @@ static void llama_output_reorder(struct llama_context * ctx) {
}
}
-static void llama_graph_compute(
+static enum ggml_status llama_graph_compute(
llama_context & lctx,
ggml_cgraph * gf,
int n_threads,
@@ -16634,9 +16634,11 @@ static void llama_graph_compute(
}
#endif
-ggml_backend_sched_graph_compute_async(lctx.sched, gf);
+auto status = ggml_backend_sched_graph_compute_async(lctx.sched, gf);
// fprintf(stderr, "splits: %d\n", ggml_backend_sched_get_n_splits(lctx.sched));
+return status;
}
// decode a batch of tokens by evaluating the transformer
@@ -16818,7 +16820,18 @@ static int llama_decode_internal(
llama_set_inputs(lctx, ubatch);
-llama_graph_compute(lctx, gf, n_threads, threadpool);
+const auto compute_status = llama_graph_compute(lctx, gf, n_threads, threadpool);
+switch (compute_status) {
+case GGML_STATUS_SUCCESS:
+break;
+case GGML_STATUS_ABORTED:
+return 2;
+case GGML_STATUS_ALLOC_FAILED:
+return -2;
+case GGML_STATUS_FAILED:
+default:
+return -3;
+}
// update the kv ring buffer
{
@@ -17038,7 +17051,18 @@ static int llama_encode_internal(
llama_set_inputs(lctx, ubatch);
-llama_graph_compute(lctx, gf, n_threads, threadpool);
+const auto compute_status = llama_graph_compute(lctx, gf, n_threads, threadpool);
+switch (compute_status) {
+case GGML_STATUS_SUCCESS:
+break;
+case GGML_STATUS_ABORTED:
+return 2;
+case GGML_STATUS_ALLOC_FAILED:
+return -2;
+case GGML_STATUS_FAILED:
+default:
+return -3;
+}
// extract embeddings
if (embd) {