mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-09-22 21:16:20 +00:00
Merge 95ce058c2b
into a5b57b08ce
This commit is contained in:
commit
fdd143a581
@ -16612,7 +16612,7 @@ static void llama_output_reorder(struct llama_context * ctx) {
|
||||
}
|
||||
}
|
||||
|
||||
static void llama_graph_compute(
|
||||
static enum ggml_status llama_graph_compute(
|
||||
llama_context & lctx,
|
||||
ggml_cgraph * gf,
|
||||
int n_threads,
|
||||
@ -16634,9 +16634,11 @@ static void llama_graph_compute(
|
||||
}
|
||||
#endif
|
||||
|
||||
ggml_backend_sched_graph_compute_async(lctx.sched, gf);
|
||||
auto status = ggml_backend_sched_graph_compute_async(lctx.sched, gf);
|
||||
|
||||
// fprintf(stderr, "splits: %d\n", ggml_backend_sched_get_n_splits(lctx.sched));
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
// decode a batch of tokens by evaluating the transformer
|
||||
@ -16818,7 +16820,18 @@ static int llama_decode_internal(
|
||||
|
||||
llama_set_inputs(lctx, ubatch);
|
||||
|
||||
llama_graph_compute(lctx, gf, n_threads, threadpool);
|
||||
const auto compute_status = llama_graph_compute(lctx, gf, n_threads, threadpool);
|
||||
switch (compute_status) {
|
||||
case GGML_STATUS_SUCCESS:
|
||||
break;
|
||||
case GGML_STATUS_ABORTED:
|
||||
return 2;
|
||||
case GGML_STATUS_ALLOC_FAILED:
|
||||
return -2;
|
||||
case GGML_STATUS_FAILED:
|
||||
default:
|
||||
return -3;
|
||||
}
|
||||
|
||||
// update the kv ring buffer
|
||||
{
|
||||
@ -17038,7 +17051,18 @@ static int llama_encode_internal(
|
||||
|
||||
llama_set_inputs(lctx, ubatch);
|
||||
|
||||
llama_graph_compute(lctx, gf, n_threads, threadpool);
|
||||
const auto compute_status = llama_graph_compute(lctx, gf, n_threads, threadpool);
|
||||
switch (compute_status) {
|
||||
case GGML_STATUS_SUCCESS:
|
||||
break;
|
||||
case GGML_STATUS_ABORTED:
|
||||
return 2;
|
||||
case GGML_STATUS_ALLOC_FAILED:
|
||||
return -2;
|
||||
case GGML_STATUS_FAILED:
|
||||
default:
|
||||
return -3;
|
||||
}
|
||||
|
||||
// extract embeddings
|
||||
if (embd) {
|
||||
|
Loading…
Reference in New Issue
Block a user