Reset schedule earlier to allow overlap with ggml graph computation on device (#6933)

* Reset schedule earlier to allow overlap with graph computation on device
This commit is contained in:
agray3 2024-04-26 19:08:30 +01:00 committed by GitHub
parent 0c4d489e29
commit 928e0b7013
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 11 additions and 5 deletions

View File

@ -1784,12 +1784,14 @@ void ggml_backend_sched_free(ggml_backend_sched_t sched) {
// Reset the scheduler state so it is ready for the next run.
//
// If the scheduler is not already flagged as reset, the hash-set keys and the
// tensor_copies table are zero-filled and tensor_backend_id is filled with
// 0xFF bytes, each over hash_set.size entries, and is_reset is then set.
// is_alloc is cleared on every call, whether or not the tables were wiped.
void ggml_backend_sched_reset(ggml_backend_sched_t sched) {
    // the allocation flag is dropped unconditionally
    sched->is_alloc = false;

    if (sched->is_reset) {
        // already flagged as reset: skip the table wipes
        return;
    }

    const size_t n_entries = sched->hash_set.size;

    memset(sched->hash_set.keys, 0, n_entries * sizeof(*sched->hash_set.keys)); // NOLINT
    // every byte becomes 0xFF; presumably reads back as -1 per element
    // (element type is declared outside this view -- confirm)
    memset(sched->tensor_backend_id, -1, n_entries * sizeof(*sched->tensor_backend_id));
    memset(sched->tensor_copies, 0, n_entries * sizeof(*sched->tensor_copies));

    sched->is_reset = true;
}

View File

@ -11473,6 +11473,10 @@ static int llama_decode_internal(
}
}
// Reset state for the next token before backend sync, to allow the CPU activities in the reset to
// overlap with device computation.
ggml_backend_sched_reset(lctx.sched);
return 0;
}