Fixed mmap prefetch for GPU offloading (#2529)
commit 3d9a551816
parent f6f9896ac3
@@ -219,7 +219,7 @@ struct llama_mmap {
         // prefetch/readahead impairs performance on NUMA systems
         if (numa) { prefetch = 0; }
 #ifdef __linux__
-        if (prefetch) { flags |= MAP_POPULATE; }
+        if (prefetch >= file->size) { flags |= MAP_POPULATE; }
 #endif
         addr = mmap(NULL, file->size, PROT_READ, flags, fd, 0);
         if (addr == MAP_FAILED) {
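Before this change, `if (prefetch)` set MAP_POPULATE whenever any prefetch was requested, but MAP_POPULATE always faults in the entire mapping, so a model with most layers offloaded to the GPU still pulled the whole file into host memory. After the fix, the full-file pre-fault is requested only when the prefetch budget covers the whole file; smaller budgets are served by madvise-style readahead on just the needed bytes (in llama.cpp, via posix_madvise). A minimal sketch of that logic, assuming a POSIX system; map_with_prefetch is a hypothetical helper, not a llama.cpp function:

#include <sys/mman.h>
#include <cstddef>

// Hypothetical helper: map `file_size` bytes of `fd` read-only and try to
// prefetch the first `prefetch` bytes into the page cache.
static void * map_with_prefetch(int fd, size_t file_size, size_t prefetch) {
    int flags = MAP_SHARED;
#ifdef __linux__
    // MAP_POPULATE pre-faults the *entire* mapping, so request it only
    // when the caller wants at least the whole file resident.
    if (prefetch >= file_size) { flags |= MAP_POPULATE; }
#endif
    void * addr = mmap(NULL, file_size, PROT_READ, flags, fd, 0);
    if (addr == MAP_FAILED) { return NULL; }
    if (prefetch > 0 && prefetch < file_size) {
        // Partial prefetch: advise readahead on just the needed prefix.
        posix_madvise(addr, prefetch, POSIX_MADV_WILLNEED);
    }
    return addr;
}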
@@ -747,12 +747,12 @@ struct llama_model_loader {
 
     void load_all_data(llama_progress_callback progress_callback, void * progress_callback_user_data, llama_mlock * lmlock) {
         size_t data_size = 0;
-        size_t prefetch_size = 0;
+        size_t prefetch_size = file_loader->file.size;
         size_t lock_size = 0;
         for (const llama_load_tensor & lt : tensors_map.tensors) {
             data_size += lt.size;
-            if (lt.ggml_tensor->backend == GGML_BACKEND_CPU) {
-                prefetch_size += lt.size;
+            if (lt.ggml_tensor->backend != GGML_BACKEND_CPU) {
+                prefetch_size -= lt.size;
             }
         }
 
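The second hunk changes the prefetch accounting to match. Summing the sizes of CPU tensors can never reach the full file size, because the file also holds non-tensor data (headers, vocabulary, alignment padding), so under the new `prefetch >= file->size` check MAP_POPULATE would never fire even with everything on the CPU. Starting from the whole file size and subtracting each offloaded tensor guarantees the budget equals the file size exactly when nothing is offloaded. A sketch of the same accounting, with hypothetical stand-in types rather than the llama.cpp declarations:

#include <cstddef>
#include <vector>

// Stand-in tensor metadata: which backend the tensor was assigned to
// and how many bytes it occupies in the model file.
enum backend_t { BACKEND_CPU, BACKEND_GPU };
struct tensor_info { backend_t backend; size_t size; };

// Start from the full file size and subtract every offloaded tensor, so
// the budget covers the whole file when nothing is offloaded (enabling
// MAP_POPULATE in the mmap path) and shrinks as layers move to the GPU.
size_t cpu_prefetch_size(size_t file_size, const std::vector<tensor_info> & tensors) {
    size_t prefetch_size = file_size;
    for (const tensor_info & t : tensors) {
        if (t.backend != BACKEND_CPU) {
            prefetch_size -= t.size;
        }
    }
    return prefetch_size;
}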