mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-07 09:11:46 +00:00
Adds a _PRELOAD_MMAP_FILE flag to fully preload the model even when using mmap(). This restores consistency, so benchmarking token inference no longer depends on SSD/disk speed.
This commit is contained in:
parent
180b693a47
commit
5010b6ae84
33
llama_util.h
33
llama_util.h
@ -156,7 +156,35 @@ static std::string llama_format_win_err(DWORD err) {
|
|||||||
struct llama_mmap {
|
struct llama_mmap {
|
||||||
void * addr;
|
void * addr;
|
||||||
size_t size;
|
size_t size;
|
||||||
|
// Reads one byte from every page of the region [addr, addr + length) so that
// the whole mapping has been accessed once before the caller continues.
//
// The work is compiled in only when the build defines _PRELOAD_MMAP_FILE;
// without it the function does nothing. (NOTE(review): identifiers starting
// with an underscore followed by an uppercase letter are reserved for the
// implementation; the flag name is kept because it is the existing build
// switch.)
//
// addr   - start of the mapped region; a null pointer is ignored.
// length - size of the region in bytes; zero is ignored.
//
// If the system page size cannot be determined the function returns without
// touching the region (after reporting a sysconf() failure via perror()).
void preload_mmap_file(void *addr, size_t length)
{
#ifdef _PRELOAD_MMAP_FILE
    if (!addr || length == 0) {
        return;
    }

    // Determine the stride between reads: one system page.
    size_t page_size = 0;
#if defined(_WIN32)
    SYSTEM_INFO si;
    GetSystemInfo(&si);
    page_size = (size_t) si.dwPageSize;
#else
    const long sc_page_size = sysconf(_SC_PAGE_SIZE);
    if (sc_page_size == -1) {
        perror("sysconf");
        return;
    }
    if (sc_page_size > 0) {
        page_size = (size_t) sc_page_size;
    }
#endif

    // A zero stride would never advance the loop below.
    if (page_size == 0) {
        return;
    }

    // Read through a pointer-to-volatile so every access is an observable
    // side effect that the compiler has to perform, instead of relying on a
    // volatile local copy of the value.
    const volatile unsigned char *bytes = (const volatile unsigned char *) addr;
    for (size_t offset = 0; offset < length; offset += page_size) {
        const unsigned char touched = bytes[offset];
        (void) touched; // the value itself is irrelevant, only the read matters
    }
#else
    // Preloading is disabled in this build.
    (void) addr;
    (void) length;
#endif
}
|
||||||
llama_mmap(const llama_mmap &) = delete;
|
llama_mmap(const llama_mmap &) = delete;
|
||||||
|
|
||||||
#ifdef _POSIX_MAPPED_FILES
|
#ifdef _POSIX_MAPPED_FILES
|
||||||
@ -180,6 +208,8 @@ struct llama_mmap {
|
|||||||
fprintf(stderr, "warning: madvise(.., MADV_WILLNEED) failed: %s\n",
|
fprintf(stderr, "warning: madvise(.., MADV_WILLNEED) failed: %s\n",
|
||||||
strerror(errno));
|
strerror(errno));
|
||||||
}
|
}
|
||||||
|
// If _PRELOAD_MMAP_FILE is defined, this preloads the whole file into the page cache; otherwise the call is a no-op.
|
||||||
|
preload_mmap_file(addr, file->size);
|
||||||
}
|
}
|
||||||
|
|
||||||
~llama_mmap() {
|
~llama_mmap() {
|
||||||
@ -217,6 +247,9 @@ struct llama_mmap {
|
|||||||
fprintf(stderr, "warning: PrefetchVirtualMemory failed: %s\n",
|
fprintf(stderr, "warning: PrefetchVirtualMemory failed: %s\n",
|
||||||
llama_format_win_err(GetLastError()).c_str());
|
llama_format_win_err(GetLastError()).c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If _PRELOAD_MMAP_FILE is defined, this preloads the whole file into the page cache; otherwise the call is a no-op.
|
||||||
|
preload_mmap_file(addr, file->size);
|
||||||
}
|
}
|
||||||
|
|
||||||
~llama_mmap() {
|
~llama_mmap() {
|
||||||
|
Loading…
Reference in New Issue
Block a user