Adds a _PRELOAD_MMAP_FILE compile-time flag that fully preloads the model even when mmap() is used. This restores consistency, so token-inference benchmarks no longer depend on SSD/disk speed.

2025-01-03 23:34:35 +00:00 · 2023-04-10 05:14:35 +02:00 · 2023-04-10 05:14:35 +02:00 · 5010b6ae84
commit 5010b6ae84
parent 180b693a47
1 changed files with 33 additions and 0 deletions
--- a/llama_util.h
+++ b/llama_util.h
@ -156,7 +156,35 @@ static std::string llama_format_win_err(DWORD err) {
 struct llama_mmap {
    void * addr;
    size_t size;
+    // Reads one byte from every page of [addr, addr + length) so that the whole
+    // mapped file gets loaded up front. Compiled to a no-op unless
+    // _PRELOAD_MMAP_FILE is defined at build time.
+    void preload_mmap_file(void *addr, size_t length) {
+    #ifdef _PRELOAD_MMAP_FILE
+        // The system page size is the stride: one read per page is all we do.
+    #if defined(_WIN32)
+        SYSTEM_INFO si;
+        GetSystemInfo(&si);
+        const size_t page_size = si.dwPageSize;
+    #else
+        const long sc_page_size = sysconf(_SC_PAGE_SIZE);
+        if (sc_page_size <= 0) {
+            perror("sysconf");
+            return;
+        }
+        const size_t page_size = (size_t) sc_page_size;
+    #endif

+        if (page_size == 0) return; // a zero stride would never advance the loop
+        // Read through a volatile pointer so the compiler cannot drop the loads.
+        const volatile char * bytes = (const volatile char *) addr;
+        for (size_t off = 0; off < length; off += page_size) {
+            (void) bytes[off];
+        }
+    #else
+        (void) addr; (void) length; // preloading compiled out; avoid unused warnings
+    #endif
+    }
    llama_mmap(const llama_mmap &) = delete;

 #ifdef _POSIX_MAPPED_FILES
@ -180,6 +208,8 @@ struct llama_mmap {
            fprintf(stderr, "warning: madvise(.., MADV_WILLNEED) failed: %s\n",
                    strerror(errno));
        }
+        // if _PRELOAD_MMAP_FILE is defined, this touches every page of the mapping to preload the file
+        preload_mmap_file(addr, file->size);
    }

    ~llama_mmap() {
@ -217,6 +247,9 @@ struct llama_mmap {
            fprintf(stderr, "warning: PrefetchVirtualMemory failed: %s\n",
                    llama_format_win_err(GetLastError()).c_str());
        }
+
+        // if _PRELOAD_MMAP_FILE is defined, this touches every page of the mapping to preload the file
+        preload_mmap_file(addr, file->size);
    }

    ~llama_mmap() {