Mirror of https://github.com/ggerganov/llama.cpp.git (synced 2025-01-12 11:40:17 +00:00)
llama : fix platforms without mmap (#4578)
* llama : fix platforms without mmap
* win32 : limit prefetch size to the file size
* fix win32 error clobber, unnecessary std::string in std::runtime_error
This commit is contained in:
parent 48b24b170e
commit 48b7ff193e
@ -7702,7 +7702,8 @@ inline void ggml_cuda_op_scale(
|
|||||||
GGML_ASSERT(src0->type == GGML_TYPE_F32);
|
GGML_ASSERT(src0->type == GGML_TYPE_F32);
|
||||||
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
||||||
|
|
||||||
const float scale = ((float *) dst->op_params)[0];
|
float scale;
|
||||||
|
memcpy(&scale, dst->op_params, sizeof(float));
|
||||||
|
|
||||||
scale_f32_cuda(src0_dd, dst_dd, scale, ggml_nelements(src0), main_stream);
|
scale_f32_cuda(src0_dd, dst_dd, scale, ggml_nelements(src0), main_stream);
|
||||||
CUDA_CHECK(cudaGetLastError());
|
CUDA_CHECK(cudaGetLastError());
|
||||||
|
6
ggml.c
6
ggml.c
@ -10335,7 +10335,8 @@ static void ggml_compute_forward_scale_f32(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// scale factor
|
// scale factor
|
||||||
const float v = *(float *) dst->op_params;
|
float v;
|
||||||
|
memcpy(&v, dst->op_params, sizeof(float));
|
||||||
|
|
||||||
const int ith = params->ith;
|
const int ith = params->ith;
|
||||||
const int nth = params->nth;
|
const int nth = params->nth;
|
||||||
@ -15152,7 +15153,8 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|||||||
{
|
{
|
||||||
// necessary for llama
|
// necessary for llama
|
||||||
if (src0->grad) {
|
if (src0->grad) {
|
||||||
const float s = ((float *) tensor->op_params)[0];
|
float s;
|
||||||
|
memcpy(&s, tensor->op_params, sizeof(float));
|
||||||
|
|
||||||
src0->grad =
|
src0->grad =
|
||||||
ggml_add_or_set(ctx,
|
ggml_add_or_set(ctx,
|
||||||
|
36
llama.cpp
36
llama.cpp
@ -778,7 +778,7 @@ struct llama_file {
|
|||||||
throw std::runtime_error(format("read error: %s", strerror(errno)));
|
throw std::runtime_error(format("read error: %s", strerror(errno)));
|
||||||
}
|
}
|
||||||
if (ret != 1) {
|
if (ret != 1) {
|
||||||
throw std::runtime_error(std::string("unexpectedly reached end of file"));
|
throw std::runtime_error("unexpectedly reached end of file");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -931,29 +931,29 @@ struct llama_mmap {
|
|||||||
#elif defined(_WIN32)
|
#elif defined(_WIN32)
|
||||||
static constexpr bool SUPPORTED = true;
|
static constexpr bool SUPPORTED = true;
|
||||||
|
|
||||||
llama_mmap(struct llama_file * file, bool prefetch = true, bool numa = false) {
|
llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1, bool numa = false) {
|
||||||
(void) numa;
|
GGML_UNUSED(numa);
|
||||||
|
|
||||||
size = file->size;
|
size = file->size;
|
||||||
|
|
||||||
HANDLE hFile = (HANDLE) _get_osfhandle(_fileno(file->fp));
|
HANDLE hFile = (HANDLE) _get_osfhandle(_fileno(file->fp));
|
||||||
|
|
||||||
HANDLE hMapping = CreateFileMappingA(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
|
HANDLE hMapping = CreateFileMappingA(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
|
||||||
DWORD error = GetLastError();
|
|
||||||
|
|
||||||
if (hMapping == NULL) {
|
if (hMapping == NULL) {
|
||||||
|
DWORD error = GetLastError();
|
||||||
throw std::runtime_error(format("CreateFileMappingA failed: %s", llama_format_win_err(error).c_str()));
|
throw std::runtime_error(format("CreateFileMappingA failed: %s", llama_format_win_err(error).c_str()));
|
||||||
}
|
}
|
||||||
|
|
||||||
addr = MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, 0);
|
addr = MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, 0);
|
||||||
error = GetLastError();
|
DWORD error = GetLastError();
|
||||||
CloseHandle(hMapping);
|
CloseHandle(hMapping);
|
||||||
|
|
||||||
if (addr == NULL) {
|
if (addr == NULL) {
|
||||||
throw std::runtime_error(format("MapViewOfFile failed: %s", llama_format_win_err(error).c_str()));
|
throw std::runtime_error(format("MapViewOfFile failed: %s", llama_format_win_err(error).c_str()));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (prefetch) {
|
if (prefetch > 0) {
|
||||||
// PrefetchVirtualMemory is only present on Windows 8 and above, so we dynamically load it
|
// PrefetchVirtualMemory is only present on Windows 8 and above, so we dynamically load it
|
||||||
BOOL (WINAPI *pPrefetchVirtualMemory) (HANDLE, ULONG_PTR, PWIN32_MEMORY_RANGE_ENTRY, ULONG);
|
BOOL (WINAPI *pPrefetchVirtualMemory) (HANDLE, ULONG_PTR, PWIN32_MEMORY_RANGE_ENTRY, ULONG);
|
||||||
HMODULE hKernel32 = GetModuleHandleW(L"kernel32.dll");
|
HMODULE hKernel32 = GetModuleHandleW(L"kernel32.dll");
|
||||||
@ -965,9 +965,9 @@ struct llama_mmap {
|
|||||||
// advise the kernel to preload the mapped memory
|
// advise the kernel to preload the mapped memory
|
||||||
WIN32_MEMORY_RANGE_ENTRY range;
|
WIN32_MEMORY_RANGE_ENTRY range;
|
||||||
range.VirtualAddress = addr;
|
range.VirtualAddress = addr;
|
||||||
range.NumberOfBytes = (SIZE_T)size;
|
range.NumberOfBytes = (SIZE_T) std::min(size, prefetch);
|
||||||
if (!pPrefetchVirtualMemory(GetCurrentProcess(), 1, &range, 0)) {
|
if (!pPrefetchVirtualMemory(GetCurrentProcess(), 1, &range, 0)) {
|
||||||
fprintf(stderr, "warning: PrefetchVirtualMemory failed: %s\n",
|
LLAMA_LOG_WARN("warning: PrefetchVirtualMemory failed: %s\n",
|
||||||
llama_format_win_err(GetLastError()).c_str());
|
llama_format_win_err(GetLastError()).c_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -982,26 +982,26 @@ struct llama_mmap {
|
|||||||
|
|
||||||
~llama_mmap() {
|
~llama_mmap() {
|
||||||
if (!UnmapViewOfFile(addr)) {
|
if (!UnmapViewOfFile(addr)) {
|
||||||
fprintf(stderr, "warning: UnmapViewOfFile failed: %s\n",
|
LLAMA_LOG_WARN("warning: UnmapViewOfFile failed: %s\n",
|
||||||
llama_format_win_err(GetLastError()).c_str());
|
llama_format_win_err(GetLastError()).c_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
static constexpr bool SUPPORTED = false;
|
static constexpr bool SUPPORTED = false;
|
||||||
|
|
||||||
llama_mmap(struct llama_file * file, bool prefetch = true, bool numa = false) {
|
llama_mmap(struct llama_file * file, size_t prefetch = -1, bool numa = false) {
|
||||||
(void) file;
|
GGML_UNUSED(file);
|
||||||
(void) prefetch;
|
GGML_UNUSED(prefetch);
|
||||||
(void) numa;
|
GGML_UNUSED(numa);
|
||||||
|
|
||||||
throw std::runtime_error(std::string("mmap not supported"));
|
throw std::runtime_error("mmap not supported");
|
||||||
}
|
}
|
||||||
|
|
||||||
void unmap(size_t offset, size_t len) {
|
void unmap_fragment(size_t first, size_t last) {
|
||||||
(void) offset;
|
GGML_UNUSED(first);
|
||||||
(void) len;
|
GGML_UNUSED(last);
|
||||||
|
|
||||||
throw std::runtime_error(std::string("mmap not supported"));
|
throw std::runtime_error("mmap not supported");
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
Loading…
Reference in New Issue
Block a user