mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 03:31:46 +00:00
arm64 support for windows (#3007)
Co-authored-by: Cebtenzzre <cebtenzzre@gmail.com>
This commit is contained in:
parent
4f7cd6ba9c
commit
b52b29ab9d
@ -461,6 +461,13 @@ endif()
|
|||||||
# TODO: probably these flags need to be tweaked on some architectures
|
# TODO: probably these flags need to be tweaked on some architectures
|
||||||
# feel free to update the Makefile for your architecture and send a pull request or issue
|
# feel free to update the Makefile for your architecture and send a pull request or issue
|
||||||
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
|
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
|
||||||
|
if (MSVC)
|
||||||
|
string(TOLOWER "${CMAKE_GENERATOR_PLATFORM}" CMAKE_GENERATOR_PLATFORM_LWR)
|
||||||
|
message(STATUS "CMAKE_GENERATOR_PLATFORM: ${CMAKE_GENERATOR_PLATFORM}")
|
||||||
|
else ()
|
||||||
|
set(CMAKE_GENERATOR_PLATFORM_LWR "")
|
||||||
|
endif ()
|
||||||
|
|
||||||
if (NOT MSVC)
|
if (NOT MSVC)
|
||||||
if (LLAMA_STATIC)
|
if (LLAMA_STATIC)
|
||||||
add_link_options(-static)
|
add_link_options(-static)
|
||||||
@ -476,10 +483,14 @@ if (NOT MSVC)
|
|||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64"))
|
if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") OR ("${CMAKE_GENERATOR_PLATFORM_LWR}" MATCHES "arm64"))
|
||||||
message(STATUS "ARM detected")
|
message(STATUS "ARM detected")
|
||||||
if (MSVC)
|
if (MSVC)
|
||||||
# TODO: arm msvc?
|
add_compile_definitions(__ARM_NEON)
|
||||||
|
add_compile_definitions(__ARM_FEATURE_FMA)
|
||||||
|
add_compile_definitions(__ARM_FEATURE_DOTPROD)
|
||||||
|
# add_compile_definitions(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) # MSVC doesn't support vdupq_n_f16, vld1q_f16, vst1q_f16
|
||||||
|
add_compile_definitions(__aarch64__) # MSVC defines _M_ARM64 instead
|
||||||
else()
|
else()
|
||||||
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
|
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
|
||||||
# Raspberry Pi 1, Zero
|
# Raspberry Pi 1, Zero
|
||||||
@ -494,7 +505,7 @@ if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR (${CMAKE_SYSTEM_PROCESSOR} MATC
|
|||||||
add_compile_options(-mfp16-format=ieee -mno-unaligned-access)
|
add_compile_options(-mfp16-format=ieee -mno-unaligned-access)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$")
|
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GENERATOR_PLATFORM_LWR}" MATCHES "^(x86_64|i686|amd64|x64)$" )
|
||||||
message(STATUS "x86 detected")
|
message(STATUS "x86 detected")
|
||||||
if (MSVC)
|
if (MSVC)
|
||||||
if (LLAMA_AVX512)
|
if (LLAMA_AVX512)
|
||||||
|
2
ggml.c
2
ggml.c
@ -283,7 +283,7 @@ typedef double ggml_float;
|
|||||||
// 16-bit float
|
// 16-bit float
|
||||||
// on Arm, we use __fp16
|
// on Arm, we use __fp16
|
||||||
// on x86, we use uint16_t
|
// on x86, we use uint16_t
|
||||||
#ifdef __ARM_NEON
|
#if defined(__ARM_NEON) && !defined(_MSC_VER)
|
||||||
|
|
||||||
// if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
|
// if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
|
||||||
//
|
//
|
||||||
|
2
ggml.h
2
ggml.h
@ -270,7 +270,7 @@ extern "C" {
|
|||||||
|
|
||||||
#if defined(__ARM_NEON) && defined(__CUDACC__)
|
#if defined(__ARM_NEON) && defined(__CUDACC__)
|
||||||
typedef half ggml_fp16_t;
|
typedef half ggml_fp16_t;
|
||||||
#elif defined(__ARM_NEON)
|
#elif defined(__ARM_NEON) && !defined(_MSC_VER)
|
||||||
typedef __fp16 ggml_fp16_t;
|
typedef __fp16 ggml_fp16_t;
|
||||||
#else
|
#else
|
||||||
typedef uint16_t ggml_fp16_t;
|
typedef uint16_t ggml_fp16_t;
|
||||||
|
@ -2609,7 +2609,10 @@ void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restri
|
|||||||
|
|
||||||
memcpy(utmp, x[i].scales, 12);
|
memcpy(utmp, x[i].scales, 12);
|
||||||
|
|
||||||
const uint32x2_t mins8 = {utmp[1] & kmask1, ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4)};
|
uint32x2_t mins8 = { 0 };
|
||||||
|
mins8 = vset_lane_u32(utmp[1] & kmask1, mins8, 0);
|
||||||
|
mins8 = vset_lane_u32(((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4), mins8, 1);
|
||||||
|
|
||||||
utmp[1] = (utmp[2] & kmask2) | (((utmp[0] >> 6) & kmask3) << 4);
|
utmp[1] = (utmp[2] & kmask2) | (((utmp[0] >> 6) & kmask3) << 4);
|
||||||
utmp[0] &= kmask1;
|
utmp[0] &= kmask1;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user