# Host and toolchain detection.
# Each UNAME_* value may be supplied by the caller (command line or
# environment); `uname` is only consulted when the variable is not set.
ifndef UNAME_S
UNAME_S := $(shell uname -s)
endif

ifndef UNAME_P
UNAME_P := $(shell uname -p)
endif

ifndef UNAME_M
UNAME_M := $(shell uname -m)
endif

# First line of each compiler's --version output, printed in the build info.
CCV  := $(shell $(CC) --version | head -n 1)
CXXV := $(shell $(CXX) --version | head -n 1)
# On Mac OS an ARM machine can report x86_64. When the processor is not
# announced as "arm", ask sysctl whether arm64 is actually available.
# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
ifeq ($(UNAME_S),Darwin)
  ifneq ($(UNAME_P),arm)
    # Errors from sysctl are discarded, leaving SYSCTL_M empty.
    SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
    ifeq ($(SYSCTL_M),1)
      # The overrides below are commented out, so only a warning is emitted.
      # UNAME_P := arm
      # UNAME_M := arm64
      warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
    endif
  endif
endif
#
# Compile flags
#

# Baseline flags; the OS, architecture and feature sections append to these.
# keep standard at C11 and C++11
CFLAGS   = -I. -O3 -DNDEBUG -std=c11 -fPIC
CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC
LDFLAGS  =
# OS specific
# TODO: support Windows

# Systems on which both the C and C++ compilers are given -pthread.
PTHREAD_OSES := Linux Darwin FreeBSD NetBSD OpenBSD Haiku

# UNAME_S is expected to be a single word, so a non-empty $(filter ...) result
# means it names one of the systems above. This replaces six identical
# per-OS conditionals; add new systems to PTHREAD_OSES instead of copying.
ifneq ($(filter $(PTHREAD_OSES),$(UNAME_S)),)
  CFLAGS   += -pthread
  CXXFLAGS += -pthread
endif
# Architecture specific
# TODO: probably these flags need to be tweaked on some architectures
#       feel free to update the Makefile for your architecture and send a pull request or issue

# x86 (x86_64 / i686): enable SIMD extensions in CFLAGS based on what the host
# CPU reports. How the CPU is queried depends on the operating system.
ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
  ifeq ($(UNAME_S),Darwin)
    # F16C is enabled unconditionally here; the rest is probed through sysctl.
    CFLAGS += -mf16c
    AVX1_M := $(shell sysctl machdep.cpu.features)
    ifneq (,$(findstring FMA,$(AVX1_M)))
      CFLAGS += -mfma
    endif
    ifneq (,$(findstring AVX1.0,$(AVX1_M)))
      CFLAGS += -mavx
    endif
    AVX2_M := $(shell sysctl machdep.cpu.leaf7_features)
    ifneq (,$(findstring AVX2,$(AVX2_M)))
      CFLAGS += -mavx2
    endif
  else ifeq ($(UNAME_S),Linux)
    # Read the feature list of the first CPU entry once and match whole words,
    # instead of running one grep over /proc/cpuinfo per extension. A missing
    # or unreadable /proc/cpuinfo simply yields no extra flags.
    X86_CPU_FLAGS := $(shell grep -m 1 '^flags' /proc/cpuinfo 2>/dev/null)
    # Extensions whose /proc/cpuinfo name equals the compiler option name
    # (flag "avx2" -> option "-mavx2").
    X86_PROBED := avx avx2 fma f16c avx512f avx512bw avx512dq avx512vl avx512cd avx512er avx512ifma avx512pf
    # $(filter) keeps the order in which the CPU lists its features.
    X86_MFLAGS := $(addprefix -m,$(filter $(X86_PROBED),$(X86_CPU_FLAGS)))
    CFLAGS += $(X86_MFLAGS)
    # Linux reports SSE3 as "pni". "ssse3" is accepted too because the former
    # substring probe for "sse3 " matched it.
    ifneq (,$(filter pni sse3 ssse3,$(X86_CPU_FLAGS)))
      CFLAGS += -msse3
    endif
  else ifeq ($(UNAME_S),Haiku)
    # Haiku: query `sysinfo -cpu` once per extension, matching whole words.
    AVX1_M := $(shell sysinfo -cpu | grep -w "AVX")
    ifneq (,$(findstring AVX,$(AVX1_M)))
      CFLAGS += -mavx
    endif
    AVX2_M := $(shell sysinfo -cpu | grep -w "AVX2")
    ifneq (,$(findstring AVX2,$(AVX2_M)))
      CFLAGS += -mavx2
    endif
    FMA_M := $(shell sysinfo -cpu | grep -w "FMA")
    ifneq (,$(findstring FMA,$(FMA_M)))
      CFLAGS += -mfma
    endif
    F16C_M := $(shell sysinfo -cpu | grep -w "F16C")
    ifneq (,$(findstring F16C,$(F16C_M)))
      CFLAGS += -mf16c
    endif
  else
    # Any other OS: no probing is done, this fixed set is enabled.
    CFLAGS += -mfma -mf16c -mavx -mavx2
  endif
endif
# PowerPC 64-bit: applies to any UNAME_M that starts with "ppc64".
ifneq ($(filter ppc64%,$(UNAME_M)),)
  # Enable POWER9 vector instructions when /proc/cpuinfo mentions POWER9.
  POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
  ifneq ($(findstring POWER9,$(POWER9_M)),)
    CFLAGS += -mpower9-vector
  endif

  # Require c++23's std::byteswap for big-endian support.
  # Only an exact "ppc64" machine name gets the big-endian define.
  ifeq ($(UNAME_M),ppc64)
    CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
  endif
endif
# Mac M1 - include Accelerate framework.
# `-framework Accelerate` works on Mac Intel as well, with negligible performance boost (as of the predict time).
# Set LLAMA_NO_ACCELERATE to a non-empty value to opt out.
ifeq ($(UNAME_S),Darwin)
  ifndef LLAMA_NO_ACCELERATE
    CFLAGS  += -DGGML_USE_ACCELERATE
    LDFLAGS += -framework Accelerate
  endif
endif
# Opt-in OpenBLAS: when LLAMA_OPENBLAS is set to a non-empty value, define
# GGML_USE_OPENBLAS, add the OpenBLAS include directory and link libopenblas.
ifdef LLAMA_OPENBLAS
  CFLAGS  += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
  LDFLAGS += -lopenblas
endif
# Opt-in profiling: when LLAMA_GPROF is set to a non-empty value, compile both
# C and C++ sources with -pg.
ifdef LLAMA_GPROF
  CFLAGS   += -pg
  CXXFLAGS += -pg
endif
# ARM targets, selected by the prefix of UNAME_M.

# 64-bit ARM: tune both compilers for the build machine's CPU.
ifneq ($(filter aarch64%,$(UNAME_M)),)
  CFLAGS   += -mcpu=native
  CXXFLAGS += -mcpu=native
endif

ifneq ($(filter armv6%,$(UNAME_M)),)
  # Raspberry Pi 1, 2, 3
  CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
endif

ifneq ($(filter armv7%,$(UNAME_M)),)
  # Raspberry Pi 4
  CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
endif

ifneq ($(filter armv8%,$(UNAME_M)),)
  # Raspberry Pi 4
  CFLAGS += -mfp16-format=ieee -mno-unaligned-access
endif
#
# Print build information
#

# Emitted while the Makefile is parsed, i.e. on every invocation, before any
# recipe runs. The final empty $(info) prints a blank separator line.
$(info I llama.cpp build info: )
$(info I UNAME_S:  $(UNAME_S))
$(info I UNAME_P:  $(UNAME_P))
$(info I UNAME_M:  $(UNAME_M))
$(info I CFLAGS:   $(CFLAGS))
$(info I CXXFLAGS: $(CXXFLAGS))
$(info I LDFLAGS:  $(LDFLAGS))
$(info I CC:       $(CCV))
$(info I CXX:      $(CXXV))
$(info )
# Default goal (first rule in the file): build both executables.
default: main quantize

# "default" names a command, not a file; declare it phony so that a file called
# "default" in the source tree cannot make the goal look up to date.
.PHONY: default
#
# Build library
#

# C object for ggml; rebuilt when its source or header changes.
ggml.o: ggml.c ggml.h
	$(CC) $(CFLAGS) -c $< -o $@
# C++ object for llama; rebuilt when its source or header changes.
llama.o: llama.cpp llama.h
	$(CXX) $(CXXFLAGS) -c $< -o $@
# C++ object for utils; rebuilt when its source or header changes.
utils.o: utils.cpp utils.h
	$(CXX) $(CXXFLAGS) -c $< -o $@
# C++ object for run; rebuilt when its source or header changes.
run.o: run.cpp run.h
	$(CXX) $(CXXFLAGS) -c $< -o $@
# Remove object files and the two executables.
# Declared phony so that a file named "clean" cannot make this target look
# up to date and silently skip the removal.
.PHONY: clean
clean:
	rm -f *.o main quantize
# Main executable: compiles main.cpp and links it with the library objects.
# $^ expands to the prerequisites in the order listed; $@ is "main".
main: main.cpp ggml.o llama.o utils.o run.o
	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
	@# printf with an octal escape is used for the coloured hint because
	@# `echo "\x1b..."` is only interpreted by some shells' echo built-ins.
	@printf '\033[36mrun ./main -h for help\033[0m\n'
# Quantize executable: compiles quantize.cpp and links it with the library
# objects it lists as prerequisites.
quantize: quantize.cpp ggml.o llama.o utils.o
	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
#
# Tests
#

# Runs the test script with bash. Phony, so it executes every time it is
# requested regardless of any file or directory named "tests".
tests:
	bash ./tests/run-tests.sh

.PHONY: tests