mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-27 03:44:35 +00:00
1631298475
These flags are only really useful for linting. They put GCC and other compilers into `__STRICT_ANSI__` mode. That can make systems stuff slower, in favor of standards conformance, since it may cause headers to remove platform specific goodness. It also makes builds more painful on old distros that have the functions we need, but track an older version of the standards where those functions weren't strictly available. One such example is mkstemp(). It's available everywhere in practice, but GA Ubuntu in strict ansi mode complains about it. If we don't use mkstemp() then that'll put us on the security radar with other platforms.
211 lines
5.0 KiB
Makefile
211 lines
5.0 KiB
Makefile
# Host identification. Each value may be supplied by the caller; `uname` is
# consulted only when the corresponding variable is unset or empty.
#   UNAME_S - kernel name       (uname -s)
#   UNAME_P - processor type    (uname -p)
#   UNAME_M - machine hardware  (uname -m)
ifndef UNAME_S
  UNAME_S := $(shell uname -s)
endif

ifndef UNAME_P
  UNAME_P := $(shell uname -p)
endif

ifndef UNAME_M
  UNAME_M := $(shell uname -m)
endif

# First line of each compiler's --version output.
CCV  := $(shell $(CC) --version | head -n 1)
CXXV := $(shell $(CXX) --version | head -n 1)

# Mac OS + Arm can report x86_64
# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
#
# On Darwin, when the processor type is not reported as `arm`, ask sysctl
# whether the hardware supports arm64 and warn if it does. Nothing is
# corrected automatically: the architecture variables are left untouched.
ifeq ($(UNAME_S),Darwin)
  ifneq ($(UNAME_P),arm)
    # stderr is discarded so that a host lacking the hw.optional.arm64 key
    # does not print a sysctl error on every make invocation; SYSCTL_M is
    # simply empty in that case and the warning is skipped.
    SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
    ifeq ($(SYSCTL_M),1)
      # The overrides below are intentionally disabled; only the warning is active.
      # UNAME_P := arm
      # UNAME_M := arm64
      warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
    endif
  endif
endif

#
# Compile flags
#

# Baseline flags. Later sections append to these with `+=`.
# Include search paths first ...
CFLAGS    = -I.
CXXFLAGS  = -I. -I./examples
# ... then optimisation level, NDEBUG and position-independent code.
CFLAGS   += -O3 -DNDEBUG -fPIC
CXXFLAGS += -O3 -DNDEBUG -fPIC
# Linker flags start out empty.
LDFLAGS   =

# OS specific
# TODO: support Windows
#
# Every system listed here gets -pthread for both C and C++ compilation.
# A single filter test replaces one identical conditional per OS; to cover
# another system, add its `uname -s` name to the list.
ifneq ($(filter Linux Darwin FreeBSD NetBSD Haiku,$(UNAME_S)),)
  CFLAGS   += -pthread
  CXXFLAGS += -pthread
endif

# Architecture specific
# TODO: probably these flags need to be tweaked on some architectures
#       feel free to update the Makefile for your architecture and send a pull request or issue
#
# x86 (x86_64 / i686): add SIMD flags to CFLAGS according to what the build
# host reports. The probe differs per OS:
#   Darwin - sysctl machdep.cpu.features / machdep.cpu.leaf7_features
#   Linux  - /proc/cpuinfo
#   Haiku  - sysinfo -cpu
#   other  - no probe; FMA, F16C, AVX and AVX2 are all enabled
ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
  ifeq ($(UNAME_S),Darwin)
    # F16C is enabled without probing on Darwin.
    CFLAGS += -mf16c
    AVX1_M := $(shell sysctl machdep.cpu.features)
    ifneq (,$(findstring FMA,$(AVX1_M)))
      CFLAGS += -mfma
    endif
    ifneq (,$(findstring AVX1.0,$(AVX1_M)))
      CFLAGS += -mavx
    endif
    AVX2_M := $(shell sysctl machdep.cpu.leaf7_features)
    ifneq (,$(findstring AVX2,$(AVX2_M)))
      CFLAGS += -mavx2
    endif
  else ifeq ($(UNAME_S),Linux)
    # Each grep pattern ends in a space so it cannot match as the prefix of a
    # longer flag name (e.g. "avx " does not match inside "avx2").
    AVX1_M := $(shell grep "avx " /proc/cpuinfo)
    ifneq (,$(findstring avx,$(AVX1_M)))
      CFLAGS += -mavx
    endif
    AVX2_M := $(shell grep "avx2 " /proc/cpuinfo)
    ifneq (,$(findstring avx2,$(AVX2_M)))
      CFLAGS += -mavx2
    endif
    FMA_M := $(shell grep "fma " /proc/cpuinfo)
    ifneq (,$(findstring fma,$(FMA_M)))
      CFLAGS += -mfma
    endif
    F16C_M := $(shell grep "f16c " /proc/cpuinfo)
    ifneq (,$(findstring f16c,$(F16C_M)))
      CFLAGS += -mf16c
    endif
    SSE3_M := $(shell grep "sse3 " /proc/cpuinfo)
    ifneq (,$(findstring sse3,$(SSE3_M)))
      CFLAGS += -msse3
    endif
  else ifeq ($(UNAME_S),Haiku)
    # The grep patterns here are upper-case, and findstring is case-sensitive,
    # so the needle must be upper-case too. With a lower-case needle the test
    # could never succeed and no SIMD flag was ever added on Haiku.
    AVX1_M := $(shell sysinfo -cpu | grep "AVX ")
    ifneq (,$(findstring AVX,$(AVX1_M)))
      CFLAGS += -mavx
    endif
    AVX2_M := $(shell sysinfo -cpu | grep "AVX2 ")
    ifneq (,$(findstring AVX2,$(AVX2_M)))
      CFLAGS += -mavx2
    endif
    FMA_M := $(shell sysinfo -cpu | grep "FMA ")
    ifneq (,$(findstring FMA,$(FMA_M)))
      CFLAGS += -mfma
    endif
    F16C_M := $(shell sysinfo -cpu | grep "F16C ")
    ifneq (,$(findstring F16C,$(F16C_M)))
      CFLAGS += -mf16c
    endif
  else
    CFLAGS += -mfma -mf16c -mavx -mavx2
  endif
endif
# Machines whose `uname -m` is spelled "amd64": AVX, AVX2, FMA and F16C are
# enabled without probing the CPU.
ifeq (amd64,$(UNAME_M))
  CFLAGS += -mavx -mavx2 -mfma -mf16c
endif
# Machine names beginning with "ppc64".
ifneq (,$(filter ppc64%,$(UNAME_M)))
  # Enable POWER9 vector instructions when /proc/cpuinfo mentions POWER9.
  POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
  ifneq ($(findstring POWER9,$(POWER9_M)),)
    CFLAGS += -mpower9-vector
  endif
  # Require c++23's std::byteswap for big-endian support.
  # Applies only when the machine name is exactly "ppc64".
  ifeq (ppc64,$(UNAME_M))
    CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
  endif
endif
# Mac M1 - include Accelerate framework
# Applied on every Darwin build unless LLAMA_NO_ACCELERATE is set to a
# non-empty value.
ifeq ($(UNAME_S),Darwin)
  ifndef LLAMA_NO_ACCELERATE
    CFLAGS  += -DGGML_USE_ACCELERATE
    LDFLAGS += -framework Accelerate
  endif
endif
# Opt-in OpenBLAS support: set LLAMA_OPENBLAS to a non-empty value.
# Headers are looked up under /usr/local/include/openblas.
ifdef LLAMA_OPENBLAS
  CFLAGS  += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
  LDFLAGS += -lopenblas
endif
# Opt-in profiling: set LLAMA_GPROF to a non-empty value to compile both C and
# C++ sources with -pg.
ifdef LLAMA_GPROF
  CFLAGS   += -pg
  CXXFLAGS += -pg
endif
# Machine names beginning with "aarch64": tune C and C++ code for the build host's CPU.
ifneq (,$(filter aarch64%,$(UNAME_M)))
  CFLAGS   += -mcpu=native
  CXXFLAGS += -mcpu=native
endif
# 32-bit ARM, selected by the prefix of the machine name. Only CFLAGS is extended.
ifneq (,$(filter armv6%,$(UNAME_M)))
  # Raspberry Pi 1, 2, 3
  CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
endif
ifneq (,$(filter armv7%,$(UNAME_M)))
  # Raspberry Pi 4
  CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
endif
ifneq (,$(filter armv8%,$(UNAME_M)))
  # Raspberry Pi 4
  CFLAGS += -mfp16-format=ieee -mno-unaligned-access
endif

#
# Print build information
#

# Emitted while the Makefile is parsed, i.e. on every invocation.
$(info I llama.cpp build info: )
# One "I NAME: value" line per variable, in the order listed.
$(foreach var,UNAME_S UNAME_P UNAME_M CFLAGS CXXFLAGS LDFLAGS,$(info I $(var): $($(var))))
# The compiler lines show the captured --version banner, not the command name.
$(info I CC: $(CCV))
$(info I CXX: $(CXXV))
$(info )

# Default goal: builds `main` and `quantize`.
# Declared phony because it names a command, not a file. Without this, a file
# called `default` in the build directory would change how this goal is treated.
default: main quantize
.PHONY: default

#
# Build library
#

# Compile ggml.c; rebuilt when the source or its header changes.
ggml.o: ggml.c ggml.h
	$(CC) $(CFLAGS) -c $< -o $@

# Compile mmap.c; rebuilt when the source or its header changes.
mmap.o: mmap.c mmap.h
	$(CC) $(CFLAGS) -c $< -o $@

# Compile utils.cpp with the C++ compiler; rebuilt when the source or its header changes.
utils.o: utils.cpp utils.h
	$(CXX) $(CXXFLAGS) -c $< -o $@

# Remove all object files in this directory and the two executables.
# Declared phony so that a file named `clean` cannot make the target look up
# to date and silently skip the removal.
.PHONY: clean
clean:
	rm -f *.o main quantize

# Link the `main` executable from main.cpp and the three objects, then run it
# with -h, so the build host must be able to execute the binary.
main: main.cpp ggml.o utils.o mmap.o
	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
	./$@ -h

# Link the `quantize` executable from quantize.cpp, ggml.o and utils.o.
quantize: quantize.cpp ggml.o utils.o
	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)

#
# Tests
#

# Hand off to the test script under bash. The target has no prerequisites, so
# nothing is built first.
tests:
	bash ./tests/run-tests.sh
.PHONY: tests