mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 03:31:46 +00:00
llamafile : tmp disable + build sgemm.o when needed (#6716)
* build : sgemm.o only when needed

ggml-ci

* llamafile : tmp disable due to MoE bug

ggml-ci
This commit is contained in:
parent
8dd1ec8b3f
commit
3b8f1ec4b1
@ -43,6 +43,18 @@ else()
|
|||||||
set(LLAMA_METAL_DEFAULT OFF)
|
set(LLAMA_METAL_DEFAULT OFF)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
# TODO: fix this for Android CI
|
||||||
|
# https://github.com/ggerganov/llama.cpp/pull/6716#issuecomment-2061509191
|
||||||
|
#if (CMAKE_SYSTEM_NAME MATCHES "ANDROID")
|
||||||
|
# set(LLAMA_LLAMAFILE_DEFAULT OFF)
|
||||||
|
#else()
|
||||||
|
# set(LLAMA_LLAMAFILE_DEFAULT ON)
|
||||||
|
#endif()
|
||||||
|
|
||||||
|
# TODO: temporarily disabled until MoE is fixed
|
||||||
|
# https://github.com/ggerganov/llama.cpp/pull/6716
|
||||||
|
set(LLAMA_LLAMAFILE_DEFAULT OFF)
|
||||||
|
|
||||||
# general
|
# general
|
||||||
option(BUILD_SHARED_LIBS "build shared libraries" OFF)
|
option(BUILD_SHARED_LIBS "build shared libraries" OFF)
|
||||||
option(LLAMA_STATIC "llama: static link libraries" OFF)
|
option(LLAMA_STATIC "llama: static link libraries" OFF)
|
||||||
@ -88,7 +100,7 @@ endif()
|
|||||||
# 3rd party libs
|
# 3rd party libs
|
||||||
option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
|
option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
|
||||||
option(LLAMA_BLAS "llama: use BLAS" OFF)
|
option(LLAMA_BLAS "llama: use BLAS" OFF)
|
||||||
option(LLAMA_LLAMAFILE "llama: use llamafile SGEMM" ON)
|
option(LLAMA_LLAMAFILE "llama: use llamafile SGEMM" ${LLAMA_LLAMAFILE_DEFAULT})
|
||||||
set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
|
set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
|
||||||
option(LLAMA_CUDA "llama: use CUDA" OFF)
|
option(LLAMA_CUDA "llama: use CUDA" OFF)
|
||||||
option(LLAMA_CUBLAS "llama: use CUDA (deprecated, use LLAMA_CUDA)" OFF)
|
option(LLAMA_CUBLAS "llama: use CUDA (deprecated, use LLAMA_CUDA)" OFF)
|
||||||
@ -372,6 +384,9 @@ endif()
|
|||||||
|
|
||||||
if (LLAMA_LLAMAFILE)
|
if (LLAMA_LLAMAFILE)
|
||||||
add_compile_definitions(GGML_USE_LLAMAFILE)
|
add_compile_definitions(GGML_USE_LLAMAFILE)
|
||||||
|
|
||||||
|
set(GGML_HEADERS_LLAMAFILE sgemm.h)
|
||||||
|
set(GGML_SOURCES_LLAMAFILE sgemm.cpp)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (LLAMA_QKK_64)
|
if (LLAMA_QKK_64)
|
||||||
@ -1157,8 +1172,6 @@ add_library(ggml OBJECT
|
|||||||
ggml-backend.h
|
ggml-backend.h
|
||||||
ggml-quants.c
|
ggml-quants.c
|
||||||
ggml-quants.h
|
ggml-quants.h
|
||||||
sgemm.cpp
|
|
||||||
sgemm.h
|
|
||||||
${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
|
${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
|
||||||
${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
|
${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
|
||||||
${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
|
${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
|
||||||
@ -1168,6 +1181,7 @@ add_library(ggml OBJECT
|
|||||||
${GGML_SOURCES_KOMPUTE} ${GGML_HEADERS_KOMPUTE}
|
${GGML_SOURCES_KOMPUTE} ${GGML_HEADERS_KOMPUTE}
|
||||||
${GGML_SOURCES_VULKAN} ${GGML_HEADERS_VULKAN}
|
${GGML_SOURCES_VULKAN} ${GGML_HEADERS_VULKAN}
|
||||||
${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM}
|
${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM}
|
||||||
|
${GGML_SOURCES_LLAMAFILE} ${GGML_HEADERS_LLAMAFILE}
|
||||||
)
|
)
|
||||||
|
|
||||||
target_include_directories(ggml PUBLIC . ${LLAMA_EXTRA_INCLUDES})
|
target_include_directories(ggml PUBLIC . ${LLAMA_EXTRA_INCLUDES})
|
||||||
|
28
Makefile
28
Makefile
@ -219,13 +219,6 @@ ifdef LLAMA_DISABLE_LOGS
|
|||||||
MK_CPPFLAGS += -DLOG_DISABLE_LOGS
|
MK_CPPFLAGS += -DLOG_DISABLE_LOGS
|
||||||
endif # LLAMA_DISABLE_LOGS
|
endif # LLAMA_DISABLE_LOGS
|
||||||
|
|
||||||
# disable ggml.c's use of sgemm.cpp
|
|
||||||
ifdef LLAMA_NO_LLAMAFILE
|
|
||||||
MK_CPPFLAGS += -DGGML_USE_LLAMAFILE=0
|
|
||||||
else
|
|
||||||
MK_CPPFLAGS += -DGGML_USE_LLAMAFILE=1
|
|
||||||
endif
|
|
||||||
|
|
||||||
# warnings
|
# warnings
|
||||||
WARN_FLAGS = -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
|
WARN_FLAGS = -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
|
||||||
MK_CFLAGS += $(WARN_FLAGS) -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int \
|
MK_CFLAGS += $(WARN_FLAGS) -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int \
|
||||||
@ -391,6 +384,15 @@ ifdef LLAMA_OPENBLAS
|
|||||||
MK_LDFLAGS += $(shell pkg-config --libs openblas)
|
MK_LDFLAGS += $(shell pkg-config --libs openblas)
|
||||||
endif # LLAMA_OPENBLAS
|
endif # LLAMA_OPENBLAS
|
||||||
|
|
||||||
|
# TODO: temporarily disabled until MoE is fixed
|
||||||
|
# https://github.com/ggerganov/llama.cpp/pull/6716
|
||||||
|
LLAMA_NO_LLAMAFILE := 1
|
||||||
|
|
||||||
|
ifndef LLAMA_NO_LLAMAFILE
|
||||||
|
MK_CPPFLAGS += -DGGML_USE_LLAMAFILE
|
||||||
|
OBJS += sgemm.o
|
||||||
|
endif
|
||||||
|
|
||||||
ifdef LLAMA_BLIS
|
ifdef LLAMA_BLIS
|
||||||
MK_CPPFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/blis -I/usr/include/blis
|
MK_CPPFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/blis -I/usr/include/blis
|
||||||
MK_LDFLAGS += -lblis -L/usr/local/lib
|
MK_LDFLAGS += -lblis -L/usr/local/lib
|
||||||
@ -487,11 +489,9 @@ ggml-cuda/%.o: ggml-cuda/%.cu ggml-cuda/%.cuh ggml.h ggml-common.h ggml-cuda/com
|
|||||||
|
|
||||||
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
|
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
|
||||||
$(NVCC_COMPILE)
|
$(NVCC_COMPILE)
|
||||||
|
|
||||||
endif # LLAMA_CUDA
|
endif # LLAMA_CUDA
|
||||||
|
|
||||||
ifdef LLAMA_CLBLAST
|
ifdef LLAMA_CLBLAST
|
||||||
|
|
||||||
MK_CPPFLAGS += -DGGML_USE_CLBLAST $(shell pkg-config --cflags-only-I clblast OpenCL)
|
MK_CPPFLAGS += -DGGML_USE_CLBLAST $(shell pkg-config --cflags-only-I clblast OpenCL)
|
||||||
MK_CFLAGS += $(shell pkg-config --cflags-only-other clblast OpenCL)
|
MK_CFLAGS += $(shell pkg-config --cflags-only-other clblast OpenCL)
|
||||||
MK_CXXFLAGS += $(shell pkg-config --cflags-only-other clblast OpenCL)
|
MK_CXXFLAGS += $(shell pkg-config --cflags-only-other clblast OpenCL)
|
||||||
@ -610,6 +610,11 @@ ggml-mpi.o: ggml-mpi.c ggml-mpi.h
|
|||||||
$(CC) $(CFLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) -c $< -o $@
|
||||||
endif # LLAMA_MPI
|
endif # LLAMA_MPI
|
||||||
|
|
||||||
|
ifndef LLAMA_NO_LLAMAFILE
|
||||||
|
sgemm.o: sgemm.cpp sgemm.h ggml.h
|
||||||
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||||
|
endif
|
||||||
|
|
||||||
GF_CC := $(CC)
|
GF_CC := $(CC)
|
||||||
include scripts/get-flags.mk
|
include scripts/get-flags.mk
|
||||||
|
|
||||||
@ -683,16 +688,13 @@ ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
|
|||||||
ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h ggml-common.h
|
ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h ggml-common.h
|
||||||
$(CC) $(CFLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) -c $< -o $@
|
||||||
|
|
||||||
sgemm.o: sgemm.cpp sgemm.h ggml.h
|
|
||||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
|
||||||
|
|
||||||
unicode.o: unicode.cpp unicode.h
|
unicode.o: unicode.cpp unicode.h
|
||||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||||
|
|
||||||
unicode-data.o: unicode-data.cpp unicode-data.h
|
unicode-data.o: unicode-data.cpp unicode-data.h
|
||||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||||
|
|
||||||
OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o unicode.o unicode-data.o sgemm.o
|
OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o unicode.o unicode-data.o
|
||||||
|
|
||||||
llama.o: llama.cpp unicode.h ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml-metal.h llama.h
|
llama.o: llama.cpp unicode.h ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml-metal.h llama.h
|
||||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||||
|
Loading…
Reference in New Issue
Block a user