From f4cef87edfd1b2f8d5befd4fde54ca2e03987bea Mon Sep 17 00:00:00 2001 From: DannyDaemonic Date: Mon, 1 May 2023 09:23:47 -0700 Subject: [PATCH] Add git-based build information for better issue tracking (#1232) * Add git-based build information for better issue tracking * macOS fix * "build (hash)" and "CMAKE_SOURCE_DIR" changes * Redo "CMAKE_CURRENT_SOURCE_DIR" and clearer build messages * Fix conditional dependency on missing target * Broke out build-info.cmake, added find_package fallback, and added build info to all examples, added dependencies to Makefile * 4 space indenting for cmake, attempt to clean up my mess in Makefile * Short hash, less fancy Makefile, and don't modify build-info.h if it wouldn't change it --- .gitignore | 1 + CMakeLists.txt | 35 +++++++++++++ Makefile | 51 ++++++++++++------- examples/benchmark/CMakeLists.txt | 3 ++ examples/benchmark/benchmark-matmult.cpp | 4 +- examples/embedding/CMakeLists.txt | 3 ++ examples/embedding/embedding.cpp | 5 +- examples/main/CMakeLists.txt | 3 ++ examples/main/main.cpp | 5 +- examples/perplexity/CMakeLists.txt | 3 ++ examples/perplexity/perplexity.cpp | 5 +- examples/quantize-stats/quantize-stats.cpp | 3 ++ examples/quantize/CMakeLists.txt | 3 ++ examples/quantize/quantize.cpp | 3 ++ examples/save-load-state/CMakeLists.txt | 3 ++ examples/save-load-state/save-load-state.cpp | 3 ++ scripts/build-info.cmake | 53 ++++++++++++++++++++ scripts/build-info.sh | 22 ++++++++ 18 files changed, 186 insertions(+), 22 deletions(-) create mode 100644 scripts/build-info.cmake create mode 100755 scripts/build-info.sh diff --git a/.gitignore b/.gitignore index 565866fd4..e479c6180 100644 --- a/.gitignore +++ b/.gitignore @@ -32,6 +32,7 @@ models/* /vdot /Pipfile +build-info.h arm_neon.h compile_commands.json diff --git a/CMakeLists.txt b/CMakeLists.txt index 098306126..f6a66daa3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -72,6 +72,41 @@ option(LLAMA_CLBLAST "llama: use CLBlast" option(LLAMA_BUILD_TESTS "llama: 
build tests" ${LLAMA_STANDALONE}) option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE}) +# +# Build info header +# + +# Write header template to binary dir to keep source directory clean +file(WRITE "${CMAKE_BINARY_DIR}/BUILD_INFO.h.in" "\ +#ifndef BUILD_INFO_H\n\ +#define BUILD_INFO_H\n\ +\n\ +#define BUILD_NUMBER @BUILD_NUMBER@\n\ +#define BUILD_COMMIT \"@BUILD_COMMIT@\"\n\ +\n\ +#endif // BUILD_INFO_H\n\ +") + +# Generate initial build-info.h +include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake) + +if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git") + # Add a custom target for build-info.h + add_custom_target(BUILD_INFO ALL DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/build-info.h") + + # Add a custom command to rebuild build-info.h when .git/index changes + add_custom_command( + OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/build-info.h" + COMMENT "Generating build details from Git" + COMMAND ${CMAKE_COMMAND} -P "${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake" + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/.git/index" + VERBATIM + ) +else() + message(WARNING "Git repository not found; to enable automatic generation of build info, make sure Git is installed and the project is a Git repository.") +endif() + # # Compile flags # diff --git a/Makefile b/Makefile index 1d62a4438..6ebc3c5b9 100644 --- a/Makefile +++ b/Makefile @@ -181,41 +181,56 @@ llama.o: llama.cpp ggml.h ggml-cuda.h llama.h llama-util.h common.o: examples/common.cpp examples/common.h $(CXX) $(CXXFLAGS) -c $< -o $@ -clean: - rm -vf *.o main quantize quantize-stats perplexity embedding benchmark-matmult +libllama.so: llama.o ggml.o $(OBJS) + $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS) -main: examples/main/main.cpp ggml.o llama.o common.o $(OBJS) - $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) +clean: + rm -vf *.o main quantize quantize-stats perplexity embedding benchmark-matmult save-load-state build-info.h + +# +# Examples +# + +main: 
examples/main/main.cpp build-info.h ggml.o llama.o common.o $(OBJS) + $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) @echo @echo '==== Run ./main -h for help. ====' @echo -quantize: examples/quantize/quantize.cpp ggml.o llama.o $(OBJS) - $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) +quantize: examples/quantize/quantize.cpp build-info.h ggml.o llama.o $(OBJS) + $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) -quantize-stats: examples/quantize-stats/quantize-stats.cpp ggml.o llama.o $(OBJS) - $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) +quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.h ggml.o llama.o $(OBJS) + $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) -perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o common.o $(OBJS) - $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) +perplexity: examples/perplexity/perplexity.cpp build-info.h ggml.o llama.o common.o $(OBJS) + $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) -embedding: examples/embedding/embedding.cpp ggml.o llama.o common.o $(OBJS) - $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) +embedding: examples/embedding/embedding.cpp build-info.h ggml.o llama.o common.o $(OBJS) + $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) -vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS) - $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) +save-load-state: examples/save-load-state/save-load-state.cpp build-info.h ggml.o llama.o common.o $(OBJS) + $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) -libllama.so: llama.o ggml.o $(OBJS) - $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS) +build-info.h: $(wildcard .git/index) scripts/build-info.sh + @scripts/build-info.sh > $@.tmp + @if ! 
cmp -s $@.tmp $@; then \ + mv $@.tmp $@; \ + else \ + rm $@.tmp; \ + fi # # Tests # -benchmark-matmult: examples/benchmark/benchmark-matmult.cpp ggml.o $(OBJS) - $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) +benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.h ggml.o $(OBJS) + $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) ./$@ +vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS) + $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) + .PHONY: tests tests: bash ./tests/run-tests.sh diff --git a/examples/benchmark/CMakeLists.txt b/examples/benchmark/CMakeLists.txt index 05deebcd1..037696194 100644 --- a/examples/benchmark/CMakeLists.txt +++ b/examples/benchmark/CMakeLists.txt @@ -2,3 +2,6 @@ set(TARGET benchmark) add_executable(${TARGET} benchmark-matmult.cpp) target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_11) +if(TARGET BUILD_INFO) + add_dependencies(${TARGET} BUILD_INFO) +endif() diff --git a/examples/benchmark/benchmark-matmult.cpp b/examples/benchmark/benchmark-matmult.cpp index 19cbab1c3..2cc1a1477 100644 --- a/examples/benchmark/benchmark-matmult.cpp +++ b/examples/benchmark/benchmark-matmult.cpp @@ -1,5 +1,6 @@ #include #include "ggml.h" +#include "build-info.h" #include #include #include @@ -90,9 +91,10 @@ int main(int argc, char ** argv) { } } - // create the ggml context + fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT); printf("Starting Test\n"); + // create the ggml context struct ggml_context * ctx; //const int sizex = 4096; //const int sizey = 11008; diff --git a/examples/embedding/CMakeLists.txt b/examples/embedding/CMakeLists.txt index 88c425d4a..db73b6b44 100644 --- a/examples/embedding/CMakeLists.txt +++ b/examples/embedding/CMakeLists.txt @@ -2,3 +2,6 @@ set(TARGET embedding) add_executable(${TARGET} embedding.cpp) target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE 
cxx_std_11) +if(TARGET BUILD_INFO) + add_dependencies(${TARGET} BUILD_INFO) +endif() diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp index e10de619c..b3e001476 100644 --- a/examples/embedding/embedding.cpp +++ b/examples/embedding/embedding.cpp @@ -1,5 +1,6 @@ #include "common.h" #include "llama.h" +#include "build-info.h" #include @@ -18,11 +19,13 @@ int main(int argc, char ** argv) { "expect poor results\n", __func__, params.n_ctx); } + fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT); + if (params.seed <= 0) { params.seed = time(NULL); } - fprintf(stderr, "%s: seed = %d\n", __func__, params.seed); + fprintf(stderr, "%s: seed = %d\n", __func__, params.seed); std::mt19937 rng(params.seed); if (params.random_prompt) { diff --git a/examples/main/CMakeLists.txt b/examples/main/CMakeLists.txt index b2dcc2910..c364242fb 100644 --- a/examples/main/CMakeLists.txt +++ b/examples/main/CMakeLists.txt @@ -2,3 +2,6 @@ set(TARGET main) add_executable(${TARGET} main.cpp) target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_11) +if(TARGET BUILD_INFO) + add_dependencies(${TARGET} BUILD_INFO) +endif() diff --git a/examples/main/main.cpp b/examples/main/main.cpp index 78fc9a197..7dc100512 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -5,6 +5,7 @@ #include "common.h" #include "llama.h" +#include "build-info.h" #include #include @@ -81,11 +82,13 @@ int main(int argc, char ** argv) { "expect poor results\n", __func__, params.n_ctx); } + fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT); + if (params.seed <= 0) { params.seed = time(NULL); } - fprintf(stderr, "%s: seed = %d\n", __func__, params.seed); + fprintf(stderr, "%s: seed = %d\n", __func__, params.seed); std::mt19937 rng(params.seed); if (params.random_prompt) { diff --git a/examples/perplexity/CMakeLists.txt 
b/examples/perplexity/CMakeLists.txt index 5836df8b2..61b17b828 100644 --- a/examples/perplexity/CMakeLists.txt +++ b/examples/perplexity/CMakeLists.txt @@ -2,3 +2,6 @@ set(TARGET perplexity) add_executable(${TARGET} perplexity.cpp) target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_11) +if(TARGET BUILD_INFO) + add_dependencies(${TARGET} BUILD_INFO) +endif() diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp index 615157e7b..2ca338835 100644 --- a/examples/perplexity/perplexity.cpp +++ b/examples/perplexity/perplexity.cpp @@ -1,5 +1,6 @@ #include "common.h" #include "llama.h" +#include "build-info.h" #include #include @@ -106,11 +107,13 @@ int main(int argc, char ** argv) { "expect poor results\n", __func__, params.n_ctx); } + fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT); + if (params.seed <= 0) { params.seed = time(NULL); } - fprintf(stderr, "%s: seed = %d\n", __func__, params.seed); + fprintf(stderr, "%s: seed = %d\n", __func__, params.seed); std::mt19937 rng(params.seed); if (params.random_prompt) { diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp index 4e6c2c831..9a2aa7c64 100644 --- a/examples/quantize-stats/quantize-stats.cpp +++ b/examples/quantize-stats/quantize-stats.cpp @@ -1,4 +1,5 @@ #include "ggml.h" +#include "build-info.h" #define LLAMA_API_INTERNAL #include "llama.h" @@ -308,6 +309,8 @@ int main(int argc, char ** argv) { return 1; } + fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT); + // load the model fprintf(stderr, "Loading model\n"); diff --git a/examples/quantize/CMakeLists.txt b/examples/quantize/CMakeLists.txt index fb27d4517..475fc8be8 100644 --- a/examples/quantize/CMakeLists.txt +++ b/examples/quantize/CMakeLists.txt @@ -2,3 +2,6 @@ set(TARGET quantize) add_executable(${TARGET} quantize.cpp) 
target_link_libraries(${TARGET} PRIVATE llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_11) +if(TARGET BUILD_INFO) + add_dependencies(${TARGET} BUILD_INFO) +endif() diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp index dd175c690..198bd5fcb 100644 --- a/examples/quantize/quantize.cpp +++ b/examples/quantize/quantize.cpp @@ -1,5 +1,6 @@ #include "ggml.h" #include "llama.h" +#include "build-info.h" #include #include @@ -50,6 +51,8 @@ int main(int argc, char ** argv) { ftype = (enum llama_ftype)atoi(argv[3]); } + fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT); + int nthread = argc > 4 ? atoi(argv[4]) : 0; const int64_t t_main_start_us = ggml_time_us(); diff --git a/examples/save-load-state/CMakeLists.txt b/examples/save-load-state/CMakeLists.txt index cff79fa1f..08dbe5c2b 100644 --- a/examples/save-load-state/CMakeLists.txt +++ b/examples/save-load-state/CMakeLists.txt @@ -2,3 +2,6 @@ set(TARGET save-load-state) add_executable(${TARGET} save-load-state.cpp) target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_11) +if(TARGET BUILD_INFO) + add_dependencies(${TARGET} BUILD_INFO) +endif() diff --git a/examples/save-load-state/save-load-state.cpp b/examples/save-load-state/save-load-state.cpp index f1531ba39..ea0a984d9 100644 --- a/examples/save-load-state/save-load-state.cpp +++ b/examples/save-load-state/save-load-state.cpp @@ -1,5 +1,6 @@ #include "common.h" #include "llama.h" +#include "build-info.h" #include #include @@ -17,6 +18,8 @@ int main(int argc, char ** argv) { return 1; } + fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT); + if (params.n_predict < 0) { params.n_predict = 16; } diff --git a/scripts/build-info.cmake b/scripts/build-info.cmake new file mode 100644 index 000000000..fb46ed2b5 --- /dev/null +++ b/scripts/build-info.cmake @@ -0,0 +1,53 @@ 
+# Generate build-info.h inline: under `cmake -P` CMAKE_BINARY_DIR is merely the working
+# directory, so the BUILD_INFO.h.in template written at configure time cannot be found.
+set(HEADER_FILE "${CMAKE_CURRENT_SOURCE_DIR}/build-info.h")
+set(BUILD_NUMBER 0)
+set(BUILD_COMMIT "unknown")
+
+# Look for git; `which` is a last resort when find_package cannot locate it
+find_package(Git)
+if(NOT Git_FOUND)
+    execute_process(
+        COMMAND which git
+        OUTPUT_VARIABLE GIT_EXECUTABLE
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+    )
+    if(NOT GIT_EXECUTABLE STREQUAL "")
+        set(Git_FOUND TRUE)
+        message(STATUS "Found Git using 'which': ${GIT_EXECUTABLE}")
+    else()
+        message(WARNING "Git not found using 'find_package' or 'which'. Build info will not be accurate. Consider installing Git or ensuring it is in the PATH.")
+    endif()
+endif()
+
+# Get the commit count and hash; the defaults above are kept if either command fails
+if(Git_FOUND)
+    execute_process(
+        COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD
+        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+        OUTPUT_VARIABLE HEAD
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+        RESULT_VARIABLE GIT_HEAD_RESULT
+    )
+    execute_process(
+        COMMAND ${GIT_EXECUTABLE} rev-list --count HEAD
+        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+        OUTPUT_VARIABLE COUNT
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+        RESULT_VARIABLE GIT_COUNT_RESULT
+    )
+    if(GIT_HEAD_RESULT EQUAL 0 AND GIT_COUNT_RESULT EQUAL 0)
+        set(BUILD_COMMIT ${HEAD})
+        set(BUILD_NUMBER ${COUNT})
+    endif()
+endif()
+
+# Rewrite the header only when its full text differs: avoids needless recompilation and repairs an empty or truncated file
+set(HEADER_TEXT "#ifndef BUILD_INFO_H\n#define BUILD_INFO_H\n\n#define BUILD_NUMBER ${BUILD_NUMBER}\n#define BUILD_COMMIT \"${BUILD_COMMIT}\"\n\n#endif // BUILD_INFO_H\n")
+set(EXISTING_TEXT "")
+if(EXISTS "${HEADER_FILE}")
+    file(READ "${HEADER_FILE}" EXISTING_TEXT)
+endif()
+if(NOT "${EXISTING_TEXT}" STREQUAL "${HEADER_TEXT}")
+    file(WRITE "${HEADER_FILE}" "${HEADER_TEXT}")
+endif()
diff --git a/scripts/build-info.sh b/scripts/build-info.sh
new file mode 100755
index 000000000..507d7e153
--- /dev/null
+++ b/scripts/build-info.sh
@@ -0,0 +1,22 @@
+#!/bin/sh
+
+BUILD_NUMBER="0"
+BUILD_COMMIT="unknown"
+
+REV_LIST=$(git rev-list --count HEAD)
+if [ $? -eq 0 ]; then
+    BUILD_NUMBER=$REV_LIST
+fi
+
+REV_PARSE=$(git rev-parse --short HEAD)
+if [ $? -eq 0 ]; then
+    BUILD_COMMIT=$REV_PARSE
+fi
+
+echo "#ifndef BUILD_INFO_H"
+echo "#define BUILD_INFO_H"
+echo ""
+echo "#define BUILD_NUMBER $BUILD_NUMBER"
+echo "#define BUILD_COMMIT \"$BUILD_COMMIT\""
+echo ""
+echo "#endif // BUILD_INFO_H"