diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c3aa6f992..d22a041a6 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -72,7 +72,7 @@ jobs: id: cmake_test run: | cd build - ctest --verbose --timeout 900 + ctest -L main --verbose --timeout 900 ubuntu-latest-cmake-sanitizer: runs-on: ubuntu-latest @@ -107,7 +107,7 @@ jobs: id: cmake_test run: | cd build - ctest --verbose --timeout 900 + ctest -L main --verbose --timeout 900 ubuntu-latest-cmake-mpi: runs-on: ubuntu-latest @@ -141,7 +141,7 @@ jobs: id: cmake_test run: | cd build - ctest --verbose + ctest -L main --verbose # TODO: build with LLAMA_NO_METAL because test-backend-ops fail on "Apple Paravirtual device" and I don't know # how to debug it. @@ -202,7 +202,7 @@ jobs: id: cmake_test run: | cd build - ctest --verbose --timeout 900 + ctest -L main --verbose --timeout 900 macOS-latest-cmake-ios: runs-on: macos-latest @@ -394,7 +394,7 @@ jobs: if: ${{ matrix.build != 'clblast' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }} # not all machines have native AVX-512 run: | cd build - ctest -C Release --verbose --timeout 900 + ctest -L main -C Release --verbose --timeout 900 - name: Test (Intel SDE) id: cmake_test_sde @@ -406,7 +406,7 @@ jobs: 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar $sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe) cd build - & $sde -future -- ctest -C Release --verbose --timeout 900 + & $sde -future -- ctest -L main -C Release --verbose --timeout 900 - name: Determine tag name id: tag diff --git a/.gitignore b/.gitignore index 5ab81445d..cb0069bfb 100644 --- a/.gitignore +++ b/.gitignore @@ -27,7 +27,7 @@ lcov-report/ gcovr-report/ -build*/ +build* out/ tmp/ @@ -89,20 +89,3 @@ examples/jeopardy/results.txt poetry.lock poetry.toml - -# Test binaries -/tests/test-grammar-parser -/tests/test-llama-grammar -/tests/test-double-float -/tests/test-grad0 -/tests/test-opt -/tests/test-quantize-fns 
-/tests/test-quantize-perf -/tests/test-sampling -/tests/test-tokenizer-0-llama -/tests/test-tokenizer-0-falcon -/tests/test-tokenizer-1-llama -/tests/test-tokenizer-1-bpe -/tests/test-rope -/tests/test-backend-ops -/tests/test-autorelease diff --git a/Makefile b/Makefile index a8658a596..82c89e87a 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ TEST_TARGETS = \ tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt \ tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama \ tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe tests/test-rope \ - tests/test-backend-ops tests/test-autorelease + tests/test-backend-ops tests/test-model-load-cancel tests/test-autorelease # Code coverage output files COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report @@ -748,5 +748,8 @@ tests/test-c.o: tests/test-c.c llama.h tests/test-backend-ops: tests/test-backend-ops.cpp ggml.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) -tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) +tests/test-model-load-cancel: tests/test-model-load-cancel.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS) + $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + +tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) diff --git a/ci/run.sh b/ci/run.sh index 791b17a19..2427e55a2 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -22,9 +22,9 @@ mkdir -p "$2" OUT=$(realpath "$1") MNT=$(realpath "$2") -rm -v $OUT/*.log -rm -v $OUT/*.exit -rm -v $OUT/*.md +rm -f "$OUT"/*.log # quote only the directory: a glob inside the quotes would never expand +rm -f "$OUT"/*.exit +rm -f "$OUT"/*.md sd=`dirname $0` cd $sd/../ @@ -94,7 +94,7 @@ function gg_run_ctest_debug { (time cmake -DCMAKE_BUILD_TYPE=Debug ${CMAKE_EXTRA} .. 
) 2>&1 | tee -a $OUT/${ci}-cmake.log (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log - (time ctest --output-on-failure -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log + (time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log set +e } @@ -123,9 +123,9 @@ function gg_run_ctest_release { (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log if [ -z ${GG_BUILD_LOW_PERF} ]; then - (time ctest --output-on-failure ) 2>&1 | tee -a $OUT/${ci}-ctest.log + (time ctest --output-on-failure -L main ) 2>&1 | tee -a $OUT/${ci}-ctest.log else - (time ctest --output-on-failure -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log + (time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log fi set +e @@ -141,6 +141,61 @@ function gg_sum_ctest_release { gg_printf '```\n' } +function gg_get_model { + local gguf_3b="$MNT/models/open-llama/3B-v2/ggml-model-f16.gguf" + local gguf_7b="$MNT/models/open-llama/7B-v2/ggml-model-f16.gguf" + if [[ -s $gguf_3b ]]; then + echo -n "$gguf_3b" + elif [[ -s $gguf_7b ]]; then + echo -n "$gguf_7b" + else + echo >&2 "No model found. Can't run gg_run_ctest_with_model." + exit 1 + fi +} + +function gg_run_ctest_with_model_debug { + cd ${SRC} + + set -e # enabled before the lookup: a failed gg_get_model must abort instead of running ctest with an empty model path + local model; model=$(gg_get_model) + cd build-ci-debug + (LLAMACPP_TEST_MODELFILE="$model" time ctest --output-on-failure -L model) 2>&1 | tee -a $OUT/${ci}-ctest.log + set +e + cd .. +} + +function gg_run_ctest_with_model_release { + cd ${SRC} + + set -e # enabled before the lookup: a failed gg_get_model must abort instead of running ctest with an empty model path + local model; model=$(gg_get_model) + cd build-ci-release + (LLAMACPP_TEST_MODELFILE="$model" time ctest --output-on-failure -L model) 2>&1 | tee -a $OUT/${ci}-ctest.log + set +e + cd .. 
+} + +function gg_sum_ctest_with_model_debug { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'Runs ctest with model files in debug mode\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '```\n' + gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)" + gg_printf '```\n' +} + +function gg_sum_ctest_with_model_release { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'Runs ctest with model files in release mode\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '```\n' + gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)" + gg_printf '```\n' +} + # open_llama_3b_v2 function gg_run_open_llama_3b_v2 { @@ -183,8 +238,6 @@ function gg_run_open_llama_3b_v2 { wiki_test_60="${path_wiki}/wiki.test-60.raw" - ./bin/test-autorelease ${model_f16} - ./bin/quantize ${model_f16} ${model_q8_0} q8_0 ./bin/quantize ${model_f16} ${model_q4_0} q4_0 ./bin/quantize ${model_f16} ${model_q4_1} q4_1 @@ -507,14 +560,18 @@ function gg_sum_open_llama_7b_v2 { ## main if [ -z ${GG_BUILD_LOW_PERF} ]; then + # Create symlink: ./llama.cpp/models-mnt -> $MNT/models/models-mnt rm -rf ${SRC}/models-mnt - mnt_models=${MNT}/models mkdir -p ${mnt_models} ln -sfn ${mnt_models} ${SRC}/models-mnt - python3 -m pip install -r ${SRC}/requirements.txt - python3 -m pip install --editable gguf-py + # Create a fresh python3 venv and enter it + python3 -m venv "$MNT/venv" + source "$MNT/venv/bin/activate" + + pip install -r ${SRC}/requirements.txt --disable-pip-version-check + pip install --editable gguf-py --disable-pip-version-check fi ret=0 @@ -529,6 +586,8 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then else test $ret -eq 0 && gg_run open_llama_7b_v2 fi + test $ret -eq 0 && gg_run ctest_with_model_debug + test $ret -eq 0 && gg_run ctest_with_model_release fi fi diff --git a/scripts/ci-run.sh b/scripts/ci-run.sh new file mode 100755 index 000000000..06b5d9c6e --- /dev/null +++ b/scripts/ci-run.sh @@ -0,0 +1,50 @@ +#!/bin/bash +set -euo pipefail +this=$(realpath "$0"); readonly this +cd "$(dirname 
"$this")" +shellcheck "$this" + +if (( $# != 1 && $# != 2 )); then + cat >&2 <<'EOF' +usage: + ci-run.sh [] + +This script wraps ci/run.sh: +* If is a ramdisk, you can reduce writes to your SSD. If is not a ramdisk, keep in mind that total writes will increase by the size of . + (openllama_3b_v2: quantized models are about 30GB) +* Persistent model and data files are synced to and from , + excluding generated .gguf files. + (openllama_3b_v2: persistent files are about 6.6GB) +* defaults to ~/.cache/llama.cpp +EOF + exit 1 +fi + +cd .. # => llama.cpp repo root + +tmp="$1" +mkdir -p "$tmp" +tmp=$(realpath "$tmp") +echo >&2 "Using tmp=$tmp" + +cache="${2-$HOME/.cache/llama.cpp}" +mkdir -p "$cache" +cache=$(realpath "$cache") +echo >&2 "Using cache=$cache" + +_sync() { + local from="$1"; shift + local to="$1"; shift + + echo >&2 "Syncing from $from to $to" + mkdir -p "$from" "$to" + rsync -a "$from" "$to" --delete-during "$@" +} + +_sync "$(realpath .)/" "$tmp/llama.cpp" +_sync "$cache/ci-mnt/models/" "$tmp/llama.cpp/ci-mnt/models/" + +cd "$tmp/llama.cpp" +bash ci/run.sh ci-out ci-mnt + +_sync 'ci-mnt/models/' "$cache/ci-mnt/models/" --exclude='*.gguf' -P diff --git a/tests/.gitignore b/tests/.gitignore new file mode 100644 index 000000000..59be43b99 --- /dev/null +++ b/tests/.gitignore @@ -0,0 +1,2 @@ +* +!*.* diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index d7aaab843..3e40a78cd 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,6 +1,6 @@ function(llama_build_executable source) get_filename_component(TEST_TARGET ${source} NAME_WE) - add_executable(${TEST_TARGET} ${source}) + add_executable(${TEST_TARGET} ${source} get-model.cpp) install(TARGETS ${TEST_TARGET} RUNTIME) target_link_libraries(${TEST_TARGET} PRIVATE common) endfunction() @@ -8,14 +8,20 @@ endfunction() function(llama_test_executable name source) get_filename_component(TEST_TARGET ${source} NAME_WE) add_test(NAME ${name} COMMAND $ ${ARGN}) + set_property(TEST ${name} PROPERTY 
LABELS "main") endfunction() function(llama_build_and_test_executable source) + llama_build_and_test_executable_with_label(${source} "main") +endfunction() + +function(llama_build_and_test_executable_with_label source label) get_filename_component(TEST_TARGET ${source} NAME_WE) - add_executable(${TEST_TARGET} ${source}) + add_executable(${TEST_TARGET} ${source} get-model.cpp) install(TARGETS ${TEST_TARGET} RUNTIME) target_link_libraries(${TEST_TARGET} PRIVATE common) add_test(NAME ${TEST_TARGET} COMMAND $ ${ARGN}) + set_property(TEST ${TEST_TARGET} PROPERTY LABELS ${label}) endfunction() # llama_build_and_test_executable(test-double-float.cpp) # SLOW @@ -49,10 +55,12 @@ llama_build_and_test_executable(test-llama-grammar.cpp) llama_build_and_test_executable(test-grad0.cpp) # llama_build_and_test_executable(test-opt.cpp) # SLOW llama_build_and_test_executable(test-backend-ops.cpp) -llama_build_and_test_executable(test-autorelease.cpp) llama_build_and_test_executable(test-rope.cpp) +llama_build_and_test_executable_with_label(test-model-load-cancel.cpp "model") +llama_build_and_test_executable_with_label(test-autorelease.cpp "model") + # dummy executable - not installed get_filename_component(TEST_TARGET test-c.c NAME_WE) add_executable(${TEST_TARGET} test-c.c) diff --git a/tests/get-model.cpp b/tests/get-model.cpp new file mode 100644 index 000000000..4edb685f0 --- /dev/null +++ b/tests/get-model.cpp @@ -0,0 +1,21 @@ +#include +#include +#include + +#include "get-model.h" + +char * get_model_or_exit(int argc, char *argv[]) { + char * model_path; + if (argc > 1) { + model_path = argv[1]; + + } else { + model_path = getenv("LLAMACPP_TEST_MODELFILE"); + if (!model_path || strlen(model_path) == 0) { + fprintf(stderr, "\033[33mWARNING: No model file provided. Skipping this test. 
Set LLAMACPP_TEST_MODELFILE= to silence this warning and run this test.\n\033[0m"); + exit(EXIT_SUCCESS); + } + } + + return model_path; +} diff --git a/tests/get-model.h b/tests/get-model.h new file mode 100644 index 000000000..81a3a0fef --- /dev/null +++ b/tests/get-model.h @@ -0,0 +1,2 @@ +#pragma once +char * get_model_or_exit(int, char*[]); diff --git a/tests/test-autorelease.cpp b/tests/test-autorelease.cpp index 289c6ba6c..36a23c0bb 100644 --- a/tests/test-autorelease.cpp +++ b/tests/test-autorelease.cpp @@ -5,19 +5,15 @@ #include #include "llama.h" +#include "get-model.h" // This creates a new context inside a pthread and then tries to exit cleanly. int main(int argc, char ** argv) { - if (argc < 2) { - printf("Usage: %s model.gguf\n", argv[0]); - return 0; // intentionally return success - } + auto * model_path = get_model_or_exit(argc, argv); - const std::string fname = argv[1]; - - std::thread([&fname]() { + std::thread([&model_path]() { llama_backend_init(false); - auto * model = llama_load_model_from_file(fname.c_str(), llama_model_default_params()); + auto * model = llama_load_model_from_file(model_path, llama_model_default_params()); auto * ctx = llama_new_context_with_model(model, llama_context_default_params()); llama_free(ctx); llama_free_model(model); diff --git a/tests/test-model-load-cancel.cpp b/tests/test-model-load-cancel.cpp new file mode 100644 index 000000000..7ea4bbacc --- /dev/null +++ b/tests/test-model-load-cancel.cpp @@ -0,0 +1,27 @@ +#include "llama.h" +#include "get-model.h" + +#include + +int main(int argc, char *argv[] ) { + auto * model_path = get_model_or_exit(argc, argv); + auto * file = fopen(model_path, "r"); + if (file == nullptr) { + fprintf(stderr, "no model at '%s' found\n", model_path); + return EXIT_FAILURE; + } + + fprintf(stderr, "using '%s'\n", model_path); + fclose(file); + + llama_backend_init(false); + auto params = llama_model_params{}; + params.use_mmap = false; + params.progress_callback = [](float progress, 
void * ctx){ + (void) ctx; + return progress > 0.50; + }; + auto * model = llama_load_model_from_file(model_path, params); + llama_backend_free(); + return model == nullptr ? EXIT_SUCCESS : EXIT_FAILURE; +}