ci : add model tests + script wrapper (#4586)

* scripts : add lib.sh and lib_test.sh * scripts : stub out new ci-run.sh script * scripts : switch to PascalCase for functions This looks a little odd at first, but I find it very useful as a convention to know if a command is part of our code vs a builtin. * scripts : add some fancy conversion from snake_case to PascalCase * Add venv to ci/run.sh * Revert scripts work * scripts : add wrapper script for local use of ci/run.sh * Simplify .gitignore for tests, clang-tidy fixes * Label all ctest tests * ci : ctest uses -L main * Attempt at writing ctest_with_model * Update test-model-load-cancel * ci : add ctest_with_model for debug and release ggml-ci * Fix gg_get_model function ggml-ci * got stuck on CMake * Add get_model.cpp to tests/CMakeLists.txt ggml-ci * Fix README.md output for ctest_with_model ggml-ci * workflows : use `-L main` for all ctest ggml-ci * Fixes * GG_RUN_CTEST_MODELFILE => LLAMACPP_TEST_MODELFILE * Always show warning rather than failing if model file variable is not set * scripts : update usage text for ci-run.sh
2024-09-22 21:16:20 +00:00 · 2024-01-26 07:18:00 -05:00 · 2024-01-26 07:18:00 -05:00 · 413e7b0559
commit 413e7b0559
parent 6dd3c28c9c
11 changed files with 199 additions and 48 deletions
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@ -72,7 +72,7 @@ jobs:
        id: cmake_test
        run: |
          cd build
-          ctest --verbose --timeout 900
+          ctest -L main --verbose --timeout 900
  ubuntu-latest-cmake-sanitizer:
    runs-on: ubuntu-latest
@ -107,7 +107,7 @@ jobs:
        id: cmake_test
        run: |
          cd build
-          ctest --verbose --timeout 900
+          ctest -L main --verbose --timeout 900
  ubuntu-latest-cmake-mpi:
    runs-on: ubuntu-latest
@ -141,7 +141,7 @@ jobs:
        id: cmake_test
        run: |
          cd build
-          ctest --verbose
+          ctest -L main --verbose
  # TODO: build with LLAMA_NO_METAL because test-backend-ops fail on "Apple Paravirtual device" and I don't know
  #       how to debug it.
@ -202,7 +202,7 @@ jobs:
        id: cmake_test
        run: |
          cd build
-          ctest --verbose --timeout 900
+          ctest -L main --verbose --timeout 900
  macOS-latest-cmake-ios:
    runs-on: macos-latest
@ -394,7 +394,7 @@ jobs:
        if: ${{ matrix.build != 'clblast' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }} # not all machines have native AVX-512
        run: |
          cd build
-          ctest -C Release --verbose --timeout 900
+          ctest -L main -C Release --verbose --timeout 900
      - name: Test (Intel SDE)
        id: cmake_test_sde
@ -406,7 +406,7 @@ jobs:
          7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
          $sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
          cd build
-          & $sde -future -- ctest -C Release --verbose --timeout 900
+          & $sde -future -- ctest -L main -C Release --verbose --timeout 900
      - name: Determine tag name
        id: tag
--- a/.gitignore
+++ b/.gitignore
@ -27,7 +27,7 @@
 lcov-report/
 gcovr-report/
-build*/
+build*
 out/
 tmp/
@ -89,20 +89,3 @@ examples/jeopardy/results.txt
 poetry.lock
 poetry.toml
 # Test binaries
 /tests/test-grammar-parser
 /tests/test-llama-grammar
 /tests/test-double-float
 /tests/test-grad0
 /tests/test-opt
 /tests/test-quantize-fns
 /tests/test-quantize-perf
 /tests/test-sampling
 /tests/test-tokenizer-0-llama
 /tests/test-tokenizer-0-falcon
 /tests/test-tokenizer-1-llama
 /tests/test-tokenizer-1-bpe
 /tests/test-rope
 /tests/test-backend-ops
 /tests/test-autorelease
--- a/7
+++ b/7
@ -9,7 +9,7 @@ TEST_TARGETS = \
 	tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt \
 	tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama          \
 	tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe tests/test-rope      \
-	tests/test-backend-ops tests/test-autorelease
+	tests/test-backend-ops tests/test-model-load-cancel tests/test-autorelease
 # Code coverage output files
 COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
@ -748,5 +748,8 @@ tests/test-c.o: tests/test-c.c llama.h
 tests/test-backend-ops: tests/test-backend-ops.cpp ggml.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
-tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
+tests/test-model-load-cancel: tests/test-model-load-cancel.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
--- a/ci/run.sh
+++ b/ci/run.sh
@ -22,9 +22,9 @@ mkdir -p "$2"
 OUT=$(realpath "$1")
 MNT=$(realpath "$2")
-rm -v $OUT/*.log
+# Remove stale results from a previous run; the glob stays outside the quotes so it still expands.
+rm -f "$OUT"/*.log
-rm -v $OUT/*.exit
+rm -f "$OUT"/*.exit
-rm -v $OUT/*.md
+rm -f "$OUT"/*.md
 sd=`dirname $0`
 cd $sd/../
@ -94,7 +94,7 @@ function gg_run_ctest_debug {
    (time cmake -DCMAKE_BUILD_TYPE=Debug ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
    (time make -j                                          ) 2>&1 | tee -a $OUT/${ci}-make.log
-    (time ctest --output-on-failure -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
+    (time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
    set +e
 }
@ -123,9 +123,9 @@ function gg_run_ctest_release {
    (time make -j                                            ) 2>&1 | tee -a $OUT/${ci}-make.log
    if [ -z ${GG_BUILD_LOW_PERF} ]; then
-        (time ctest --output-on-failure ) 2>&1 | tee -a $OUT/${ci}-ctest.log
+        (time ctest --output-on-failure -L main ) 2>&1 | tee -a $OUT/${ci}-ctest.log
    else
-        (time ctest --output-on-failure -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
+        (time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
    fi
    set +e
@ -141,6 +141,61 @@ function gg_sum_ctest_release {
    gg_printf '```\n'
 }
 function gg_get_model {
    # Prints (without a trailing newline) the first non-empty f16 GGUF found
    # under $MNT, preferring the 3B model over the 7B one. When neither exists,
    # reports the problem on stderr and exits with status 1.
    local candidate
    for candidate in \
        "$MNT/models/open-llama/3B-v2/ggml-model-f16.gguf" \
        "$MNT/models/open-llama/7B-v2/ggml-model-f16.gguf"; do
        if [[ -s $candidate ]]; then
            echo -n "$candidate"
            return
        fi
    done
    echo >&2 "No model found. Can't run gg_run_ctest_with_model."
    exit 1
 }
 function gg_run_ctest_with_model_debug {
    # Runs the ctest cases labelled "model" inside build-ci-debug, handing the
    # model path to the tests through LLAMACPP_TEST_MODELFILE. Output is
    # appended to $OUT/${ci}-ctest.log.
    cd ${SRC}
    # gg_get_model runs in a command substitution, so its `exit 1` only ends
    # that subshell. Check the status here; otherwise an empty path is passed on
    # and the tests skip themselves with a success status.
    local model; model=$(gg_get_model) || return 1
    cd build-ci-debug
    set -e
    # Export inside the subshell so `time` stays the bash keyword, as in the
    # other ctest runners; a leading VAR=... prefix would need an external `time`.
    (export LLAMACPP_TEST_MODELFILE="$model"; time ctest --output-on-failure -L model) 2>&1 | tee -a $OUT/${ci}-ctest.log
    set +e
    cd ..
 }
 function gg_run_ctest_with_model_release {
    # Runs the ctest cases labelled "model" inside build-ci-release, handing the
    # model path to the tests through LLAMACPP_TEST_MODELFILE. Output is
    # appended to $OUT/${ci}-ctest.log.
    cd ${SRC}
    # gg_get_model runs in a command substitution, so its `exit 1` only ends
    # that subshell. Check the status here; otherwise an empty path is passed on
    # and the tests skip themselves with a success status.
    local model; model=$(gg_get_model) || return 1
    cd build-ci-release
    set -e
    # Export inside the subshell so `time` stays the bash keyword, as in the
    # other ctest runners; a leading VAR=... prefix would need an external `time`.
    (export LLAMACPP_TEST_MODELFILE="$model"; time ctest --output-on-failure -L model) 2>&1 | tee -a $OUT/${ci}-ctest.log
    set +e
    cd ..
 }
 function gg_sum_ctest_with_model_debug {
    # Emits the summary section for the debug "model" ctest run via gg_printf:
    # a heading, the recorded exit status and the captured ctest log in a fence.
    local exit_status ctest_log
    exit_status="$(cat $OUT/${ci}.exit)"
    ctest_log="$(cat $OUT/${ci}-ctest.log)"
    gg_printf '### %s\n\n' "${ci}"
    gg_printf 'Runs ctest with model files in debug mode\n'
    gg_printf '- status: %s\n' "${exit_status}"
    gg_printf '```\n'
    gg_printf '%s\n' "${ctest_log}"
    gg_printf '```\n'
 }
 function gg_sum_ctest_with_model_release {
    # Emits the summary section for the release "model" ctest run via gg_printf:
    # a heading, the recorded exit status and the captured ctest log in a fence.
    local exit_status ctest_log
    exit_status="$(cat $OUT/${ci}.exit)"
    ctest_log="$(cat $OUT/${ci}-ctest.log)"
    gg_printf '### %s\n\n' "${ci}"
    gg_printf 'Runs ctest with model files in release mode\n'
    gg_printf '- status: %s\n' "${exit_status}"
    gg_printf '```\n'
    gg_printf '%s\n' "${ctest_log}"
    gg_printf '```\n'
 }
 # open_llama_3b_v2
 function gg_run_open_llama_3b_v2 {
@ -183,8 +238,6 @@ function gg_run_open_llama_3b_v2 {
    wiki_test_60="${path_wiki}/wiki.test-60.raw"
    ./bin/test-autorelease ${model_f16}
    ./bin/quantize ${model_f16} ${model_q8_0} q8_0
    ./bin/quantize ${model_f16} ${model_q4_0} q4_0
    ./bin/quantize ${model_f16} ${model_q4_1} q4_1
@ -507,14 +560,18 @@ function gg_sum_open_llama_7b_v2 {
 ## main
 if [ -z ${GG_BUILD_LOW_PERF} ]; then
    # Create symlink: ./llama.cpp/models-mnt -> $MNT/models/models-mnt
    rm -rf ${SRC}/models-mnt
    mnt_models=${MNT}/models
    mkdir -p ${mnt_models}
    ln -sfn ${mnt_models} ${SRC}/models-mnt
-    python3 -m pip install -r ${SRC}/requirements.txt
+    # Create a fresh python3 venv and enter it
-    python3 -m pip install --editable gguf-py
+    python3 -m venv "$MNT/venv"
    source "$MNT/venv/bin/activate"
    pip install -r ${SRC}/requirements.txt --disable-pip-version-check
    pip install --editable gguf-py --disable-pip-version-check
 fi
 ret=0
@ -529,6 +586,8 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then
        else
            test $ret -eq 0 && gg_run open_llama_7b_v2
        fi
        test $ret -eq 0 && gg_run ctest_with_model_debug
        test $ret -eq 0 && gg_run ctest_with_model_release
    fi
 fi
--- a/scripts/ci-run.sh
+++ b/scripts/ci-run.sh
@ -0,0 +1,50 @@
 #!/bin/bash
 # Local wrapper around ci/run.sh: validates its arguments, then resolves the
 # temporary and cache directories that the rest of the script works with.
 # Abort on errors, unset variables and failures anywhere in a pipeline.
 set -euo pipefail
 # Absolute path of this script, so it can still be referenced after the cd below.
 this=$(realpath "$0"); readonly this
 cd "$(dirname "$this")"
 # Lint this script on every run; with `set -e` a shellcheck failure stops here.
 # NOTE(review): this presumably also requires shellcheck to be installed - confirm.
 shellcheck "$this"
 # Exactly one or two positional arguments are accepted; otherwise print usage.
 if (( $# != 1 && $# != 2  )); then
    cat >&2 <<'EOF'
 usage:
    ci-run.sh <tmp_dir> [<cache_dir>]
 This script wraps ci/run.sh:
 * If <tmp_dir> is a ramdisk, you can reduce writes to your SSD. If <tmp_dir> is not a ramdisk, keep in mind that total writes will increase by the size of <cache_dir>.
    (openllama_3b_v2: quantized models are about 30GB)
 * Persistent model and data files are synced to and from <cache_dir>,
    excluding generated .gguf files.
    (openllama_3b_v2: persistent files are about 6.6GB)
 * <cache_dir> defaults to  ~/.cache/llama.cpp
 EOF
    exit 1
 fi
 cd .. # => llama.cpp repo root
 # Create the temporary directory if needed and normalise it to an absolute path.
 tmp="$1"
 mkdir -p "$tmp"
 tmp=$(realpath "$tmp")
 echo >&2 "Using tmp=$tmp"
 # The cache directory falls back to ~/.cache/llama.cpp when no second argument is given.
 cache="${2-$HOME/.cache/llama.cpp}"
 mkdir -p "$cache"
 cache=$(realpath "$cache")
 echo >&2 "Using cache=$cache"
 _sync() {
    # Mirrors directory $1 into $2 with rsync in archive mode, deleting
    # destination files that no longer exist in the source. Both directories
    # are created first; any further arguments are passed through to rsync.
    local src="$1" dst="$2"
    shift 2
    echo >&2 "Syncing from $src to $dst"
    mkdir -p "$src" "$dst"
    rsync -a "$src" "$dst" --delete-during "$@"
 }
 # Copy the current directory (the repo root after the earlier `cd ..`) into the temporary directory.
 _sync "$(realpath .)/" "$tmp/llama.cpp"
 # Seed the temporary copy with the models kept in the cache directory.
 _sync "$cache/ci-mnt/models/" "$tmp/llama.cpp/ci-mnt/models/"
 # Run the CI script from inside the temporary copy, using ci-out and ci-mnt as its two directories.
 cd "$tmp/llama.cpp"
 bash ci/run.sh ci-out ci-mnt
 # Copy the models directory back to the cache, leaving out .gguf files; -P is passed through to rsync.
 _sync 'ci-mnt/models/' "$cache/ci-mnt/models/" --exclude='*.gguf' -P
--- a/tests/.gitignore
+++ b/tests/.gitignore
@ -0,0 +1,2 @@
 *
 !*.*
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@ -1,6 +1,6 @@
 function(llama_build_executable source)
    get_filename_component(TEST_TARGET ${source} NAME_WE)
-    add_executable(${TEST_TARGET} ${source})
+    add_executable(${TEST_TARGET} ${source} get-model.cpp)
    install(TARGETS ${TEST_TARGET} RUNTIME)
    target_link_libraries(${TEST_TARGET} PRIVATE common)
 endfunction()
@ -8,14 +8,20 @@ endfunction()
 function(llama_test_executable name source)
    # Registers test `name`, which runs the executable built from `source`
    # with any extra arguments appended, and tags it with the "main" label.
    get_filename_component(test_binary ${source} NAME_WE)
    add_test(NAME ${name} COMMAND $<TARGET_FILE:${test_binary}> ${ARGN})
    set_tests_properties(${name} PROPERTIES LABELS "main")
 endfunction()
 function(llama_build_and_test_executable source)
    # Builds `source` into a test executable and registers it under the "main"
    # label. Extra arguments are forwarded so they still reach the test's
    # command line through the labelled variant's ${ARGN}.
    llama_build_and_test_executable_with_label(${source} "main" ${ARGN})
 endfunction()
 function(llama_build_and_test_executable_with_label source label)
    # Builds an executable named after `source` (file name without extension),
    # installs it, links it against `common`, and registers a test of the same
    # name carrying `label`. Arguments after `label` are appended to the test
    # command line.
    get_filename_component(TEST_TARGET ${source} NAME_WE)
-    add_executable(${TEST_TARGET} ${source})
+    add_executable(${TEST_TARGET} ${source} get-model.cpp)
    install(TARGETS ${TEST_TARGET} RUNTIME)
    target_link_libraries(${TEST_TARGET} PRIVATE common)
    add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
    # The label is what `ctest -L <label>` selects on.
    set_property(TEST ${TEST_TARGET} PROPERTY LABELS ${label})
 endfunction()
 # llama_build_and_test_executable(test-double-float.cpp) # SLOW
@ -49,10 +55,12 @@ llama_build_and_test_executable(test-llama-grammar.cpp)
 llama_build_and_test_executable(test-grad0.cpp)
 # llama_build_and_test_executable(test-opt.cpp) # SLOW
 llama_build_and_test_executable(test-backend-ops.cpp)
 llama_build_and_test_executable(test-autorelease.cpp)
 llama_build_and_test_executable(test-rope.cpp)
 llama_build_and_test_executable_with_label(test-model-load-cancel.cpp "model")
 llama_build_and_test_executable_with_label(test-autorelease.cpp "model")
 # dummy executable - not installed
 get_filename_component(TEST_TARGET test-c.c NAME_WE)
 add_executable(${TEST_TARGET} test-c.c)
--- a/tests/get-model.cpp
+++ b/tests/get-model.cpp
@ -0,0 +1,21 @@
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include "get-model.h"
 // Resolves the model file a test should use.
 // Returns argv[1] when a command-line argument is present; otherwise returns
 // the value of the LLAMACPP_TEST_MODELFILE environment variable. If that is
 // unset or empty, prints a warning to stderr and exits the process with
 // EXIT_SUCCESS so the test counts as skipped rather than failed.
 char * get_model_or_exit(int argc, char *argv[]) {
    // An explicit command-line path takes priority over the environment.
    if (argc > 1) {
        return argv[1];
    }
    char * env_path = getenv("LLAMACPP_TEST_MODELFILE");
    const bool have_env_path = env_path != nullptr && env_path[0] != '\0';
    if (have_env_path) {
        return env_path;
    }
    fprintf(stderr, "\033[33mWARNING: No model file provided. Skipping this test. Set LLAMACPP_TEST_MODELFILE=<gguf_model_path> to silence this warning and run this test.\n\033[0m");
    exit(EXIT_SUCCESS);
 }
--- a/tests/get-model.h
+++ b/tests/get-model.h
@ -0,0 +1,2 @@
 #pragma once
 // Takes main()'s argc/argv and returns the model path for a test: argv[1] when
 // present, otherwise the LLAMACPP_TEST_MODELFILE environment variable. Prints a
 // warning and exits with EXIT_SUCCESS when neither provides a path.
 char * get_model_or_exit(int, char*[]);
--- a/tests/test-autorelease.cpp
+++ b/tests/test-autorelease.cpp
@ -5,19 +5,15 @@
 #include <thread>
 #include "llama.h"
 #include "get-model.h"
 // This creates a new context inside a pthread and then tries to exit cleanly.
 int main(int argc, char ** argv) {
-    if (argc < 2) {
+    auto * model_path = get_model_or_exit(argc, argv);
        printf("Usage: %s model.gguf\n", argv[0]);
        return 0; // intentionally return success
    }
-    const std::string fname = argv[1];
+    std::thread([&model_path]() {
    std::thread([&fname]() {
        llama_backend_init(false);
-        auto * model = llama_load_model_from_file(fname.c_str(), llama_model_default_params());
+        auto * model = llama_load_model_from_file(model_path, llama_model_default_params());
        auto * ctx = llama_new_context_with_model(model, llama_context_default_params());
        llama_free(ctx);
        llama_free_model(model);
--- a/tests/test-model-load-cancel.cpp
+++ b/tests/test-model-load-cancel.cpp
@ -0,0 +1,27 @@
 #include "llama.h"
 #include "get-model.h"
 #include <cstdlib>
 // Starts loading a model with a progress callback installed and succeeds only
 // when the load yields no model.
 int main(int argc, char *argv[] ) {
    // Model path comes from argv[1] or LLAMACPP_TEST_MODELFILE.
    auto * model_path = get_model_or_exit(argc, argv);
    // Probe that the file can be opened before handing the path to llama.
    auto * probe = fopen(model_path, "r");
    if (!probe) {
        fprintf(stderr, "no model at '%s' found\n", model_path);
        return EXIT_FAILURE;
    }
    fprintf(stderr, "using '%s'\n", model_path);
    fclose(probe);
    llama_backend_init(false);
    auto load_params = llama_model_params{};
    load_params.use_mmap = false;
    // NOTE(review): presumably a false return asks the loader to abort - confirm against llama.h.
    load_params.progress_callback = [](float progress, void * /*ctx*/) {
        return progress > 0.50;
    };
    auto * model = llama_load_model_from_file(model_path, load_params);
    llama_backend_free();
    // A returned model means the load was not cancelled, which fails the test.
    if (model != nullptr) {
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
 }
		`@ -0,0 +1,2 @@`
							`#pragma once`
							`char * get_model_or_exit(int, char*[]);`