ci : add model tests + script wrapper (#4586)

* scripts : add lib.sh and lib_test.sh

* scripts : stub out new ci-run.sh script

* scripts : switch to PascalCase for functions

This looks a little odd at first, but I find it very useful as a
convention to know if a command is part of our code vs a builtin.

* scripts : add some fancy conversion from snake_case to PascalCase

* Add venv to ci/run.sh

* Revert scripts work

* scripts : add wrapper script for local use of ci/run.sh

* Simplify .gitignore for tests, clang-tidy fixes

* Label all ctest tests

* ci : ctest uses -L main

* Attempt at writing ctest_with_model

* Update test-model-load-cancel

* ci : add ctest_with_model for debug and release

ggml-ci

* Fix gg_get_model function

ggml-ci

* got stuck on CMake

* Add get_model.cpp to tests/CMakeLists.txt

ggml-ci

* Fix README.md output for ctest_with_model

ggml-ci

* workflows : use `-L main` for all ctest

ggml-ci

* Fixes

* GG_RUN_CTEST_MODELFILE => LLAMACPP_TESTMODELFILE
* Always show warning rather than failing if model file variable is not
  set

* scripts : update usage text for ci-run.sh
This commit is contained in:
crasm 2024-01-26 07:18:00 -05:00 committed by GitHub
parent 6dd3c28c9c
commit 413e7b0559
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 199 additions and 48 deletions

View File

@ -72,7 +72,7 @@ jobs:
id: cmake_test id: cmake_test
run: | run: |
cd build cd build
ctest --verbose --timeout 900 ctest -L main --verbose --timeout 900
ubuntu-latest-cmake-sanitizer: ubuntu-latest-cmake-sanitizer:
runs-on: ubuntu-latest runs-on: ubuntu-latest
@ -107,7 +107,7 @@ jobs:
id: cmake_test id: cmake_test
run: | run: |
cd build cd build
ctest --verbose --timeout 900 ctest -L main --verbose --timeout 900
ubuntu-latest-cmake-mpi: ubuntu-latest-cmake-mpi:
runs-on: ubuntu-latest runs-on: ubuntu-latest
@ -141,7 +141,7 @@ jobs:
id: cmake_test id: cmake_test
run: | run: |
cd build cd build
ctest --verbose ctest -L main --verbose
# TODO: build with LLAMA_NO_METAL because test-backend-ops fail on "Apple Paravirtual device" and I don't know # TODO: build with LLAMA_NO_METAL because test-backend-ops fail on "Apple Paravirtual device" and I don't know
# how to debug it. # how to debug it.
@ -202,7 +202,7 @@ jobs:
id: cmake_test id: cmake_test
run: | run: |
cd build cd build
ctest --verbose --timeout 900 ctest -L main --verbose --timeout 900
macOS-latest-cmake-ios: macOS-latest-cmake-ios:
runs-on: macos-latest runs-on: macos-latest
@ -394,7 +394,7 @@ jobs:
if: ${{ matrix.build != 'clblast' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }} # not all machines have native AVX-512 if: ${{ matrix.build != 'clblast' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }} # not all machines have native AVX-512
run: | run: |
cd build cd build
ctest -C Release --verbose --timeout 900 ctest -L main -C Release --verbose --timeout 900
- name: Test (Intel SDE) - name: Test (Intel SDE)
id: cmake_test_sde id: cmake_test_sde
@ -406,7 +406,7 @@ jobs:
7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
$sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe) $sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
cd build cd build
& $sde -future -- ctest -C Release --verbose --timeout 900 & $sde -future -- ctest -L main -C Release --verbose --timeout 900
- name: Determine tag name - name: Determine tag name
id: tag id: tag

19
.gitignore vendored
View File

@ -27,7 +27,7 @@
lcov-report/ lcov-report/
gcovr-report/ gcovr-report/
build*/ build*
out/ out/
tmp/ tmp/
@ -89,20 +89,3 @@ examples/jeopardy/results.txt
poetry.lock poetry.lock
poetry.toml poetry.toml
# Test binaries
/tests/test-grammar-parser
/tests/test-llama-grammar
/tests/test-double-float
/tests/test-grad0
/tests/test-opt
/tests/test-quantize-fns
/tests/test-quantize-perf
/tests/test-sampling
/tests/test-tokenizer-0-llama
/tests/test-tokenizer-0-falcon
/tests/test-tokenizer-1-llama
/tests/test-tokenizer-1-bpe
/tests/test-rope
/tests/test-backend-ops
/tests/test-autorelease

View File

@ -9,7 +9,7 @@ TEST_TARGETS = \
tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt \ tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt \
tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama \ tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama \
tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe tests/test-rope \ tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe tests/test-rope \
tests/test-backend-ops tests/test-autorelease tests/test-backend-ops tests/test-model-load-cancel tests/test-autorelease
# Code coverage output files # Code coverage output files
COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
@ -748,5 +748,8 @@ tests/test-c.o: tests/test-c.c llama.h
tests/test-backend-ops: tests/test-backend-ops.cpp ggml.o $(OBJS) tests/test-backend-ops: tests/test-backend-ops.cpp ggml.o $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) tests/test-model-load-cancel: tests/test-model-load-cancel.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

View File

@ -22,9 +22,9 @@ mkdir -p "$2"
OUT=$(realpath "$1") OUT=$(realpath "$1")
MNT=$(realpath "$2") MNT=$(realpath "$2")
rm -v $OUT/*.log rm -f "$OUT/*.log"
rm -v $OUT/*.exit rm -f "$OUT/*.exit"
rm -v $OUT/*.md rm -f "$OUT/*.md"
sd=`dirname $0` sd=`dirname $0`
cd $sd/../ cd $sd/../
@ -94,7 +94,7 @@ function gg_run_ctest_debug {
(time cmake -DCMAKE_BUILD_TYPE=Debug ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log (time cmake -DCMAKE_BUILD_TYPE=Debug ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
(time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
(time ctest --output-on-failure -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log (time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
set +e set +e
} }
@ -123,9 +123,9 @@ function gg_run_ctest_release {
(time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
if [ -z ${GG_BUILD_LOW_PERF} ]; then if [ -z ${GG_BUILD_LOW_PERF} ]; then
(time ctest --output-on-failure ) 2>&1 | tee -a $OUT/${ci}-ctest.log (time ctest --output-on-failure -L main ) 2>&1 | tee -a $OUT/${ci}-ctest.log
else else
(time ctest --output-on-failure -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log (time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
fi fi
set +e set +e
@ -141,6 +141,61 @@ function gg_sum_ctest_release {
gg_printf '```\n' gg_printf '```\n'
} }
function gg_get_model {
local gguf_3b="$MNT/models/open-llama/3B-v2/ggml-model-f16.gguf"
local gguf_7b="$MNT/models/open-llama/7B-v2/ggml-model-f16.gguf"
if [[ -s $gguf_3b ]]; then
echo -n "$gguf_3b"
elif [[ -s $gguf_7b ]]; then
echo -n "$gguf_7b"
else
echo >&2 "No model found. Can't run gg_run_ctest_with_model."
exit 1
fi
}
function gg_run_ctest_with_model_debug {
cd ${SRC}
local model; model=$(gg_get_model)
cd build-ci-debug
set -e
(LLAMACPP_TEST_MODELFILE="$model" time ctest --output-on-failure -L model) 2>&1 | tee -a $OUT/${ci}-ctest.log
set +e
cd ..
}
function gg_run_ctest_with_model_release {
cd ${SRC}
local model; model=$(gg_get_model)
cd build-ci-release
set -e
(LLAMACPP_TEST_MODELFILE="$model" time ctest --output-on-failure -L model) 2>&1 | tee -a $OUT/${ci}-ctest.log
set +e
cd ..
}
function gg_sum_ctest_with_model_debug {
gg_printf '### %s\n\n' "${ci}"
gg_printf 'Runs ctest with model files in debug mode\n'
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
gg_printf '```\n'
gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)"
gg_printf '```\n'
}
function gg_sum_ctest_with_model_release {
gg_printf '### %s\n\n' "${ci}"
gg_printf 'Runs ctest with model files in release mode\n'
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
gg_printf '```\n'
gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)"
gg_printf '```\n'
}
# open_llama_3b_v2 # open_llama_3b_v2
function gg_run_open_llama_3b_v2 { function gg_run_open_llama_3b_v2 {
@ -183,8 +238,6 @@ function gg_run_open_llama_3b_v2 {
wiki_test_60="${path_wiki}/wiki.test-60.raw" wiki_test_60="${path_wiki}/wiki.test-60.raw"
./bin/test-autorelease ${model_f16}
./bin/quantize ${model_f16} ${model_q8_0} q8_0 ./bin/quantize ${model_f16} ${model_q8_0} q8_0
./bin/quantize ${model_f16} ${model_q4_0} q4_0 ./bin/quantize ${model_f16} ${model_q4_0} q4_0
./bin/quantize ${model_f16} ${model_q4_1} q4_1 ./bin/quantize ${model_f16} ${model_q4_1} q4_1
@ -507,14 +560,18 @@ function gg_sum_open_llama_7b_v2 {
## main ## main
if [ -z ${GG_BUILD_LOW_PERF} ]; then if [ -z ${GG_BUILD_LOW_PERF} ]; then
# Create symlink: ./llama.cpp/models-mnt -> $MNT/models/models-mnt
rm -rf ${SRC}/models-mnt rm -rf ${SRC}/models-mnt
mnt_models=${MNT}/models mnt_models=${MNT}/models
mkdir -p ${mnt_models} mkdir -p ${mnt_models}
ln -sfn ${mnt_models} ${SRC}/models-mnt ln -sfn ${mnt_models} ${SRC}/models-mnt
python3 -m pip install -r ${SRC}/requirements.txt # Create a fresh python3 venv and enter it
python3 -m pip install --editable gguf-py python3 -m venv "$MNT/venv"
source "$MNT/venv/bin/activate"
pip install -r ${SRC}/requirements.txt --disable-pip-version-check
pip install --editable gguf-py --disable-pip-version-check
fi fi
ret=0 ret=0
@ -529,6 +586,8 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then
else else
test $ret -eq 0 && gg_run open_llama_7b_v2 test $ret -eq 0 && gg_run open_llama_7b_v2
fi fi
test $ret -eq 0 && gg_run ctest_with_model_debug
test $ret -eq 0 && gg_run ctest_with_model_release
fi fi
fi fi

50
scripts/ci-run.sh Executable file
View File

@ -0,0 +1,50 @@
#!/bin/bash
set -euo pipefail
this=$(realpath "$0"); readonly this
cd "$(dirname "$this")"
shellcheck "$this"
if (( $# != 1 && $# != 2 )); then
cat >&2 <<'EOF'
usage:
ci-run.sh <tmp_dir> [<cache_dir>]
This script wraps ci/run.sh:
* If <tmp_dir> is a ramdisk, you can reduce writes to your SSD. If <tmp_dir> is not a ramdisk, keep in mind that total writes will increase by the size of <cache_dir>.
(openllama_3b_v2: quantized models are about 30GB)
* Persistent model and data files are synced to and from <cache_dir>,
excluding generated .gguf files.
(openllama_3b_v2: persistent files are about 6.6GB)
* <cache_dir> defaults to ~/.cache/llama.cpp
EOF
exit 1
fi
cd .. # => llama.cpp repo root
tmp="$1"
mkdir -p "$tmp"
tmp=$(realpath "$tmp")
echo >&2 "Using tmp=$tmp"
cache="${2-$HOME/.cache/llama.cpp}"
mkdir -p "$cache"
cache=$(realpath "$cache")
echo >&2 "Using cache=$cache"
_sync() {
local from="$1"; shift
local to="$1"; shift
echo >&2 "Syncing from $from to $to"
mkdir -p "$from" "$to"
rsync -a "$from" "$to" --delete-during "$@"
}
_sync "$(realpath .)/" "$tmp/llama.cpp"
_sync "$cache/ci-mnt/models/" "$tmp/llama.cpp/ci-mnt/models/"
cd "$tmp/llama.cpp"
bash ci/run.sh ci-out ci-mnt
_sync 'ci-mnt/models/' "$cache/ci-mnt/models/" --exclude='*.gguf' -P

2
tests/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*
!*.*

View File

@ -1,6 +1,6 @@
function(llama_build_executable source) function(llama_build_executable source)
get_filename_component(TEST_TARGET ${source} NAME_WE) get_filename_component(TEST_TARGET ${source} NAME_WE)
add_executable(${TEST_TARGET} ${source}) add_executable(${TEST_TARGET} ${source} get-model.cpp)
install(TARGETS ${TEST_TARGET} RUNTIME) install(TARGETS ${TEST_TARGET} RUNTIME)
target_link_libraries(${TEST_TARGET} PRIVATE common) target_link_libraries(${TEST_TARGET} PRIVATE common)
endfunction() endfunction()
@ -8,14 +8,20 @@ endfunction()
function(llama_test_executable name source) function(llama_test_executable name source)
get_filename_component(TEST_TARGET ${source} NAME_WE) get_filename_component(TEST_TARGET ${source} NAME_WE)
add_test(NAME ${name} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN}) add_test(NAME ${name} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
set_property(TEST ${name} PROPERTY LABELS "main")
endfunction() endfunction()
function(llama_build_and_test_executable source) function(llama_build_and_test_executable source)
llama_build_and_test_executable_with_label(${source} "main")
endfunction()
function(llama_build_and_test_executable_with_label source label)
get_filename_component(TEST_TARGET ${source} NAME_WE) get_filename_component(TEST_TARGET ${source} NAME_WE)
add_executable(${TEST_TARGET} ${source}) add_executable(${TEST_TARGET} ${source} get-model.cpp)
install(TARGETS ${TEST_TARGET} RUNTIME) install(TARGETS ${TEST_TARGET} RUNTIME)
target_link_libraries(${TEST_TARGET} PRIVATE common) target_link_libraries(${TEST_TARGET} PRIVATE common)
add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN}) add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
set_property(TEST ${TEST_TARGET} PROPERTY LABELS ${label})
endfunction() endfunction()
# llama_build_and_test_executable(test-double-float.cpp) # SLOW # llama_build_and_test_executable(test-double-float.cpp) # SLOW
@ -49,10 +55,12 @@ llama_build_and_test_executable(test-llama-grammar.cpp)
llama_build_and_test_executable(test-grad0.cpp) llama_build_and_test_executable(test-grad0.cpp)
# llama_build_and_test_executable(test-opt.cpp) # SLOW # llama_build_and_test_executable(test-opt.cpp) # SLOW
llama_build_and_test_executable(test-backend-ops.cpp) llama_build_and_test_executable(test-backend-ops.cpp)
llama_build_and_test_executable(test-autorelease.cpp)
llama_build_and_test_executable(test-rope.cpp) llama_build_and_test_executable(test-rope.cpp)
llama_build_and_test_executable_with_label(test-model-load-cancel.cpp "model")
llama_build_and_test_executable_with_label(test-autorelease.cpp "model")
# dummy executable - not installed # dummy executable - not installed
get_filename_component(TEST_TARGET test-c.c NAME_WE) get_filename_component(TEST_TARGET test-c.c NAME_WE)
add_executable(${TEST_TARGET} test-c.c) add_executable(${TEST_TARGET} test-c.c)

21
tests/get-model.cpp Normal file
View File

@ -0,0 +1,21 @@
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include "get-model.h"
char * get_model_or_exit(int argc, char *argv[]) {
char * model_path;
if (argc > 1) {
model_path = argv[1];
} else {
model_path = getenv("LLAMACPP_TEST_MODELFILE");
if (!model_path || strlen(model_path) == 0) {
fprintf(stderr, "\033[33mWARNING: No model file provided. Skipping this test. Set LLAMACPP_TEST_MODELFILE=<gguf_model_path> to silence this warning and run this test.\n\033[0m");
exit(EXIT_SUCCESS);
}
}
return model_path;
}

2
tests/get-model.h Normal file
View File

@ -0,0 +1,2 @@
#pragma once
char * get_model_or_exit(int, char*[]);

View File

@ -5,19 +5,15 @@
#include <thread> #include <thread>
#include "llama.h" #include "llama.h"
#include "get-model.h"
// This creates a new context inside a pthread and then tries to exit cleanly. // This creates a new context inside a pthread and then tries to exit cleanly.
int main(int argc, char ** argv) { int main(int argc, char ** argv) {
if (argc < 2) { auto * model_path = get_model_or_exit(argc, argv);
printf("Usage: %s model.gguf\n", argv[0]);
return 0; // intentionally return success
}
const std::string fname = argv[1]; std::thread([&model_path]() {
std::thread([&fname]() {
llama_backend_init(false); llama_backend_init(false);
auto * model = llama_load_model_from_file(fname.c_str(), llama_model_default_params()); auto * model = llama_load_model_from_file(model_path, llama_model_default_params());
auto * ctx = llama_new_context_with_model(model, llama_context_default_params()); auto * ctx = llama_new_context_with_model(model, llama_context_default_params());
llama_free(ctx); llama_free(ctx);
llama_free_model(model); llama_free_model(model);

View File

@ -0,0 +1,27 @@
#include "llama.h"
#include "get-model.h"
#include <cstdlib>
int main(int argc, char *argv[] ) {
auto * model_path = get_model_or_exit(argc, argv);
auto * file = fopen(model_path, "r");
if (file == nullptr) {
fprintf(stderr, "no model at '%s' found\n", model_path);
return EXIT_FAILURE;
}
fprintf(stderr, "using '%s'\n", model_path);
fclose(file);
llama_backend_init(false);
auto params = llama_model_params{};
params.use_mmap = false;
params.progress_callback = [](float progress, void * ctx){
(void) ctx;
return progress > 0.50;
};
auto * model = llama_load_model_from_file(model_path, params);
llama_backend_free();
return model == nullptr ? EXIT_SUCCESS : EXIT_FAILURE;
}