From 3bc10cb485dd7efa4da6c64e73ad0c9e2bfe0821 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 20 May 2024 15:10:03 +0300 Subject: [PATCH] server : fix temperature + disable some tests (#7409) * server : fix temperature * server : disable tests relying on parallel determinism * ci : change server Debug -> RelWithDebInfo --- .github/workflows/server.yml | 7 +------ examples/server/tests/features/results.feature | 17 ++++++++--------- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml index 217af67cf..0789efd18 100644 --- a/.github/workflows/server.yml +++ b/.github/workflows/server.yml @@ -33,13 +33,10 @@ jobs: strategy: matrix: sanitizer: [ADDRESS, THREAD, UNDEFINED] - build_type: [Debug] + build_type: [RelWithDebInfo] include: - build_type: Release sanitizer: "" - - build_type: Debug - sanitizer: THREAD - disabled_on_pr: true fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken steps: @@ -103,10 +100,8 @@ jobs: -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ; cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target server - - name: Tests id: server_integration_tests - if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }} run: | cd examples/server/tests PORT=8888 ./tests.sh diff --git a/examples/server/tests/features/results.feature b/examples/server/tests/features/results.feature index 4ab8ad20c..e8e1b5414 100644 --- a/examples/server/tests/features/results.feature +++ b/examples/server/tests/features/results.feature @@ -13,7 +13,7 @@ Feature: Results Scenario Outline: consistent results with same seed Given slots - And 0.0 temperature + And 1.0 temperature Then the server is starting Then the server is healthy @@ -27,7 +27,8 @@ Feature: Results Examples: | n_slots | | 1 | - | 2 | + # FIXME: unified KV cache nondeterminism + # | 2 | Scenario Outline: different results with different seed Given slots @@ -73,14 +74,13 @@ Feature: Results Examples: | n_parallel | temp | | 1 | 0.0 | - | 2 | 0.0 | - | 4 | 0.0 | | 1 | 1.0 | - # FIXME: These tests fail on master. - # Problems: unified KV cache (except for CPU backend with LLAMA_NO_LLAMAFILE=1), SIMD nondeterminism. + # FIXME: unified KV cache nondeterminism # See https://github.com/ggerganov/whisper.cpp/issues/1941#issuecomment-1986923227 # and https://github.com/ggerganov/llama.cpp/pull/6122#discussion_r1531405574 # and https://github.com/ggerganov/llama.cpp/pull/7347 . + # | 2 | 0.0 | + # | 4 | 0.0 | # | 2 | 1.0 | # | 4 | 1.0 | @@ -108,12 +108,11 @@ Feature: Results Examples: | n_slots | n_kv | n_predict | n_parallel | | 4 | 1024 | 1 | 1 | - | 4 | 1024 | 1 | 4 | - # FIXME: These tests fail on master. - # Problems: unified KV cache (except for CPU backend with LLAMA_NO_LLAMAFILE=1), SIMD nondeterminism. + # FIXME: unified KV cache nondeterminism # See https://github.com/ggerganov/whisper.cpp/issues/1941#issuecomment-1986923227 # and https://github.com/ggerganov/llama.cpp/pull/6122#discussion_r1531405574 # and https://github.com/ggerganov/llama.cpp/pull/7347 . + # | 4 | 1024 | 1 | 4 | # | 4 | 1024 | 100 | 1 | # This test still fails even the above patches; the first token probabilities are already different. # | 4 | 1024 | 100 | 4 |