server : fix temperature + disable some tests (#7409)
* server : fix temperature
* server : disable tests relying on parallel determinism
* ci : change server Debug -> RelWithDebInfo
commit 3bc10cb485
parent 6bf9b66fa3
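Why the temperature change below matters for the fixed-seed scenario: at 0.0 temperature sampling is effectively greedy (argmax), so runs agree regardless of seed and the test exercises nothing seed-related; at 1.0 the sampler actually draws from the softmax distribution, and reproducibility then depends on the per-request RNG being seeded correctly. A minimal Python sketch of that idea (illustrative only, not the server's actual sampler):

import math
import random

def sample_token(logits, temperature, seed):
    # Illustrative seeded sampler: same seed + same logits -> same token.
    if temperature <= 0.0:
        # Greedy: the seed is irrelevant, every run picks the argmax.
        return max(range(len(logits)), key=lambda i: logits[i])
    rng = random.Random(seed)                  # per-request RNG, seeded
    scaled = [l / temperature for l in logits]
    m = max(scaled)                            # subtract max for numerical stability
    exps = [math.exp(s - m) for s in scaled]
    r = rng.random() * sum(exps)               # one seeded draw from the softmax
    acc = 0.0
    for i, e in enumerate(exps):
        acc += e
        if r <= acc:
            return i
    return len(logits) - 1

logits = [1.0, 2.5, 0.3, 2.4]
assert sample_token(logits, 1.0, seed=42) == sample_token(logits, 1.0, seed=42)

With temperature pinned at 0.0, the old scenario would have passed even if the seed were ignored entirely; at 1.0, a broken seed path fails it immediately.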
.github/workflows/server.yml (7 changes)

--- a/.github/workflows/server.yml
+++ b/.github/workflows/server.yml
@@ -33,13 +33,10 @@ jobs:
     strategy:
       matrix:
         sanitizer: [ADDRESS, THREAD, UNDEFINED]
-        build_type: [Debug]
+        build_type: [RelWithDebInfo]
         include:
           - build_type: Release
             sanitizer: ""
-          - build_type: Debug
-            sanitizer: THREAD
-            disabled_on_pr: true
       fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken

     steps:
@@ -103,10 +100,8 @@ jobs:
             -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
           cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target server


       - name: Tests
         id: server_integration_tests
-        if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
         run: |
           cd examples/server/tests
           PORT=8888 ./tests.sh
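For context on the if: line removed above: it skipped the Tests step on pull requests, but only for matrix entries flagged disabled_on_pr. The only such entry (the Debug + THREAD sanitizer job) is deleted from the matrix in the same commit, so the guard became dead code. A small Python restatement of the expression's logic (hypothetical helper, just to make the truth table explicit):

def tests_step_runs(disabled_on_pr: bool, is_pull_request: bool) -> bool:
    # Restates the removed guard:
    #   if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
    return (not disabled_on_pr) or (not is_pull_request)

# The step was skipped only for disabled_on_pr entries on PR events:
assert tests_step_runs(disabled_on_pr=True,  is_pull_request=True)  is False
assert tests_step_runs(disabled_on_pr=True,  is_pull_request=False) is True
assert tests_step_runs(disabled_on_pr=False, is_pull_request=True)  is True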
|
examples/server/tests/features/results.feature

--- a/examples/server/tests/features/results.feature
+++ b/examples/server/tests/features/results.feature
@@ -13,7 +13,7 @@ Feature: Results

   Scenario Outline: consistent results with same seed
     Given <n_slots> slots
-    And 0.0 temperature
+    And 1.0 temperature
     Then the server is starting
     Then the server is healthy

@@ -27,7 +27,8 @@ Feature: Results
     Examples:
       | n_slots |
       | 1       |
-      | 2       |
+      # FIXME: unified KV cache nondeterminism
+      # | 2     |

   Scenario Outline: different results with different seed
     Given <n_slots> slots
@@ -73,14 +74,13 @@ Feature: Results
     Examples:
       | n_parallel | temp |
       | 1          | 0.0  |
-      | 2          | 0.0  |
-      | 4          | 0.0  |
       | 1          | 1.0  |
-      # FIXME: These tests fail on master.
-      # Problems: unified KV cache (except for CPU backend with LLAMA_NO_LLAMAFILE=1), SIMD nondeterminism.
+      # FIXME: unified KV cache nondeterminism
       # See https://github.com/ggerganov/whisper.cpp/issues/1941#issuecomment-1986923227
       # and https://github.com/ggerganov/llama.cpp/pull/6122#discussion_r1531405574
       # and https://github.com/ggerganov/llama.cpp/pull/7347 .
+      # | 2          | 0.0  |
+      # | 4          | 0.0  |
       # | 2          | 1.0  |
       # | 4          | 1.0  |

@@ -108,12 +108,11 @@ Feature: Results
     Examples:
       | n_slots | n_kv | n_predict | n_parallel |
       | 4       | 1024 | 1         | 1          |
-      | 4       | 1024 | 1         | 4          |
-      # FIXME: These tests fail on master.
-      # Problems: unified KV cache (except for CPU backend with LLAMA_NO_LLAMAFILE=1), SIMD nondeterminism.
+      # FIXME: unified KV cache nondeterminism
       # See https://github.com/ggerganov/whisper.cpp/issues/1941#issuecomment-1986923227
       # and https://github.com/ggerganov/llama.cpp/pull/6122#discussion_r1531405574
       # and https://github.com/ggerganov/llama.cpp/pull/7347 .
+      # | 4       | 1024 | 1         | 4          |
       # | 4       | 1024 | 100       | 1          |
       # This test still fails even the above patches; the first token probabilities are already different.
       # | 4       | 1024 | 100       | 4          |
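To reproduce the scenario the feature file now encodes, send the same seeded request twice and compare outputs. A hedged sketch against a locally running server (it assumes the /completion endpoint accepts prompt, seed, temperature, and n_predict fields and returns the generated text under "content"; port 8888 mirrors the tests.sh invocation above):

import json
import urllib.request

def completion(prompt, seed, temperature=1.0, n_predict=32, port=8888):
    # Minimal client for the server's /completion endpoint.
    payload = json.dumps({
        "prompt": prompt,
        "seed": seed,
        "temperature": temperature,
        "n_predict": n_predict,
    }).encode()
    req = urllib.request.Request(
        f"http://localhost:{port}/completion",
        data=payload,
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req) as resp:
        return json.load(resp)["content"]

# With a single slot, two identical seeded requests at temperature 1.0
# should produce identical completions -- exactly what the updated
# "consistent results with same seed" scenario asserts.
a = completion("Write a story about llamas.", seed=42)
b = completion("Write a story about llamas.", seed=42)
assert a == b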
|