mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-30 13:24:35 +00:00
525213d2f5
* server: tests: init scenarios - health and slots endpoints - completion endpoint - OAI compatible chat completion requests w/ and without streaming - completion multi users scenario - multi users scenario on OAI compatible endpoint with streaming - multi users with total number of tokens to predict exceeds the KV Cache size - server wrong usage scenario, like in Infinite loop of "context shift" #3969 - slots shifting - continuous batching - embeddings endpoint - multi users embedding endpoint: Segmentation fault #5655 - OpenAI-compatible embeddings API - tokenize endpoint - CORS and api key scenario * server: CI GitHub workflow --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
13 lines
184 B
Bash
Executable File
13 lines
184 B
Bash
Executable File
#!/bin/bash
#
# Launcher for the `behave` test run.
#
# Usage:
#   (no arguments)  run the default selection: scenarios tagged `llama.cpp`,
#                   with the 'issues|wrong_usages' exclude pattern applied.
#   ARGS...         forward every argument, unchanged, to `behave`.
#
# The script's exit status is that of the `behave` invocation.

# -e: abort as soon as a command fails; -u: treat unset variables as errors.
set -eu

if [ "$#" -lt 1 ]; then
  # Start @llama.cpp scenario
  behave --summary --stop --no-capture --exclude 'issues|wrong_usages' --tags llama.cpp
else
  # Caller supplied its own behave arguments: pass them through verbatim,
  # keeping each one a separate word.
  behave "$@"
fi