From ec547e4137b76d4d4d0a03f63113d2655ddc5bc5 Mon Sep 17 00:00:00 2001
From: ochafik
Date: Mon, 28 Oct 2024 10:04:00 +0000
Subject: [PATCH] `tool-call`: add tests: tool_call=none, parallel_tool_calls=true

---
 examples/server/tests/features/steps/steps.py | 23 +++++++++++++
 .../server/tests/features/tool_call.feature   | 34 ++++++++++++++++++-
 scripts/fetch_server_test_models.py           |  6 ++--
 3 files changed, 59 insertions(+), 4 deletions(-)

diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py
index 142356931..156ebf0be 100644
--- a/examples/server/tests/features/steps/steps.py
+++ b/examples/server/tests/features/steps/steps.py
@@ -746,6 +746,29 @@ async def step_tool_called(context, expected_name, expected_arguments):
         assert_n_tokens_predicted(result, tool_calls_check=check)
     assert len(context.concurrent_tasks) == 0, f"{len(context.concurrent_tasks)} pending requests"
 
+
+@step('receiving the following tool calls: {expected_tool_calls}')
+@async_run_until_complete
+async def step_receiving_tool_calls(context, expected_tool_calls):
+    """Check that every gathered completion returned the expected tool calls.
+
+    `expected_tool_calls` is the JSON text captured from the step line.
+    """
+    # Rebind to the parsed value so `check` compares JSON data, not the raw step text.
+    expected_tool_calls = json.loads(expected_tool_calls)
+    n_completions = await gather_tasks_results(context)
+    assert n_completions > 0
+
+    for _ in range(n_completions):
+        result = context.tasks_result.pop()
+
+        def check(tool_calls):
+            assert json.dumps(expected_tool_calls) == json.dumps(tool_calls), f"tool calls: {tool_calls}, expected: {expected_tool_calls}, result = {result}"
+
+        assert_n_tokens_predicted(result, tool_calls_check=check)
+    assert len(context.concurrent_tasks) == 0, f"{len(context.concurrent_tasks)} pending requests"
+
+
 @step('no tool is called')
 @async_run_until_complete
 async def step_tool_called(context):
diff --git a/examples/server/tests/features/tool_call.feature b/examples/server/tests/features/tool_call.feature
index 7b332f015..7ef7a10ee 100644
--- a/examples/server/tests/features/tool_call.feature
+++ b/examples/server/tests/features/tool_call.feature
@@ -92,7 +92,7 @@ Feature: 
llama.cpp server
       | tool_name | tool_arguments | hf_repo | hf_file | template_override |
       | ipython | {"code": "print('Hello, World!')"} | bartowski/Phi-3.5-mini-instruct-GGUF | Phi-3.5-mini-instruct-Q4_K_M.gguf | |
       | ipython | {"code": "print('Hello, World!')"} | NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF | Hermes-2-Pro-Llama-3-8B-Q8_0.gguf | NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use |
-      | ipython | {"code": "print('Hello, World!')"} | bartowski/Mistral-Nemo-Instruct-2407-GGUF | Mistral-Nemo-Instruct-2407-Q8_0.gguf | mistralai-Mistral-Nemo-Instruct-2407 |
+      | ipython | {"code": "print('Hello, World!')"} | bartowski/Mistral-Nemo-Instruct-2407-GGUF | Mistral-Nemo-Instruct-2407-Q8_0.gguf | mistralai-Mistral-Nemo-Instruct-2407 |
       | ipython | {"code": "print('Hello, World!'}"} | lmstudio-community/Llama-3.2-1B-Instruct-GGUF | Llama-3.2-1B-Instruct-Q4_K_M.gguf | meta-llama-Llama-3.2-3B-Instruct |
       | ipython | {"code": "print("} | lmstudio-community/Llama-3.2-3B-Instruct-GGUF | Llama-3.2-3B-Instruct-Q6_K.gguf | meta-llama-Llama-3.2-3B-Instruct |
       | ipython | {"code": "print("} | lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF | Meta-Llama-3.1-8B-Instruct-Q5_K_M.gguf | |
@@ -113,3 +113,35 @@ Feature: llama.cpp server
     And parallel tool calls is disabled
     And an OAI compatible chat completions request with no api error
     Then no tool is called
+
+
+  @slow
+  Scenario: Python hello world w/ none tool_choice yields no tool call
+    Given a model file Phi-3.5-mini-instruct-Q4_K_M.gguf from HF repo bartowski/Phi-3.5-mini-instruct-GGUF
+    And no warmup
+    And the server is starting
+    And the server is healthy
+    And a model test
+    And 256 max tokens to predict
+    And a user prompt write a hello world in python
+    And a tool choice none
+    And python tool
+    And parallel tool calls is disabled
+    And an OAI compatible chat completions request with no api error
+    Then no tool is called
+
+
+  @slow
+  Scenario: Parallel tool calls
+    Given a model file Mistral-Nemo-Instruct-2407-Q8_0.gguf from HF repo bartowski/Mistral-Nemo-Instruct-2407-GGUF
+    And a test chat template file named mistralai-Mistral-Nemo-Instruct-2407
+    And no warmup
+    And the server is starting
+    And the server is healthy
+    And a model test
+    And 256 max tokens to predict
+    And a user prompt get the weather in paris and search for llama.cpp's latest commits
+    And python tool
+    And parallel tool calls is enabled
+    And an OAI compatible chat completions request with no api error
+    Then receiving the following tool calls: [{"arguments": {"code": "import requests\nresponse = requests.get('https://api.openweathermap.org/data/2.9/weather?q=Paris&appid=YOUR_API_KEY')\nprint(response.json())"}, "name": "ipython" , "id": "123456789"}, {"arguments": {"code": "!git log --oneline --after 2024-01-01 --before 2024-12-31 llama.cpp" }, "name": "ipython" , "id": "987654321"}]
diff --git a/scripts/fetch_server_test_models.py b/scripts/fetch_server_test_models.py
index c2021c335..2686954aa 100644
--- a/scripts/fetch_server_test_models.py
+++ b/scripts/fetch_server_test_models.py
@@ -1,10 +1,10 @@
 '''
     This script fetches all the models used in the server tests.
-    
+
     This is useful for slow tests that use larger models, to avoid them timing out on the model downloads.
-    
+
     It is meant to be run from the root of the repository.
-    
+
     Example:
         python scripts/fetch_server_test_models.py
         ( cd examples/server/tests && ./tests.sh --tags=slow )