Mirror of https://github.com/ggerganov/llama.cpp.git, synced 2025-01-13 04:00:16 +00:00
tool-call: add tests: tool_call=none, parallel_tool_calls=true
This commit is contained in:
parent 168add7ec8
commit ec547e4137
@@ -746,6 +746,23 @@ async def step_tool_called(context, expected_name, expected_arguments):
        assert_n_tokens_predicted(result, tool_calls_check=check)
    assert len(context.concurrent_tasks) == 0, f"{len(context.concurrent_tasks)} pending requests"


@step('receiving the following tool calls: {expected_tool_calls}')
@async_run_until_complete
async def step_receiving_tool_calls(context, expected_tool_calls):
    expected_tool_calls = json.loads(expected_tool_calls)
    n_completions = await gather_tasks_results(context)
    assert n_completions > 0

    for i in range(n_completions):
        result = context.tasks_result.pop()

        def check(tool_calls):
            assert json.dumps(expected_tool_calls) == json.dumps(tool_calls), f"tool calls: {tool_calls}, expected: {expected_tool_calls}, result = {result}"

        assert_n_tokens_predicted(result, tool_calls_check=check)
    assert len(context.concurrent_tasks) == 0, f"{len(context.concurrent_tasks)} pending requests"


@step('no tool is called')
@async_run_until_complete
async def step_tool_called(context):
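The new step parses the expected tool calls from the Gherkin step text and compares them, via json.dumps, against the tool calls pulled out of each completion. A minimal standalone sketch of that comparison, assuming an OpenAI-style /v1/chat/completions response body and a hypothetical extract_tool_calls helper (neither is taken from the suite's actual step helpers):

import json

# Hypothetical response body in the OpenAI chat-completions format; real values
# would come from the llama.cpp server under test.
response = {
    "choices": [{
        "message": {
            "role": "assistant",
            "content": None,
            "tool_calls": [{
                "id": "123456789",
                "type": "function",
                "function": {
                    "name": "ipython",
                    "arguments": "{\"code\": \"print('hello world')\"}",
                },
            }],
        },
    }],
}


def extract_tool_calls(resp):
    # Flatten the OAI structure into the {"arguments", "name", "id"} shape used by
    # the feature file's expected_tool_calls (an assumption about the suite's format).
    calls = []
    for tc in resp["choices"][0]["message"].get("tool_calls", []):
        calls.append({
            "arguments": json.loads(tc["function"]["arguments"]),
            "name": tc["function"]["name"],
            "id": tc["id"],
        })
    return calls


expected = [{"arguments": {"code": "print('hello world')"}, "name": "ipython", "id": "123456789"}]
actual = extract_tool_calls(response)
# Same order-sensitive string comparison the step uses.
assert json.dumps(expected) == json.dumps(actual), f"tool calls: {actual}, expected: {expected}"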
@@ -113,3 +113,35 @@ Feature: llama.cpp server
    And parallel tool calls is disabled
    And an OAI compatible chat completions request with no api error
    Then no tool is called

  @slow
  Scenario Outline: Python hello world w/ none tool_choice yields no tool call
    Given a model file Phi-3.5-mini-instruct-Q4_K_M.gguf from HF repo bartowski/Phi-3.5-mini-instruct-GGUF
    And no warmup
    And the server is starting
    And the server is healthy
    And a model test
    And 256 max tokens to predict
    And a user prompt write a hello world in python
    And a tool choice none
    And python tool
    And parallel tool calls is disabled
    And an OAI compatible chat completions request with no api error
    Then no tool is called

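As an illustration of what this scenario presumably drives on the wire, here is a rough sketch of an OAI-compatible request with tool_choice set to none against a locally running server; the endpoint, port, and tool schema are assumptions, not values taken from the test suite:

import requests  # assumes the requests package is available

# Hypothetical llama.cpp server endpoint; the test suite starts its own instance.
url = "http://localhost:8080/v1/chat/completions"

payload = {
    "model": "test",
    "max_tokens": 256,
    "messages": [{"role": "user", "content": "write a hello world in python"}],
    # A python/ipython-style tool definition, sketched after common tool-call examples.
    "tools": [{
        "type": "function",
        "function": {
            "name": "ipython",
            "description": "Run Python code",
            "parameters": {
                "type": "object",
                "properties": {"code": {"type": "string"}},
                "required": ["code"],
            },
        },
    }],
    # tool_choice=none: the model must not call any tool, so the scenario
    # expects a plain text completion and "no tool is called".
    "tool_choice": "none",
    "parallel_tool_calls": False,
}

response = requests.post(url, json=payload, timeout=60)
message = response.json()["choices"][0]["message"]
assert not message.get("tool_calls"), f"expected no tool calls, got {message.get('tool_calls')}"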
  @slow
  Scenario: Parallel tool calls
    Given a model file Mistral-Nemo-Instruct-2407-Q8_0.gguf from HF repo bartowski/Mistral-Nemo-Instruct-2407-GGUF
    And a test chat template file named mistralai-Mistral-Nemo-Instruct-2407
    And no warmup
    And the server is starting
    And the server is healthy
    And a model test
    And 256 max tokens to predict
    And a user prompt get the weather in paris and search for llama.cpp's latest commits
    And python tool
    And parallel tool calls is enabled
    And an OAI compatible chat completions request with no api error
    Then receiving the following tool calls: [{"arguments": {"code": "import requests\nresponse = requests.get('https://api.openweathermap.org/data/2.9/weather?q=Paris&appid=YOUR_API_KEY')\nprint(response.json())"}, "name": "ipython", "id": "123456789"}, {"arguments": {"code": "!git log --oneline --after 2024-01-01 --before 2024-12-31 llama.cpp"}, "name": "ipython", "id": "987654321"}]
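For the parallel case, the request differs mainly in enabling parallel tool calls, and the assertion expects two tool calls (a weather lookup and a git log search) in a single assistant message. A rough sketch under the same assumptions as the request sketch above (local server, OAI-style fields, illustrative tool schema):

import requests  # same assumptions as above: local server, OAI-compatible API

payload = {
    "model": "test",
    "max_tokens": 256,
    "messages": [{"role": "user",
                  "content": "get the weather in paris and search for llama.cpp's latest commits"}],
    "tools": [{  # same hypothetical ipython tool definition as above
        "type": "function",
        "function": {
            "name": "ipython",
            "parameters": {"type": "object",
                           "properties": {"code": {"type": "string"}},
                           "required": ["code"]},
        },
    }],
    # With parallel tool calls enabled, the scenario expects the model to emit
    # two tool calls, one per sub-task, in the same assistant message.
    "parallel_tool_calls": True,
}

resp = requests.post("http://localhost:8080/v1/chat/completions", json=payload, timeout=60)
tool_calls = resp.json()["choices"][0]["message"].get("tool_calls", [])
assert len(tool_calls) == 2, f"expected 2 parallel tool calls, got {len(tool_calls)}"
assert all(tc["function"]["name"] == "ipython" for tc in tool_calls)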