minja: update chat template goldens w/ llama.3.1 arguments workaround

2025-01-13 04:00:16 +00:00 · 2024-09-26 18:10:27 +01:00 · 2024-09-26 18:10:27 +01:00 · 296331bba3
commit 296331bba3
parent 9cfe4d7202
10 changed files with 168 additions and 27 deletions
--- a/tests/chat/contexts/tool_use.json
+++ b/tests/chat/contexts/tool_use.json
@ -12,7 +12,7 @@
          "id": "call_1",
          "type": "function",
          "function": {
-            "arguments": {"code": "print('Hello, World!')"},
+            "arguments": "{\"code\": \"print('Hello, World!')\"}",
            "name": "ipython"
          }
        }
@ -39,7 +39,7 @@
          "id": "call_2",
          "type": "function",
          "function": {
-            "arguments": {"condition":true},
+            "arguments": "{\"condition\":true}",
            "name": "test"
          }
        }
@ -66,7 +66,7 @@
          "id": "call_3",
          "type": "function",
          "function": {
-            "arguments": {"query": "what is truth anyway am I right?"},
+            "arguments": "{\"query\": \"what is truth anyway am I right?\"}",
            "name": "brave_search"
          }
        }
--- a/tests/chat/goldens/CohereForAI-c4ai-command-r-plus-tool_use-tool_use.txt
+++ b/tests/chat/goldens/CohereForAI-c4ai-command-r-plus-tool_use-tool_use.txt
@ -59,9 +59,7 @@ Action:
 [
    {
        "tool_name": "ipython",
-        "parameters": {
-            "code": "print('Hello, World!')"
-        }
+        "parameters": "{\"code\": \"print('Hello, World!')\"}"
    }
 ]```
 <|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|><results>
@ -71,9 +69,7 @@ Action:
 [
    {
        "tool_name": "test",
-        "parameters": {
-            "condition": true
-        }
+        "parameters": "{\"condition\":true}"
    }
 ]```
 <|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|><results>
@ -83,9 +79,7 @@ Action:
 [
    {
        "tool_name": "brave_search",
-        "parameters": {
-            "query": "what is truth anyway am I right?"
-        }
+        "parameters": "{\"query\": \"what is truth anyway am I right?\"}"
    }
 ]```
 <|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|><results>
--- a/tests/chat/goldens/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use-tool_use.txt
+++ b/tests/chat/goldens/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use-tool_use.txt
@ -35,7 +35,7 @@ Anything else?<|im_end|>
 Test a tautology.<|im_end|>
 <|im_start|>assistant
 <tool_call>
-{"name": "test", "arguments": {"condition": true}}
+{"name": "test", "arguments": {"condition":true}}
 </tool_call><|im_end|>
 <|im_start|>tool
 <tool_response>
--- a/tests/chat/goldens/NousResearch-Hermes-2-Pro-Mistral-7B-tool_use-tool_use.txt
+++ b/tests/chat/goldens/NousResearch-Hermes-2-Pro-Mistral-7B-tool_use-tool_use.txt
@ -35,7 +35,7 @@ Anything else?<|im_end|>
 Test a tautology.<|im_end|>
 <|im_start|>assistant
 <tool_call>
-{"name": "test", "arguments": {"condition": true}}
+{"name": "test", "arguments": {"condition":true}}
 </tool_call><|im_end|>
 <|im_start|>tool
 <tool_response>
--- a/tests/chat/goldens/NousResearch-Hermes-3-Llama-3.1-70B-tool_use-tool_use.txt
+++ b/tests/chat/goldens/NousResearch-Hermes-3-Llama-3.1-70B-tool_use-tool_use.txt
@ -35,7 +35,7 @@ Anything else?<|im_end|>
 Test a tautology.<|im_end|>
 <|im_start|>assistant
 <tool_call>
-{"name": "test", "arguments": {"condition": true}}
+{"name": "test", "arguments": {"condition":true}}
 </tool_call><|im_end|>
 <|im_start|>tool
 <tool_response>
--- a/tests/chat/goldens/Qwen-Qwen2.5-7B-Instruct-tool_use.txt
+++ b/tests/chat/goldens/Qwen-Qwen2.5-7B-Instruct-tool_use.txt
@ -21,7 +21,7 @@ For each function call, return a json object with function name and arguments wi
 Print a hello world message with python.<|im_end|>
 <|im_start|>assistant
 <tool_call>
-{"name": "ipython", "arguments": {"code": "print('Hello, World!')"}}
+{"name": "ipython", "arguments": "{\"code\": \"print('Hello, World!')\"}"}
 </tool_call><|im_end|>
 <|im_start|>user
 <tool_response>
@ -33,7 +33,7 @@ Anything else?<|im_end|>
 Test a tautology.<|im_end|>
 <|im_start|>assistant
 <tool_call>
-{"name": "test", "arguments": {"condition": true}}
+{"name": "test", "arguments": "{\"condition\":true}"}
 </tool_call><|im_end|>
 <|im_start|>user
 <tool_response>
@ -45,7 +45,7 @@ Truth is definitely true.<|im_end|>
 Check it on the web.<|im_end|>
 <|im_start|>assistant
 <tool_call>
-{"name": "brave_search", "arguments": {"query": "what is truth anyway am I right?"}}
+{"name": "brave_search", "arguments": "{\"query\": \"what is truth anyway am I right?\"}"}
 </tool_call><|im_end|>
 <|im_start|>user
 <tool_response>
--- a/tests/chat/goldens/Qwen-Qwen2.5-Math-7B-Instruct-tool_use.txt
+++ b/tests/chat/goldens/Qwen-Qwen2.5-Math-7B-Instruct-tool_use.txt
@ -21,7 +21,7 @@ For each function call, return a json object with function name and arguments wi
 Print a hello world message with python.<|im_end|>
 <|im_start|>assistant
 <tool_call>
-{"name": "ipython", "arguments": {"code": "print('Hello, World!')"}}
+{"name": "ipython", "arguments": "{\"code\": \"print('Hello, World!')\"}"}
 </tool_call><|im_end|>
 <|im_start|>user
 <tool_response>
@ -33,7 +33,7 @@ Anything else?<|im_end|>
 Test a tautology.<|im_end|>
 <|im_start|>assistant
 <tool_call>
-{"name": "test", "arguments": {"condition": true}}
+{"name": "test", "arguments": "{\"condition\":true}"}
 </tool_call><|im_end|>
 <|im_start|>user
 <tool_response>
@ -45,7 +45,7 @@ Truth is definitely true.<|im_end|>
 Check it on the web.<|im_end|>
 <|im_start|>assistant
 <tool_call>
-{"name": "brave_search", "arguments": {"query": "what is truth anyway am I right?"}}
+{"name": "brave_search", "arguments": "{\"query\": \"what is truth anyway am I right?\"}"}
 </tool_call><|im_end|>
 <|im_start|>user
 <tool_response>
--- a/tests/chat/goldens/meetkai-functionary-medium-v3.1-tool_use.txt
+++ b/tests/chat/goldens/meetkai-functionary-medium-v3.1-tool_use.txt
@ -1 +1,66 @@
-ERROR: can only concatenate str (not "dict") to str
+<|startoftext|><|start_header_id|>system<|end_header_id|>
+
+
+Cutting Knowledge Date: December 2023
+
+
+You have access to the following functions:
+
+Use the function 'ipython' to 'Runs code in an ipython interpreter and returns the result of the execution after 60 seconds.'
+{&#34;name&#34;: &#34;ipython&#34;, &#34;description&#34;: &#34;Runs code in an ipython interpreter and returns the result of the execution after 60 seconds.&#34;, &#34;parameters&#34;: {&#34;type&#34;: &#34;object&#34;, &#34;properties&#34;: {&#34;code&#34;: {&#34;type&#34;: &#34;string&#34;, &#34;description&#34;: &#34;The code to run in the ipython interpreter.&#34;}}, &#34;required&#34;: [&#34;code&#34;]}}
+
+Use the function 'brave_search' to 'Executes a web search with Brave.'
+{&#34;name&#34;: &#34;brave_search&#34;, &#34;description&#34;: &#34;Executes a web search with Brave.&#34;, &#34;parameters&#34;: {&#34;type&#34;: &#34;object&#34;, &#34;properties&#34;: {&#34;query&#34;: {&#34;type&#34;: &#34;string&#34;, &#34;description&#34;: &#34;The query to search for.&#34;}}, &#34;required&#34;: [&#34;query&#34;]}}
+
+Use the function 'wolfram_alpha' to 'Executes a query with Wolfram Alpha.'
+{&#34;name&#34;: &#34;wolfram_alpha&#34;, &#34;description&#34;: &#34;Executes a query with Wolfram Alpha.&#34;, &#34;parameters&#34;: {&#34;type&#34;: &#34;object&#34;, &#34;properties&#34;: {&#34;query&#34;: {&#34;type&#34;: &#34;string&#34;, &#34;description&#34;: &#34;The query to execute.&#34;}}, &#34;required&#34;: [&#34;query&#34;]}}
+
+Use the function 'test' to 'Runs a test.'
+{&#34;name&#34;: &#34;test&#34;, &#34;description&#34;: &#34;Runs a test.&#34;, &#34;parameters&#34;: {&#34;type&#34;: &#34;object&#34;, &#34;properties&#34;: {&#34;condition&#34;: {&#34;type&#34;: &#34;boolean&#34;, &#34;description&#34;: &#34;The condition to test.&#34;}}, &#34;required&#34;: [&#34;condition&#34;]}}
+
+
+Think very carefully before calling functions.
+If a you choose to call a function ONLY reply in the following format:
+<{start_tag}={function_name}>{parameters}{end_tag}
+where
+
+start_tag => `<function`
+parameters => a JSON dict with the function argument name as key and function argument value as value.
+end_tag => `</function>`
+
+Here is an example,
+<function=example_function_name>{"example_name": "example_value"}</function>
+
+Reminder:
+- If looking for real time information use relevant functions before falling back to brave_search
+- Function calls MUST follow the specified format, start with <function= and end with </function>
+- Required parameters MUST be specified
+- Only call one function at a time
+- Put the entire function call reply on one line
+
+<|eot_id|><|start_header_id|>user<|end_header_id|>
+
+Print a hello world message with python.<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
+<function=ipython>{"code": "print('Hello, World!')"}</function><|eom_id|><|start_header_id|>ipython<|end_header_id|>
+
+{"stdout": "Hello, World!"}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
+Anything else?<|eot_id|><|start_header_id|>user<|end_header_id|>
+
+Test a tautology.<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
+<function=test>{"condition":true}</function><|eom_id|><|start_header_id|>ipython<|end_header_id|>
+
+true<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
+Truth is definitely true.<|eot_id|><|start_header_id|>user<|end_header_id|>
+
+Check it on the web.<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
+<function=brave_search>{"query": "what is truth anyway am I right?"}</function><|eom_id|><|start_header_id|>ipython<|end_header_id|>
+
+{"title":"Truth: don't ask the web, ask an LLM instead!","url":"https://en.wikipedia.org/wiki/Truth"}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
+I don't need the web to answer you but I did check, as you asked. What now?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
--- a/tests/chat/goldens/meetkai-functionary-medium-v3.2-tool_use.txt
+++ b/tests/chat/goldens/meetkai-functionary-medium-v3.2-tool_use.txt
@ -1 +1,70 @@
-ERROR: can only concatenate str (not "dict") to str
+<|startoftext|><|start_header_id|>system<|end_header_id|>
+
+You are capable of executing available function(s) if required.
+Only execute function(s) when absolutely necessary.
+Ask for the required input to:recipient==all
+Use JSON for function arguments.
+Respond in this format:
+>>>${recipient}
+${content}
+Available functions:
+// Supported function definitions that should be called when necessary.
+namespace functions {
+
+// Runs code in an ipython interpreter and returns the result of the execution after 60 seconds.
+type ipython = (_: {
+// The code to run in the ipython interpreter.
+code: string,
+}) => any;
+
+// Executes a web search with Brave.
+type brave_search = (_: {
+// The query to search for.
+query: string,
+}) => any;
+
+// Executes a query with Wolfram Alpha.
+type wolfram_alpha = (_: {
+// The query to execute.
+query: string,
+}) => any;
+
+// Runs a test.
+type test = (_: {
+// The condition to test.
+condition: boolean,
+}) => any;
+
+} // namespace functions<|eot_id|><|start_header_id|>user<|end_header_id|>
+
+Print a hello world message with python.<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
+>>>ipython
+{"code": "print('Hello, World!')"}<|eot_id|><|start_header_id|>tool<|end_header_id|>
+
+{"stdout": "Hello, World!"}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
+>>>all
+Anything else?<|eot_id|><|start_header_id|>user<|end_header_id|>
+
+Test a tautology.<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
+>>>test
+{"condition":true}<|eot_id|><|start_header_id|>tool<|end_header_id|>
+
+true<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
+>>>all
+Truth is definitely true.<|eot_id|><|start_header_id|>user<|end_header_id|>
+
+Check it on the web.<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
+>>>brave_search
+{"query": "what is truth anyway am I right?"}<|eot_id|><|start_header_id|>tool<|end_header_id|>
+
+{"title":"Truth: don't ask the web, ask an LLM instead!","url":"https://en.wikipedia.org/wiki/Truth"}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
+>>>all
+I don't need the web to answer you but I did check, as you asked. What now?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
+>>>
--- a/tests/update_jinja_goldens.py
+++ b/tests/update_jinja_goldens.py
@ -26,7 +26,7 @@ import jinja2.ext
 import re
 # import requests

-logging.basicConfig(level=logging.INFO)
+logging.basicConfig(level=logging.INFO, format='%(message)s')
 logger = logging.getLogger(__name__)

 model_ids = [
@ -85,11 +85,11 @@ def strftime_now(format):


 def handle_chat_template(model_id, variant, template_src):
-    logger.info(f"# {model_id} @ {variant}")
+    logger.info(f"# {model_id}{' @ ' + variant if variant else ''}")
    model_name = model_id.replace("/", "-")
    base_name = f'{model_name}-{variant}' if variant else model_name
    template_file = f'tests/chat/templates/{base_name}.jinja'
-    logger.info(f'template_file: {template_file}')
+    logger.info(f'- template_file: {template_file}')
    with open(template_file, 'w') as f:
        f.write(template_src)

@ -125,8 +125,20 @@ def handle_chat_template(model_id, variant, template_src):

        output_file = f'tests/chat/goldens/{base_name}-{context_name}.txt'
        logger.info(f"- {output_file}")
+
+        # Render from a deep copy (JSON round-trip) of the context: the template and the workarounds below may modify it in place.
+        actual_context = json.loads(json.dumps(context))
+
+        # Work around a Llama-3.1 template quirk: templates that iterate `tool_call.arguments | items` expect tool_call.function.arguments to be an object, whereas the contexts store its JSON string representation, so parse it back here.
+        if 'tool_call.arguments | items' in template_src:
+            for message in actual_context['messages']:
+                if 'tool_calls' in message:
+                    for tool_call in message['tool_calls']:
+                        arguments = tool_call['function']['arguments']
+                        tool_call['function']['arguments'] = json.loads(arguments)
+
        try:
-            output = template.render(**context)
+            output = template.render(**actual_context)
        except Exception as e1:
            # Some templates (e.g. Phi-3-medium-128k's) expect a non-null "content" key in each message.
            for message in context["messages"]:
@ -142,6 +154,7 @@ def handle_chat_template(model_id, variant, template_src):
        with open(output_file, 'w') as f:
            f.write(output)

+    logger.info('')

 def main():
    for dir in ['tests/chat/templates', 'tests/chat/goldens']: