llama.cpp/scripts/update_jinja_goldens.py

#!/usr/bin/env uv run
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "jinja2",
#     "huggingface_hub",
# ]
# ///
'''
  Fetches the Jinja2 templates of a few known models and use them to generate prompt goldens for a few predefined chat contexts.

  Examples:
    python ./scripts/update_jinja_goldens.py

  https://github.com/huggingface/transformers/blob/main/src/transformers/utils/chat_template_utils.py
'''

import logging
import datetime
import glob
import os
from huggingface_hub import hf_hub_download
import json
import jinja2
import jinja2.ext
import re
# import requests

logging.basicConfig(level=logging.INFO, format='%(message)s')
logger = logging.getLogger(__name__)

model_ids = [
    "abacusai/Fewshot-Metamath-OrcaVicuna-Mistral",
    "bofenghuang/vigogne-2-70b-chat",
    "deepseek-ai/deepseek-coder-33b-instruct",
    "deepseek-ai/DeepSeek-Coder-V2-Instruct",
    "deepseek-ai/DeepSeek-V2.5",
    "indischepartij/MiniCPM-3B-OpenHermes-2.5-v2",
    "meetkai/functionary-medium-v3.1",
    "meetkai/functionary-medium-v3.2",
    "microsoft/Phi-3-medium-4k-instruct",
    "microsoft/Phi-3-mini-4k-instruct",
    "microsoft/Phi-3-small-8k-instruct",
    "microsoft/Phi-3.5-mini-instruct",
    "microsoft/Phi-3.5-vision-instruct",
    "mlabonne/AlphaMonarch-7B",
    "CohereForAI/c4ai-command-r-plus",
    "NousResearch/Hermes-2-Pro-Llama-3-8B",
    "NousResearch/Hermes-2-Pro-Mistral-7B",
    "NousResearch/Hermes-3-Llama-3.1-8B",
    "openchat/openchat-3.5-0106",
    "OrionStarAI/Orion-14B-Chat",
    "Qwen/Qwen2-7B-Instruct",
    "Qwen/Qwen2-VL-7B-Instruct",
    "Qwen/Qwen2.5-7B-Instruct",
    "Qwen/Qwen2.5-Math-7B-Instruct",
    "teknium/OpenHermes-2.5-Mistral-7B",
    "TheBloke/FusionNet_34Bx2_MoE-AWQ",

    # Gated models:
    "meta-llama/Llama-3.2-3B-Instruct",
    "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "mistralai/Mistral-Nemo-Instruct-2407",
    "google/gemma-7b-it",
    "google/gemma-2-2b-it",
    "mistralai/Mistral-7B-Instruct-v0.2",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
]


def raise_exception(message: str):
    raise ValueError(message)


def tojson(x, ensure_ascii=False, indent=None, separators=None, sort_keys=False):
    return json.dumps(x, ensure_ascii=ensure_ascii, indent=indent, separators=separators, sort_keys=sort_keys)


TEST_DATE = os.environ.get('TEST_DATE', '2024-07-26')


def strftime_now(format):
    now = datetime.datetime.strptime(TEST_DATE, "%Y-%m-%d")
    # now = datetime.datetime.now()
    return now.strftime(format)


def handle_chat_template(model_id, variant, template_src):
    logger.info(f"# {model_id}{' @ ' + variant if variant else ''}")
    model_name = model_id.replace("/", "-")
    base_name = f'{model_name}-{variant}' if variant else model_name
    template_file = f'tests/chat/templates/{base_name}.jinja'
    logger.info(f'- template_file: {template_file}')
    with open(template_file, 'w') as f:
        f.write(template_src)

    logger.info(f"- {template_file}")

    env = jinja2.Environment(
        trim_blocks=True,
        lstrip_blocks=True,
        # keep_trailing_newline=False,
        extensions=[
            jinja2.ext.loopcontrols
        ])
    env.filters['safe'] = lambda x: x
    env.filters['tojson'] = tojson
    env.globals['raise_exception'] = raise_exception
    env.globals['strftime_now'] = strftime_now

    template = env.from_string(template_src)

    context_files = glob.glob('tests/chat/contexts/*.json')
    for context_file in context_files:
        context_name = context_file.split("/")[-1].replace(".json", "")
        with open(context_file, 'r') as f:
            context = json.load(f)

        output_file = f'tests/chat/goldens/{base_name}-{context_name}.txt'
        logger.info(f"- {output_file}")

        # The template (and workarounds) may modify the context in place, so we need to make a copy of it.
        render_context = json.loads(json.dumps(context))

        # Work around Llama-3.1 template quirk: it expects tool_call.function.arguments to be an object rather than its JSON string representation.
        if 'tool_call.arguments | items' in template_src or 'tool_call.arguments | tojson' in template_src:
            for message in render_context['messages']:
                if 'tool_calls' in message:
                    for tool_call in message['tool_calls']:
                        if tool_call.get('type') == 'function':
                            arguments = tool_call['function']['arguments']
                            tool_call['function']['arguments'] = json.loads(arguments)

        try:
            output = template.render(**render_context)
        except Exception as e1:
            # Some templates (e.g. Phi-3-medium-128k's) expect a non-null "content" key in each message.
            for message in context["messages"]:
                if message.get("content") is None:
                    message["content"] = ""

            try:
                output = template.render(**render_context)
            except Exception as e2:
                logger.info(f"  ERROR: {e2} (after first error: {e1})")
                output = f"ERROR: {e2}"

        with open(output_file, 'w') as f:
            f.write(output)

    logger.info('')


def main():
    for dir in ['tests/chat/templates', 'tests/chat/goldens']:
        if not os.path.isdir(dir):
            os.mkdir(dir)

    for model_id in model_ids:
        # response = requests.get(f"https://huggingface.co/{model_id}/resolve/main/tokenizer_config.json")
        # response.raise_for_status()
        # config_str = response.text
        with open(hf_hub_download(repo_id=model_id, filename="tokenizer_config.json")) as f:
            config_str = f.read()

        try:
            config = json.loads(config_str)
        except json.JSONDecodeError:
            # Fix https://huggingface.co/NousResearch/Meta-Llama-3-8B-Instruct/blob/main/tokenizer_config.json
            # (Remove extra '}' near the end of the file)
            config = json.loads(re.sub(r'\}([\n\s]*\}[\n\s]*\],[\n\s]*"clean_up_tokenization_spaces")', r'\1', config_str))

        chat_template = config['chat_template']
        if isinstance(chat_template, str):
            handle_chat_template(model_id, None, chat_template)
        else:
            for ct in chat_template:
                handle_chat_template(model_id, ct['name'], ct['template'])


if __name__ == '__main__':
    main()
`minja`: minimalist Jinja templating engine for LLM chat templates 2024-09-25 15:01:18 +00:00			`#!/usr/bin/env uv run`
			`# /// script`
			`# requires-python = ">=3.10"`
			`# dependencies = [`
			`# "jinja2",`
			`# "huggingface_hub",`
			`# ]`
			`# ///`
			`'''`
			`Fetches the Jinja2 templates of a few known models and use them to generate prompt goldens for a few predefined chat contexts.`
fix editorconfig lints 2024-09-26 01:27:46 +00:00
`minja`: minimalist Jinja templating engine for LLM chat templates 2024-09-25 15:01:18 +00:00			`Examples:`
`tool-call`: promote getting chat templates w/ dedicated script rather than rely on test resources 2024-10-02 14:03:08 +00:00			`python ./scripts/update_jinja_goldens.py`
fix editorconfig lints 2024-09-26 01:27:46 +00:00
`minja`: minimalist Jinja templating engine for LLM chat templates 2024-09-25 15:01:18 +00:00			`https://github.com/huggingface/transformers/blob/main/src/transformers/utils/chat_template_utils.py`
			`'''`

`tool-call`: fix pyright type errors 2024-09-26 02:59:38 +00:00			`import logging`
`minja`: minimalist Jinja templating engine for LLM chat templates 2024-09-25 15:01:18 +00:00			`import datetime`
			`import glob`
			`import os`
			`from huggingface_hub import hf_hub_download`
			`import json`
			`import jinja2`
			`import jinja2.ext`
			`import re`
			`# import requests`

`minja`: update chat template goldens w/ llama.3.1 arguments workaround 2024-09-26 17:10:27 +00:00			`logging.basicConfig(level=logging.INFO, format='%(message)s')`
`tool-call`: fix pyright type errors 2024-09-26 02:59:38 +00:00			`logger = logging.getLogger(__name__)`

`minja`: minimalist Jinja templating engine for LLM chat templates 2024-09-25 15:01:18 +00:00			`model_ids = [`
`tool-call`: merge & fix jinja template tests into test-chat-template 2024-09-26 18:05:00 +00:00			`"abacusai/Fewshot-Metamath-OrcaVicuna-Mistral",`
			`"bofenghuang/vigogne-2-70b-chat",`
			`"deepseek-ai/deepseek-coder-33b-instruct",`
`minja`: fix identifiers parsing (when start w/ not/is/etc) and lstrip_blocks corner case (needed by DeepSeek-V2.5 2024-09-27 17:30:44 +00:00			`"deepseek-ai/DeepSeek-Coder-V2-Instruct",`
			`"deepseek-ai/DeepSeek-V2.5",`
`tool-call`: merge & fix jinja template tests into test-chat-template 2024-09-26 18:05:00 +00:00			`"indischepartij/MiniCPM-3B-OpenHermes-2.5-v2",`
`chat-template`: fix jinja tests (make safe a passthrough) 2024-09-27 02:50:04 +00:00			`"meetkai/functionary-medium-v3.1",`
`minja`: fix identifiers parsing (when start w/ not/is/etc) and lstrip_blocks corner case (needed by DeepSeek-V2.5 2024-09-27 17:30:44 +00:00			`"meetkai/functionary-medium-v3.2",`
`tool-call`: merge & fix jinja template tests into test-chat-template 2024-09-26 18:05:00 +00:00			`"microsoft/Phi-3-medium-4k-instruct",`
			`"microsoft/Phi-3-mini-4k-instruct",`
			`"microsoft/Phi-3-small-8k-instruct",`
			`"microsoft/Phi-3.5-mini-instruct",`
`chat-template`: add phi-3.5-vision-instruct 2024-09-28 23:33:19 +00:00			`"microsoft/Phi-3.5-vision-instruct",`
`tool-call`: merge & fix jinja template tests into test-chat-template 2024-09-26 18:05:00 +00:00			`"mlabonne/AlphaMonarch-7B",`
`minja`: add `indent` filter to support command-r-plus's chat templates 2024-09-27 18:00:14 +00:00			`"CohereForAI/c4ai-command-r-plus",`
`minja`: minimalist Jinja templating engine for LLM chat templates 2024-09-25 15:01:18 +00:00			`"NousResearch/Hermes-2-Pro-Llama-3-8B",`
			`"NousResearch/Hermes-2-Pro-Mistral-7B",`
`tool-call`: test Hermes-3-Llama-3.1-8B 2024-10-29 14:52:25 +00:00			`"NousResearch/Hermes-3-Llama-3.1-8B",`
`tool-call`: merge & fix jinja template tests into test-chat-template 2024-09-26 18:05:00 +00:00			`"openchat/openchat-3.5-0106",`
			`"OrionStarAI/Orion-14B-Chat",`
`minja`: minimalist Jinja templating engine for LLM chat templates 2024-09-25 15:01:18 +00:00			`"Qwen/Qwen2-7B-Instruct",`
			`"Qwen/Qwen2-VL-7B-Instruct",`
`minja`: fetch more templates (add models from test-chat-template) 2024-09-25 18:22:43 +00:00			`"Qwen/Qwen2.5-7B-Instruct",`
fix editorconfig lints 2024-09-26 01:27:46 +00:00			`"Qwen/Qwen2.5-Math-7B-Instruct",`
`minja`: fetch more templates (add models from test-chat-template) 2024-09-25 18:22:43 +00:00			`"teknium/OpenHermes-2.5-Mistral-7B",`
			`"TheBloke/FusionNet_34Bx2_MoE-AWQ",`
`tool-call`: merge & fix jinja template tests into test-chat-template 2024-09-26 18:05:00 +00:00
`minja`: minimalist Jinja templating engine for LLM chat templates 2024-09-25 15:01:18 +00:00			`# Gated models:`
`minja`: generate chat goldens w/ fixed date to support Llama-3.2-3B-Instruct (uses strftime_now) 2024-09-27 18:52:15 +00:00			`"meta-llama/Llama-3.2-3B-Instruct",`
`minja`: minimalist Jinja templating engine for LLM chat templates 2024-09-25 15:01:18 +00:00			`"meta-llama/Meta-Llama-3.1-8B-Instruct",`
`minja`: fix string subscripts, add string pipe to support Mistral-Nemo template 2024-10-22 22:39:46 +00:00			`"mistralai/Mistral-Nemo-Instruct-2407",`
`minja`: fetch more templates (add models from test-chat-template) 2024-09-25 18:22:43 +00:00			`"google/gemma-7b-it",`
`minja`: minimalist Jinja templating engine for LLM chat templates 2024-09-25 15:01:18 +00:00			`"google/gemma-2-2b-it",`
`minja`: fetch more templates (add models from test-chat-template) 2024-09-25 18:22:43 +00:00			`"mistralai/Mistral-7B-Instruct-v0.2",`
`minja`: minimalist Jinja templating engine for LLM chat templates 2024-09-25 15:01:18 +00:00			`"mistralai/Mixtral-8x7B-Instruct-v0.1",`
			`]`

fix flake8 lints 2024-09-26 01:30:17 +00:00
`minja`: minimalist Jinja templating engine for LLM chat templates 2024-09-25 15:01:18 +00:00			`def raise_exception(message: str):`
			`raise ValueError(message)`

fix flake8 lints 2024-09-26 01:30:17 +00:00
`minja`: minimalist Jinja templating engine for LLM chat templates 2024-09-25 15:01:18 +00:00			`def tojson(x, ensure_ascii=False, indent=None, separators=None, sort_keys=False):`
			`return json.dumps(x, ensure_ascii=ensure_ascii, indent=indent, separators=separators, sort_keys=sort_keys)`

fix flake8 lints 2024-09-26 01:30:17 +00:00
`minja`: generate chat goldens w/ fixed date to support Llama-3.2-3B-Instruct (uses strftime_now) 2024-09-27 18:52:15 +00:00			`TEST_DATE = os.environ.get('TEST_DATE', '2024-07-26')`
`tool-call`: cleanup tools.py 2024-09-28 17:31:51 +00:00

`minja`: minimalist Jinja templating engine for LLM chat templates 2024-09-25 15:01:18 +00:00			`def strftime_now(format):`
`minja`: generate chat goldens w/ fixed date to support Llama-3.2-3B-Instruct (uses strftime_now) 2024-09-27 18:52:15 +00:00			`now = datetime.datetime.strptime(TEST_DATE, "%Y-%m-%d")`
			`# now = datetime.datetime.now()`
			`return now.strftime(format)`
`minja`: minimalist Jinja templating engine for LLM chat templates 2024-09-25 15:01:18 +00:00
fix flake8 lints 2024-09-26 01:30:17 +00:00
`minja`: minimalist Jinja templating engine for LLM chat templates 2024-09-25 15:01:18 +00:00			`def handle_chat_template(model_id, variant, template_src):`
`minja`: update chat template goldens w/ llama.3.1 arguments workaround 2024-09-26 17:10:27 +00:00			`logger.info(f"# {model_id}{' @ ' + variant if variant else ''}")`
`minja`: minimalist Jinja templating engine for LLM chat templates 2024-09-25 15:01:18 +00:00			`model_name = model_id.replace("/", "-")`
			`base_name = f'{model_name}-{variant}' if variant else model_name`
			`template_file = f'tests/chat/templates/{base_name}.jinja'`
`minja`: update chat template goldens w/ llama.3.1 arguments workaround 2024-09-26 17:10:27 +00:00			`logger.info(f'- template_file: {template_file}')`
`minja`: minimalist Jinja templating engine for LLM chat templates 2024-09-25 15:01:18 +00:00			`with open(template_file, 'w') as f:`
			`f.write(template_src)`
fix editorconfig lints 2024-09-26 01:27:46 +00:00
`tool-call`: fix pyright type errors 2024-09-26 02:59:38 +00:00			`logger.info(f"- {template_file}")`
fix editorconfig lints 2024-09-26 01:27:46 +00:00
`minja`: minimalist Jinja templating engine for LLM chat templates 2024-09-25 15:01:18 +00:00			`env = jinja2.Environment(`
fix flake8 lints 2024-09-26 01:30:17 +00:00			`trim_blocks=True,`
			`lstrip_blocks=True,`
			`# keep_trailing_newline=False,`
			`extensions=[`
			`jinja2.ext.loopcontrols`
			`])`
`chat-template`: fix jinja tests (make safe a passthrough) 2024-09-27 02:50:04 +00:00			`env.filters['safe'] = lambda x: x`
`minja`: minimalist Jinja templating engine for LLM chat templates 2024-09-25 15:01:18 +00:00			`env.filters['tojson'] = tojson`
			`env.globals['raise_exception'] = raise_exception`
			`env.globals['strftime_now'] = strftime_now`

			`template = env.from_string(template_src)`
fix editorconfig lints 2024-09-26 01:27:46 +00:00
`minja`: minimalist Jinja templating engine for LLM chat templates 2024-09-25 15:01:18 +00:00			`context_files = glob.glob('tests/chat/contexts/*.json')`
			`for context_file in context_files:`
			`context_name = context_file.split("/")[-1].replace(".json", "")`
			`with open(context_file, 'r') as f:`
			`context = json.load(f)`
fix editorconfig lints 2024-09-26 01:27:46 +00:00
`minja`: minimalist Jinja templating engine for LLM chat templates 2024-09-25 15:01:18 +00:00			`output_file = f'tests/chat/goldens/{base_name}-{context_name}.txt'`
`tool-call`: fix pyright type errors 2024-09-26 02:59:38 +00:00			`logger.info(f"- {output_file}")`
`minja`: update chat template goldens w/ llama.3.1 arguments workaround 2024-09-26 17:10:27 +00:00
			`# The template (and workarounds) may modify the context in place, so we need to make a copy of it.`
`tool-call`: merge & fix jinja template tests into test-chat-template 2024-09-26 18:05:00 +00:00			`render_context = json.loads(json.dumps(context))`
`minja`: update chat template goldens w/ llama.3.1 arguments workaround 2024-09-26 17:10:27 +00:00
			`# Work around Llama-3.1 template quirk: it expects tool_call.function.arguments to be an object rather than its JSON string representation.`
`tool-call`: Qwen 2.5 Instruct also requires object arguments 2024-09-28 22:05:35 +00:00			`if 'tool_call.arguments \| items' in template_src or 'tool_call.arguments \| tojson' in template_src:`
`tool-call`: merge & fix jinja template tests into test-chat-template 2024-09-26 18:05:00 +00:00			`for message in render_context['messages']:`
`minja`: update chat template goldens w/ llama.3.1 arguments workaround 2024-09-26 17:10:27 +00:00			`if 'tool_calls' in message:`
			`for tool_call in message['tool_calls']:`
`tool-call`: merge & fix jinja template tests into test-chat-template 2024-09-26 18:05:00 +00:00			`if tool_call.get('type') == 'function':`
			`arguments = tool_call['function']['arguments']`
			`tool_call['function']['arguments'] = json.loads(arguments)`
`minja`: update chat template goldens w/ llama.3.1 arguments workaround 2024-09-26 17:10:27 +00:00
`minja`: minimalist Jinja templating engine for LLM chat templates 2024-09-25 15:01:18 +00:00			`try:`
`tool-call`: merge & fix jinja template tests into test-chat-template 2024-09-26 18:05:00 +00:00			`output = template.render(**render_context)`
fix flake8 lints 2024-09-26 01:30:17 +00:00			`except Exception as e1:`
`minja`: minimalist Jinja templating engine for LLM chat templates 2024-09-25 15:01:18 +00:00			`# Some templates (e.g. Phi-3-medium-128k's) expect a non-null "content" key in each message.`
			`for message in context["messages"]:`
			`if message.get("content") is None:`
			`message["content"] = ""`

			`try:`
`tool-call`: merge & fix jinja template tests into test-chat-template 2024-09-26 18:05:00 +00:00			`output = template.render(**render_context)`
fix flake8 lints 2024-09-26 01:30:17 +00:00			`except Exception as e2:`
`tool-call`: fix pyright type errors 2024-09-26 02:59:38 +00:00			`logger.info(f" ERROR: {e2} (after first error: {e1})")`
fix flake8 lints 2024-09-26 01:30:17 +00:00			`output = f"ERROR: {e2}"`
`minja`: minimalist Jinja templating engine for LLM chat templates 2024-09-25 15:01:18 +00:00
			`with open(output_file, 'w') as f:`
			`f.write(output)`
fix editorconfig lints 2024-09-26 01:27:46 +00:00
`minja`: update chat template goldens w/ llama.3.1 arguments workaround 2024-09-26 17:10:27 +00:00			`logger.info('')`
fix flake8 lints 2024-09-26 01:30:17 +00:00
fix lints 2024-09-26 18:06:29 +00:00
`minja`: minimalist Jinja templating engine for LLM chat templates 2024-09-25 15:01:18 +00:00			`def main():`
			`for dir in ['tests/chat/templates', 'tests/chat/goldens']:`
			`if not os.path.isdir(dir):`
			`os.mkdir(dir)`
fix editorconfig lints 2024-09-26 01:27:46 +00:00
`minja`: minimalist Jinja templating engine for LLM chat templates 2024-09-25 15:01:18 +00:00			`for model_id in model_ids:`
			`# response = requests.get(f"https://huggingface.co/{model_id}/resolve/main/tokenizer_config.json")`
			`# response.raise_for_status()`
			`# config_str = response.text`
			`with open(hf_hub_download(repo_id=model_id, filename="tokenizer_config.json")) as f:`
			`config_str = f.read()`
fix editorconfig lints 2024-09-26 01:27:46 +00:00
			`try:`
`minja`: minimalist Jinja templating engine for LLM chat templates 2024-09-25 15:01:18 +00:00			`config = json.loads(config_str)`
fix flake8 lints 2024-09-26 01:30:17 +00:00			`except json.JSONDecodeError:`
`minja`: minimalist Jinja templating engine for LLM chat templates 2024-09-25 15:01:18 +00:00			`# Fix https://huggingface.co/NousResearch/Meta-Llama-3-8B-Instruct/blob/main/tokenizer_config.json`
			`# (Remove extra '}' near the end of the file)`
fix editorconfig lints 2024-09-26 01:27:46 +00:00			`config = json.loads(re.sub(r'\}([\n\s]\}[\n\s]\],[\n\s]*"clean_up_tokenization_spaces")', r'\1', config_str))`
`minja`: minimalist Jinja templating engine for LLM chat templates 2024-09-25 15:01:18 +00:00
			`chat_template = config['chat_template']`
			`if isinstance(chat_template, str):`
			`handle_chat_template(model_id, None, chat_template)`
			`else:`
			`for ct in chat_template:`
			`handle_chat_template(model_id, ct['name'], ct['template'])`

fix flake8 lints 2024-09-26 01:30:17 +00:00
`minja`: minimalist Jinja templating engine for LLM chat templates 2024-09-25 15:01:18 +00:00			`if __name__ == '__main__':`
fix editorconfig lints 2024-09-26 01:27:46 +00:00			`main()`