llama.cpp/scripts/run-with-preset.py

#!/usr/bin/env python3

import argparse
import os
import subprocess
import sys

import yaml

# Long CLI argument names (without the leading "--") that may be forwarded from
# a preset to the "main" and "perplexity" binaries. Preset property names are
# matched against these entries after "_" has been replaced by "-", and
# arguments are emitted on the command line in this order.
# NOTE: the entry "no-penalize-nl" was previously misspelled "np-penalize-nl",
# so the "no_penalize_nl" preset property could never be matched.
CLI_ARGS_MAIN_PERPLEXITY = [
    "batch-size", "cfg-negative-prompt", "cfg-scale", "chunks", "color", "ctx-size", "escape",
    "export", "file", "frequency-penalty", "grammar", "grammar-file", "hellaswag",
    "hellaswag-tasks", "ignore-eos", "in-prefix", "in-prefix-bos", "in-suffix", "instruct",
    "interactive", "interactive-first", "keep", "logdir", "logit-bias", "lora", "lora-base",
    "low-vram", "main-gpu", "memory-f32", "mirostat", "mirostat-ent", "mirostat-lr", "mlock",
    "model", "multiline-input", "n-gpu-layers", "n-predict", "no-mmap", "no-mul-mat-q",
    "no-penalize-nl", "numa", "ppl-output-type", "ppl-stride", "presence-penalty", "prompt",
    "prompt-cache", "prompt-cache-all", "prompt-cache-ro", "random-prompt", "repeat-last-n",
    "repeat-penalty", "reverse-prompt", "rope-freq-base", "rope-freq-scale", "rope-scale", "seed",
    "simple-io", "tensor-split", "threads", "temp", "tfs", "top-k", "top-p", "typical",
    "verbose-prompt"
]

# Long CLI argument names (without the leading "--") that may be forwarded from
# a preset to the "llama-bench" binary; arguments are emitted in this order.
CLI_ARGS_LLAMA_BENCH = [
    "batch-size",
    "memory-f32",
    "low-vram",
    "model",
    "mul-mat-q",
    "n-gen",
    "n-gpu-layers",
    "n-prompt",
    "output",
    "repetitions",
    "tensor-split",
    "threads",
    "verbose",
]

# Long CLI argument names (without the leading "--") that may be forwarded from
# a preset to the "server" binary; arguments are emitted in this order.
CLI_ARGS_SERVER = [
    "alias",
    "batch-size",
    "ctx-size",
    "embedding",
    "host",
    "memory-f32",
    "lora",
    "lora-base",
    "low-vram",
    "main-gpu",
    "mlock",
    "model",
    "n-gpu-layers",
    "n-probs",
    "no-mmap",
    "no-mul-mat-q",
    "numa",
    "path",
    "port",
    "rope-freq-base",
    "timeout",
    "rope-freq-scale",
    "tensor-split",
    "threads",
    "verbose",
]

# Help texts handed to argparse. The description is a pre-formatted,
# multi-line string, so its line breaks are significant.
description = """Run llama.cpp binaries with presets from YAML file(s).
To specify which binary should be run, specify the "binary" property (main, perplexity, llama-bench, and server are supported).
To get a preset file template, run a llama.cpp binary with the "--logdir" CLI argument.

Formatting considerations:
- The YAML property names are the same as the CLI argument names of the corresponding binary.
- Properties must use the long name of their corresponding llama.cpp CLI arguments.
- Like the llama.cpp binaries the property names do not differentiate between hyphens and underscores.
- Flags must be defined as "<PROPERTY_NAME>: true" to be effective.
- To define the logit_bias property, the expected format is "<TOKEN_ID>: <BIAS>" in the "logit_bias" namespace.
- To define multiple "reverse_prompt" properties simultaneously the expected format is a list of strings.
- To define a tensor split, pass a list of floats.
"""
# Custom usage line: arbitrary "--<ARG_NAME>" pairs are accepted in addition
# to the options argparse itself knows about.
usage = "run-with-preset.py [-h] [yaml_files ...] [--<ARG_NAME> <ARG_VALUE> ...]"
# Fixed the user-facing typo "CLI ars" -> "CLI args".
epilog = ("  --<ARG_NAME> specify additional CLI args to be passed to the binary (override all preset files). "
          "Unknown args will be ignored.")

# Build the command-line parser. RawTextHelpFormatter keeps the hand-made line
# breaks of the description intact in the help output.
parser = argparse.ArgumentParser(
    description=description,
    usage=usage,
    epilog=epilog,
    formatter_class=argparse.RawTextHelpFormatter,
)
parser.add_argument("-bin", "--binary", help="The binary to run.")
parser.add_argument(
    "yaml_files",
    nargs="*",
    help=(
        "Arbitrary number of YAML files from which to read preset values. "
        "If two files specify the same values the later one will be used."
    ),
)

# Arguments argparse does not recognise are collected in unknown_args instead
# of being rejected; they are appended to the binary's command line later on.
known_args, unknown_args = parser.parse_known_args()

# Nothing to do without preset files or extra arguments: show the help text.
if not (known_args.yaml_files or unknown_args):
    parser.print_help()
    sys.exit(0)

# Merge the properties of all preset files into one dict; files listed later
# override earlier ones.
props = dict()

for yaml_file in known_args.yaml_files:
    # YAML streams are expected to be UTF-8, so do not depend on the locale's
    # default encoding (which is not UTF-8 on every platform).
    with open(yaml_file, "r", encoding="utf-8") as f:
        loaded = yaml.load(f, yaml.SafeLoader)

    # An empty (or comment-only) YAML document loads as None; treat it as a
    # preset without properties instead of crashing in dict.update().
    if loaded is None:
        continue

    if not isinstance(loaded, dict):
        print(f"Preset file {yaml_file} must contain a YAML mapping at the top level.")
        sys.exit(1)

    # Normalise the keys per file (underscores -> hyphens) *before* merging.
    # Normalising only after the merge lets an earlier file win over a later
    # one when the files spell the same property differently
    # (e.g. "n_predict" vs. "n-predict").
    props.update({str(prop).replace("_", "-"): val for prop, val in loaded.items()})

# Decide which binary to run. The "binary" property is always removed from the
# preset (defaulting to "main"); a -bin/--binary value given on the command
# line takes precedence over it.
preset_binary = props.pop("binary", "main")
binary = known_args.binary if known_args.binary else preset_binary

# Prefer an executable of that name in the current working directory.
local_binary = f"./{binary}"
if os.path.exists(local_binary):
    binary = local_binary

# Choose the list of supported CLI arguments from the (case-insensitive)
# ending of the binary name.
binary_lower = binary.lower()
for name_endings, supported_args in (
    (("main", "perplexity"), CLI_ARGS_MAIN_PERPLEXITY),
    (("llama-bench",), CLI_ARGS_LLAMA_BENCH),
    (("server",), CLI_ARGS_SERVER),
):
    if binary_lower.endswith(name_endings):
        cli_args = supported_args
        break
else:
    print(f"Unknown binary: {binary}")
    sys.exit(1)

# Translate the preset properties into the argv list for the selected binary.
command_list = [binary]

for cli_arg in cli_args:
    # Pop the property so that whatever remains in props afterwards can be
    # reported as unused.
    value = props.pop(cli_arg, None)

    # Skip properties that are effectively unset: missing/null values,
    # disabled flags and empty strings/containers.
    if value is None or value is False:
        continue
    if isinstance(value, (str, list, tuple, dict)) and not value:
        continue
    # -1 is skipped as before (presumably the "use the default" sentinel of
    # generated presets -- verify against the binaries). Numeric zero, however,
    # is a legitimate value (e.g. "temp: 0", "n-gen: 0", "n-gpu-layers: 0")
    # and must be forwarded; the previous "not value" test silently dropped it.
    if value == -1:
        continue

    if cli_arg == "logit-bias":
        # One "--logit-bias <TOKEN_ID><+/-BIAS>" pair per mapping entry.
        for token, bias in value.items():
            command_list += ["--logit-bias", f"{token}{bias:+}"]
        continue

    if cli_arg == "reverse-prompt" and not isinstance(value, str):
        # A list of reverse prompts becomes one "--reverse-prompt" per entry.
        for rp in value:
            command_list += ["--reverse-prompt", str(rp)]
        continue

    command_list.append(f"--{cli_arg}")

    if cli_arg == "tensor-split":
        # The list of split values is passed as one comma-separated argument.
        command_list.append(",".join(str(v) for v in value))
        continue

    # Boolean flags ("<PROPERTY_NAME>: true") take no value. Test the actual
    # boolean rather than its string form so that a literal string value
    # "True" is still forwarded as the argument's value.
    if value is True:
        continue

    command_list.append(str(value))

# Report preset properties that were not consumed while building the command.
# Up to ten leftovers are listed individually; beyond that only the count is
# printed.
num_unused = len(props)
if 0 < num_unused <= 10:
    print("The preset file contained the following unused properties:")
    for unused_name, unused_value in props.items():
        print(f"  {unused_name}: {unused_value}")
elif num_unused > 10:
    print(f"The preset file contained a total of {num_unused} unused properties.")

# Extra command-line arguments come last on the binary's command line.
command_list.extend(unknown_args)

sp = subprocess.Popen(command_list)

# Keep waiting until the child has exited. A KeyboardInterrupt (Ctrl+C) in
# this wrapper is swallowed and the wait is resumed, so the wrapper does not
# terminate before the child does.
while True:
    try:
        sp.wait()
    except KeyboardInterrupt:
        continue
    break

# Propagate the child's exit status.
sys.exit(sp.returncode)
YAML result logging + preset script (#2657) 2023-08-28 15:59:39 +00:00			`#!/usr/bin/env python3`

			`import argparse`
			`import os`
			`import subprocess`
			`import sys`

			`import yaml`

			`CLI_ARGS_MAIN_PERPLEXITY = [`
			`"batch-size", "cfg-negative-prompt", "cfg-scale", "chunks", "color", "ctx-size", "escape",`
			`"export", "file", "frequency-penalty", "grammar", "grammar-file", "hellaswag",`
			`"hellaswag-tasks", "ignore-eos", "in-prefix", "in-prefix-bos", "in-suffix", "instruct",`
			`"interactive", "interactive-first", "keep", "logdir", "logit-bias", "lora", "lora-base",`
			`"low-vram", "main-gpu", "memory-f32", "mirostat", "mirostat-ent", "mirostat-lr", "mlock",`
llama : remove mtest (#3177) * Remove mtest * remove from common/common.h and examples/main/main.cpp 2023-09-15 07:28:45 +00:00			`"model", "multiline-input", "n-gpu-layers", "n-predict", "no-mmap", "no-mul-mat-q",`
YAML result logging + preset script (#2657) 2023-08-28 15:59:39 +00:00			`"np-penalize-nl", "numa", "ppl-output-type", "ppl-stride", "presence-penalty", "prompt",`
			`"prompt-cache", "prompt-cache-all", "prompt-cache-ro", "random-prompt", "repeat-last-n",`
			`"repeat-penalty", "reverse-prompt", "rope-freq-base", "rope-freq-scale", "rope-scale", "seed",`
			`"simple-io", "tensor-split", "threads", "temp", "tfs", "top-k", "top-p", "typical",`
			`"verbose-prompt"`
			`]`

			`CLI_ARGS_LLAMA_BENCH = [`
			`"batch-size", "memory-f32", "low-vram", "model", "mul-mat-q", "n-gen", "n-gpu-layers",`
			`"n-prompt", "output", "repetitions", "tensor-split", "threads", "verbose"`
			`]`

			`CLI_ARGS_SERVER = [`
			`"alias", "batch-size", "ctx-size", "embedding", "host", "memory-f32", "lora", "lora-base",`
			`"low-vram", "main-gpu", "mlock", "model", "n-gpu-layers", "n-probs", "no-mmap", "no-mul-mat-q",`
			`"numa", "path", "port", "rope-freq-base", "timeout", "rope-freq-scale", "tensor-split",`
			`"threads", "verbose"`
			`]`

			`description = """Run llama.cpp binaries with presets from YAML file(s).`
			`To specify which binary should be run, specify the "binary" property (main, perplexity, llama-bench, and server are supported).`
			`To get a preset file template, run a llama.cpp binary with the "--logdir" CLI argument.`

			`Formatting considerations:`
			`- The YAML property names are the same as the CLI argument names of the corresponding binary.`
			`- Properties must use the long name of their corresponding llama.cpp CLI arguments.`
			`- Like the llama.cpp binaries the property names do not differentiate between hyphens and underscores.`
			`- Flags must be defined as "<PROPERTY_NAME>: true" to be effective.`
			`- To define the logit_bias property, the expected format is "<TOKEN_ID>: <BIAS>" in the "logit_bias" namespace.`
			`- To define multiple "reverse_prompt" properties simultaneously the expected format is a list of strings.`
			`- To define a tensor split, pass a list of floats.`
			`"""`
scripts : move run-with-preset.py from root to scripts folder 2024-01-26 15:09:44 +00:00			`usage = "run-with-preset.py [-h] [yaml_files ...] [--<ARG_NAME> <ARG_VALUE> ...]"`
YAML result logging + preset script (#2657) 2023-08-28 15:59:39 +00:00			`epilog = (" --<ARG_NAME> specify additional CLI ars to be passed to the binary (override all preset files). "`
			`"Unknown args will be ignored.")`

			`parser = argparse.ArgumentParser(`
			`description=description, usage=usage, epilog=epilog, formatter_class=argparse.RawTextHelpFormatter)`
			`parser.add_argument("-bin", "--binary", help="The binary to run.")`
			`parser.add_argument("yaml_files", nargs="*",`
			`help="Arbitrary number of YAML files from which to read preset values. "`
			`"If two files specify the same values the later one will be used.")`

			`known_args, unknown_args = parser.parse_known_args()`

			`if not known_args.yaml_files and not unknown_args:`
			`parser.print_help()`
			`sys.exit(0)`

			`props = dict()`

			`for yaml_file in known_args.yaml_files:`
			`with open(yaml_file, "r") as f:`
			`props.update(yaml.load(f, yaml.SafeLoader))`

			`props = {prop.replace("_", "-"): val for prop, val in props.items()}`

			`binary = props.pop("binary", "main")`
			`if known_args.binary:`
			`binary = known_args.binary`

			`if os.path.exists(f"./{binary}"):`
			`binary = f"./{binary}"`

			`if binary.lower().endswith("main") or binary.lower().endswith("perplexity"):`
			`cli_args = CLI_ARGS_MAIN_PERPLEXITY`
			`elif binary.lower().endswith("llama-bench"):`
			`cli_args = CLI_ARGS_LLAMA_BENCH`
			`elif binary.lower().endswith("server"):`
			`cli_args = CLI_ARGS_SERVER`
			`else:`
			`print(f"Unknown binary: {binary}")`
			`sys.exit(1)`

			`command_list = [binary]`

			`for cli_arg in cli_args:`
			`value = props.pop(cli_arg, None)`

			`if not value or value == -1:`
			`continue`

			`if cli_arg == "logit-bias":`
			`for token, bias in value.items():`
			`command_list.append("--logit-bias")`
			`command_list.append(f"{token}{bias:+}")`
			`continue`

			`if cli_arg == "reverse-prompt" and not isinstance(value, str):`
			`for rp in value:`
			`command_list.append("--reverse-prompt")`
			`command_list.append(str(rp))`
			`continue`

			`command_list.append(f"--{cli_arg}")`

			`if cli_arg == "tensor-split":`
			`command_list.append(",".join([str(v) for v in value]))`
			`continue`

			`value = str(value)`

			`if value != "True":`
			`command_list.append(str(value))`

			`num_unused = len(props)`
			`if num_unused > 10:`
			`print(f"The preset file contained a total of {num_unused} unused properties.")`
			`elif num_unused > 0:`
			`print("The preset file contained the following unused properties:")`
			`for prop, value in props.items():`
			`print(f" {prop}: {value}")`

			`command_list += unknown_args`

			`sp = subprocess.Popen(command_list)`

			`while sp.returncode is None:`
			`try:`
			`sp.wait()`
			`except KeyboardInterrupt:`
			`pass`

			`sys.exit(sp.returncode)`