common : bring back missing args, add env var duplication check (#9375)

* common : bring back missing args

* move duplication check to test-arg-parser

* add check for duplicated env var

* correct default values
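
For context: the argument-duplication check that previously ran inside gpt_params_parser_init itself now runs in test-arg-parser, and it additionally verifies that no two options register the same environment variable (the --alias option no longer sets LLAMA_ARG_MODEL, which evidently collided with another option's env var). Below is a rough sketch of the kind of collision the new test is meant to catch; the flag names, descriptions and handlers are illustrative only and are not taken from this change:

    // hypothetical registrations: both options claim LLAMA_ARG_MODEL,
    // so test-arg-parser would report a duplicated env var and fail
    add_opt(llama_arg(
        {"-m", "--model"}, "FNAME",
        "model path",
        [](gpt_params & params, const std::string & value) { params.model = value; }
    ).set_env("LLAMA_ARG_MODEL"));
    add_opt(llama_arg(
        {"-a", "--alias"}, "STRING",
        "model alias",
        [](gpt_params & params, const std::string & value) { params.model_alias = value; }
    ).set_env("LLAMA_ARG_MODEL")); // duplicated env var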
Xuan Son Nguyen 2024-09-08 18:08:55 +02:00 committed by GitHub
parent a249843d89
commit 3f7ccfd649
4 changed files with 99 additions and 41 deletions


@@ -673,17 +673,8 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
      * - if LLAMA_EXAMPLE_* is set (other than COMMON), we only show the option in the corresponding example
      * - if both {LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_*,} are set, we will prioritize the LLAMA_EXAMPLE_* matching current example
      */
-    std::unordered_set<std::string> seen_args;
     auto add_opt = [&](llama_arg arg) {
         if (arg.in_example(ex) || arg.in_example(LLAMA_EXAMPLE_COMMON)) {
-            // make sure there is no argument duplications
-            for (const auto & a : arg.args) {
-                if (seen_args.find(a) == seen_args.end()) {
-                    seen_args.insert(a);
-                } else {
-                    throw std::runtime_error(format("found duplicated argument in source code: %s", a));
-                }
-            }
             options.push_back(std::move(arg));
         }
     };
@@ -790,8 +781,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
     add_opt(llama_arg(
         {"-C", "--cpu-mask"}, "M",
         "CPU affinity mask: arbitrarily long hex. Complements cpu-range (default: \"\")",
-        [](gpt_params & params, const std::string & value) {
-            std::string mask = value;
+        [](gpt_params & params, const std::string & mask) {
             params.cpuparams.mask_valid = true;
             if (!parse_cpu_mask(mask, params.cpuparams.cpumask)) {
                 throw std::invalid_argument("invalid cpumask");
@@ -801,8 +791,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
     add_opt(llama_arg(
         {"-Cr", "--cpu-range"}, "lo-hi",
         "range of CPUs for affinity. Complements --cpu-mask",
-        [](gpt_params & params, const std::string & value) {
-            std::string range = value;
+        [](gpt_params & params, const std::string & range) {
             params.cpuparams.mask_valid = true;
             if (!parse_cpu_range(range, params.cpuparams.cpumask)) {
                 throw std::invalid_argument("invalid range");
@@ -816,6 +805,16 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
             params.cpuparams.strict_cpu = std::stoul(value);
         }
     ));
+    add_opt(llama_arg(
+        {"--prio"}, "N",
+        format("set process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.cpuparams.priority),
+        [](gpt_params & params, int prio) {
+            if (prio < 0 || prio > 3) {
+                throw std::invalid_argument("invalid value");
+            }
+            params.cpuparams.priority = (enum ggml_sched_priority) prio;
+        }
+    ));
     add_opt(llama_arg(
         {"--poll"}, "<0...100>",
         format("use polling level to wait for work (0 - no polling, default: %u)\n", (unsigned) params.cpuparams.poll),
@@ -826,8 +825,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
     add_opt(llama_arg(
         {"-Cb", "--cpu-mask-batch"}, "M",
         "CPU affinity mask: arbitrarily long hex. Complements cpu-range-batch (default: same as --cpu-mask)",
-        [](gpt_params & params, const std::string & value) {
-            std::string mask = value;
+        [](gpt_params & params, const std::string & mask) {
             params.cpuparams_batch.mask_valid = true;
             if (!parse_cpu_mask(mask, params.cpuparams_batch.cpumask)) {
                 throw std::invalid_argument("invalid cpumask");
@@ -837,8 +835,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
     add_opt(llama_arg(
         {"-Crb", "--cpu-range-batch"}, "lo-hi",
         "ranges of CPUs for affinity. Complements --cpu-mask-batch",
-        [](gpt_params & params, const std::string & value) {
-            std::string range = value;
+        [](gpt_params & params, const std::string & range) {
             params.cpuparams_batch.mask_valid = true;
             if (!parse_cpu_range(range, params.cpuparams_batch.cpumask)) {
                 throw std::invalid_argument("invalid range");
@@ -852,6 +849,16 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
             params.cpuparams_batch.strict_cpu = value;
         }
     ));
+    add_opt(llama_arg(
+        {"--prio-batch"}, "N",
+        format("set process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.cpuparams_batch.priority),
+        [](gpt_params & params, int prio) {
+            if (prio < 0 || prio > 3) {
+                throw std::invalid_argument("invalid value");
+            }
+            params.cpuparams_batch.priority = (enum ggml_sched_priority) prio;
+        }
+    ));
     add_opt(llama_arg(
         {"--poll-batch"}, "<0|1>",
         "use polling to wait for work (default: same as --poll)",
@@ -862,8 +869,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
     add_opt(llama_arg(
         {"-Cd", "--cpu-mask-draft"}, "M",
         "Draft model CPU affinity mask. Complements cpu-range-draft (default: same as --cpu-mask)",
-        [](gpt_params & params, const std::string & value) {
-            std::string mask = value;
+        [](gpt_params & params, const std::string & mask) {
             params.draft_cpuparams.mask_valid = true;
             if (!parse_cpu_mask(mask, params.draft_cpuparams.cpumask)) {
                 throw std::invalid_argument("invalid cpumask");
@@ -873,8 +879,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
     add_opt(llama_arg(
         {"-Crd", "--cpu-range-draft"}, "lo-hi",
         "Ranges of CPUs for affinity. Complements --cpu-mask-draft",
-        [](gpt_params & params, const std::string & value) {
-            std::string range = value;
+        [](gpt_params & params, const std::string & range) {
             params.draft_cpuparams.mask_valid = true;
             if (!parse_cpu_range(range, params.draft_cpuparams.cpumask)) {
                 throw std::invalid_argument("invalid range");
@@ -888,6 +893,16 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
             params.draft_cpuparams.strict_cpu = value;
         }
     ).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
+    add_opt(llama_arg(
+        {"--prio-draft"}, "N",
+        format("set draft process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.draft_cpuparams.priority),
+        [](gpt_params & params, int prio) {
+            if (prio < 0 || prio > 3) {
+                throw std::invalid_argument("invalid value");
+            }
+            params.draft_cpuparams.priority = (enum ggml_sched_priority) prio;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
     add_opt(llama_arg(
         {"--poll-draft"}, "<0|1>",
         "Use polling to wait for draft model work (default: same as --poll])",
@@ -895,11 +910,20 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
             params.draft_cpuparams.poll = value;
         }
     ).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
+    add_opt(llama_arg(
+        {"-Cbd", "--cpu-mask-batch-draft"}, "M",
+        "Draft model CPU affinity mask. Complements cpu-range-draft (default: same as --cpu-mask)",
+        [](gpt_params & params, const std::string & mask) {
+            params.draft_cpuparams_batch.mask_valid = true;
+            if (!parse_cpu_mask(mask, params.draft_cpuparams_batch.cpumask)) {
+                throw std::invalid_argument("invalid cpumask");
+            }
+        }
+    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
     add_opt(llama_arg(
         {"-Crbd", "--cpu-range-batch-draft"}, "lo-hi",
         "Ranges of CPUs for affinity. Complements --cpu-mask-draft-batch)",
-        [](gpt_params & params, const std::string & value) {
-            std::string range = value;
+        [](gpt_params & params, const std::string & range) {
             params.draft_cpuparams_batch.mask_valid = true;
             if (!parse_cpu_range(range, params.draft_cpuparams_batch.cpumask)) {
                 throw std::invalid_argument("invalid cpumask");
@@ -913,6 +937,16 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
             params.draft_cpuparams_batch.strict_cpu = value;
         }
     ).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
+    add_opt(llama_arg(
+        {"--prio-batch-draft"}, "N",
+        format("set draft process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.draft_cpuparams_batch.priority),
+        [](gpt_params & params, int prio) {
+            if (prio < 0 || prio > 3) {
+                throw std::invalid_argument("invalid value");
+            }
+            params.draft_cpuparams_batch.priority = (enum ggml_sched_priority) prio;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
     add_opt(llama_arg(
         {"--poll-batch-draft"}, "<0|1>",
         "Use polling to wait for draft model work (default: --poll-draft)",
@@ -1124,21 +1158,21 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
         [](gpt_params & params) {
             params.interactive = true;
         }
-    ).set_examples({LLAMA_EXAMPLE_INFILL}));
+    ).set_examples({LLAMA_EXAMPLE_MAIN}));
     add_opt(llama_arg(
         {"-if", "--interactive-first"},
         format("run in interactive mode and wait for input right away (default: %s)", params.interactive_first ? "true" : "false"),
         [](gpt_params & params) {
             params.interactive_first = true;
         }
-    ).set_examples({LLAMA_EXAMPLE_INFILL}));
+    ).set_examples({LLAMA_EXAMPLE_MAIN}));
     add_opt(llama_arg(
         {"-mli", "--multiline-input"},
         "allows you to write or paste multiple lines without ending each in '\\'",
         [](gpt_params & params) {
             params.multiline_input = true;
         }
-    ).set_examples({LLAMA_EXAMPLE_INFILL}));
+    ).set_examples({LLAMA_EXAMPLE_MAIN}));
     add_opt(llama_arg(
         {"--in-prefix-bos"},
         "prefix BOS to user inputs, preceding the `--in-prefix` string",
@@ -1146,7 +1180,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
             params.input_prefix_bos = true;
             params.enable_chat_template = false;
         }
-    ).set_examples({LLAMA_EXAMPLE_INFILL}));
+    ).set_examples({LLAMA_EXAMPLE_MAIN}));
     add_opt(llama_arg(
         {"--in-prefix"}, "STRING",
         "string to prefix user inputs with (default: empty)",
@@ -1154,7 +1188,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
             params.input_prefix = value;
             params.enable_chat_template = false;
         }
-    ).set_examples({LLAMA_EXAMPLE_INFILL}));
+    ).set_examples({LLAMA_EXAMPLE_MAIN}));
     add_opt(llama_arg(
         {"--in-suffix"}, "STRING",
         "string to suffix after user inputs with (default: empty)",
@@ -1162,7 +1196,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
             params.input_suffix = value;
             params.enable_chat_template = false;
         }
-    ).set_examples({LLAMA_EXAMPLE_INFILL}));
+    ).set_examples({LLAMA_EXAMPLE_MAIN}));
     add_opt(llama_arg(
         {"--no-warmup"},
         "skip warming up the model with an empty run",
@@ -1499,7 +1533,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
         }
     ));
     add_opt(llama_arg(
-        {"--all-logits"},
+        {"--perplexity", "--all-logits"},
         format("return logits for all tokens in the batch (default: %s)", params.logits_all ? "true" : "false"),
         [](gpt_params & params) {
             params.logits_all = true;
@@ -1554,6 +1588,13 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
             params.kl_divergence = true;
         }
     ).set_examples({LLAMA_EXAMPLE_PERPLEXITY}));
+    add_opt(llama_arg(
+        {"--save-all-logits", "--kl-divergence-base"}, "FNAME",
+        "set logits file",
+        [](gpt_params & params, const std::string & value) {
+            params.logits_file = value;
+        }
+    ).set_examples({LLAMA_EXAMPLE_PERPLEXITY}));
     add_opt(llama_arg(
         {"--ppl-stride"}, "N",
         format("stride for perplexity calculation (default: %d)", params.ppl_stride),
@@ -1802,7 +1843,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
         [](gpt_params & params, const std::string & value) {
             params.model_alias = value;
         }
-    ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODEL"));
+    ).set_examples({LLAMA_EXAMPLE_SERVER}));
     add_opt(llama_arg(
         {"-m", "--model"}, "FNAME",
         ex == LLAMA_EXAMPLE_EXPORT_LORA
@@ -1890,7 +1931,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
         }
     ).set_examples({LLAMA_EXAMPLE_PASSKEY}));
     add_opt(llama_arg(
-        {"-o", "--output"}, "FNAME",
+        {"-o", "--output", "--output-file"}, "FNAME",
         format("output file (default: '%s')",
             ex == LLAMA_EXAMPLE_EXPORT_LORA
             ? params.lora_outfile.c_str()
@@ -1932,7 +1973,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
         }
     ).set_examples({LLAMA_EXAMPLE_IMATRIX}));
     add_opt(llama_arg(
-        {"--chunk"}, "N",
+        {"--chunk", "--from-chunk"}, "N",
         format("start processing the input from chunk N (default: %d)", params.i_chunk),
         [](gpt_params & params, int value) {
             params.i_chunk = value;
@@ -2057,7 +2098,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER}));
     add_opt(llama_arg(
-        {"--timeout"}, "N",
+        {"-to", "--timeout"}, "N",
         format("server read/write timeout in seconds (default: %d)", params.timeout_read),
         [](gpt_params & params, int value) {
             params.timeout_read = value;


@@ -211,7 +211,6 @@ struct gpt_params {
     bool use_mlock = false; // use mlock to keep model in memory
     bool verbose_prompt = false; // print prompt tokens before generation
     bool display_prompt = true; // print prompt before generation
-    bool infill = false; // use infill mode
     bool dump_kv_cache = false; // dump the KV cache contents for debugging purposes
     bool no_kv_offload = false; // disable KV offloading
     bool warmup = true; // warmup run


@@ -306,11 +306,6 @@ int main(int argc, char ** argv) {
     LOG_TEE("\n\n");
     LOG_TEE("\n##### Infill mode #####\n\n");
-    if (params.infill) {
-        printf("\n************\n");
-        printf("no need to specify '--infill', always running infill\n");
-        printf("************\n\n");
-    }
     if (params.interactive) {
         const char *control_message;
         if (params.multiline_input) {


@@ -1,6 +1,7 @@
 #include <string>
 #include <vector>
 #include <sstream>
+#include <unordered_set>
 
 #undef NDEBUG
 #include <cassert>
@@ -13,7 +14,29 @@ int main(void) {
     printf("test-arg-parser: make sure there is no duplicated arguments in any examples\n\n");
     for (int ex = 0; ex < LLAMA_EXAMPLE_COUNT; ex++) {
         try {
-            gpt_params_parser_init(params, (enum llama_example)ex);
+            auto options = gpt_params_parser_init(params, (enum llama_example)ex);
+            std::unordered_set<std::string> seen_args;
+            std::unordered_set<std::string> seen_env_vars;
+            for (const auto & opt : options) {
+                // check for args duplications
+                for (const auto & arg : opt.args) {
+                    if (seen_args.find(arg) == seen_args.end()) {
+                        seen_args.insert(arg);
+                    } else {
+                        fprintf(stderr, "test-arg-parser: found different handlers for the same argument: %s", arg);
+                        exit(1);
+                    }
+                }
+                // check for env var duplications
+                if (opt.env) {
+                    if (seen_env_vars.find(opt.env) == seen_env_vars.end()) {
+                        seen_env_vars.insert(opt.env);
+                    } else {
+                        fprintf(stderr, "test-arg-parser: found different handlers for the same env var: %s", opt.env);
+                        exit(1);
+                    }
+                }
+            }
         } catch (std::exception & e) {
             printf("%s\n", e.what());
             assert(false);
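
With the CPU-affinity and priority options restored, thread placement can again be tuned from the command line; appending something like --cpu-range 0-7 --prio 2 --poll 50 (or the corresponding -batch / -draft variants) to an existing llama.cpp invocation should exercise these code paths, though no particular command line is part of this change.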