mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-25 02:44:36 +00:00
JSON schema conversion: ⚡️ faster repetitions, min/maxLength for strings, cap number length (#6555)
* json: rename python schema converter to make import easier
* server: skip null json_schema / grammar fields
* json: deps management for primitive rules (+ allow null values)
* json: optimize repetitions for minItems/maxItems and regexps: `a{,3}` goes from `"a"? "a"? "a"?` (explosive combos) to `(a (a (a)?)?)?`
* grammars: add troubleshooting section to readme
* json: cap length of numbers to 15 digits before/after decimal point (avoids infinite gen, e.g. "one third" -> `0.333333333333...`)
* json: unify all repetition code (w/ or w/o sep)
* json: support string minLength/maxLength
* server+json: update server/README w/ result_format
* nits
* json: fix type error w/ python 3.8
* json: fix server/README (json_schema in /completion vs. result_format in /v1/chat/completions)
* json: simplify DOT `{"type": "string", "pattern": "^.$"}`
* json: remove recursion in opt_repetitions (avoids Python stack overflow)
* json: rm dead code
* json: rm useless assert & ggml.h import
This commit is contained in:
parent
fbbc030ba9
commit
ab9a3240a9
@ -11,35 +11,101 @@
|
|||||||
|
|
||||||
using json = nlohmann::ordered_json;
|
using json = nlohmann::ordered_json;
|
||||||
|
|
||||||
|
template <typename Iterator>
|
||||||
|
static std::string join(Iterator begin, Iterator end, const std::string & separator);
|
||||||
|
|
||||||
|
static std::string repeat(const std::string & str, size_t n);
|
||||||
|
|
||||||
|
// Builds a grammar fragment that matches between `min_items` and `max_items`
// occurrences of `item_rule`.
//
//   item_rule            grammar expression for a single item
//   min_items            minimum number of occurrences; negative values are treated as 0
//   max_items            maximum number of occurrences; std::numeric_limits<int>::max()
//                        means "unbounded"; values below min_items are raised to min_items
//   separator_rule       expression emitted between two consecutive items ("" = none)
//   item_rule_is_literal item_rule is a double-quoted literal; without a separator the
//                        mandatory occurrences are then merged into one literal
//
// Optional occurrences are nested instead of being listed side by side, e.g. up to 3:
//   no separator:   (a (a (a)?)?)?
//   separator ",":  (a ("," a ("," a)?)?)?
static std::string build_repetition(const std::string & item_rule, int min_items, int max_items, const std::string & separator_rule = "", bool item_rule_is_literal = false) {
    constexpr int unbounded = std::numeric_limits<int>::max();
    const bool has_separator = !separator_rule.empty();

    // Normalise inconsistent bounds up front: a negative repetition count must never
    // reach a size_t-based helper, where it would wrap around to a huge value.
    if (min_items < 0) {
        min_items = 0;
    }
    if (max_items < min_items) {
        max_items = min_items;
    }

    // Shorthand operators, only usable when there is no separator.
    if (!has_separator) {
        if (min_items == 0 && max_items == 1) {
            return item_rule + "?";
        }
        if (min_items == 1 && max_items == unbounded) {
            return item_rule + "+";
        }
    }

    // Appends `piece` to `out` `count` times (no-op for count <= 0).
    auto append_n = [](std::string & out, const std::string & piece, int count) {
        for (int i = 0; i < count; ++i) {
            out += piece;
        }
    };

    // Produces `count` nested optional groups: "(c (c (c)?)?)?" for count == 3.
    auto nested_optionals = [&append_n](const std::string & content, int count) {
        std::string out;
        for (int i = 0; i < count; ++i) {
            if (i > 0) {
                out += " ";
            }
            out += "(" + content;
        }
        append_n(out, ")?", count);
        return out;
    };

    // Mandatory part: min_items occurrences.
    std::string result;
    if (min_items > 0) {
        // A literal needs at least its two surrounding quotes to be merged safely;
        // anything shorter falls back to plain repetition.
        if (item_rule_is_literal && !has_separator && item_rule.size() >= 2) {
            result = "\"";
            append_n(result, item_rule.substr(1, item_rule.size() - 2), min_items);
            result += "\"";
        } else {
            const std::string glue = has_separator ? " " + separator_rule + " " : " ";
            for (int i = 0; i < min_items; ++i) {
                if (i > 0) {
                    result += glue;
                }
                result += item_rule;
            }
        }
    }

    if (min_items > 0 && max_items != min_items) {
        result += " ";
    }

    if (max_items != unbounded) {
        // Optional part: up to (max_items - min_items) further occurrences.
        const int up_to_n = max_items - min_items;
        const std::string sep_item = has_separator ? separator_rule + " " + item_rule : item_rule;
        if (min_items > 0 || !has_separator) {
            // Every optional occurrence has the same shape.
            result += nested_optionals(sep_item, up_to_n);
        } else if (up_to_n == 1) {
            result += "(" + item_rule + ")?";
        } else if (up_to_n > 1) {
            // The very first item carries no separator; the ones nested inside it do.
            result += "(" + item_rule + " " + nested_optionals(sep_item, up_to_n - 1) + ")?";
        }
    } else {
        const std::string item_operator = "(" + (has_separator ? separator_rule + " " : "") + item_rule + ")";
        if (min_items == 0 && has_separator) {
            result = "(" + item_rule + " " + item_operator + "*)?";
        } else {
            result += item_operator + "*";
        }
    }

    return result;
}
|
||||||
|
|
||||||
const std::string SPACE_RULE = "\" \"?";
|
const std::string SPACE_RULE = "\" \"?";
|
||||||
|
|
||||||
std::unordered_map<std::string, std::string> PRIMITIVE_RULES = {
|
struct BuiltinRule {
|
||||||
{"boolean", "(\"true\" | \"false\") space"},
|
std::string content;
|
||||||
{"number", "(\"-\"? ([0-9] | [1-9] [0-9]*)) (\".\" [0-9]+)? ([eE] [-+]? [0-9]+)? space"},
|
std::vector<std::string> deps;
|
||||||
{"integer", "(\"-\"? ([0-9] | [1-9] [0-9]*)) space"},
|
|
||||||
{"value", "object | array | string | number | boolean"},
|
|
||||||
{"object", "\"{\" space ( string \":\" space value (\",\" space string \":\" space value)* )? \"}\" space"},
|
|
||||||
{"array", "\"[\" space ( value (\",\" space value)* )? \"]\" space"},
|
|
||||||
{"uuid", "\"\\\"\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
|
||||||
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
|
||||||
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
|
||||||
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
|
||||||
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] \"\\\"\" space"},
|
|
||||||
{"string", " \"\\\"\" (\n"
|
|
||||||
" [^\"\\\\] |\n"
|
|
||||||
" \"\\\\\" ([\"\\\\/bfnrt] | \"u\" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])\n"
|
|
||||||
" )* \"\\\"\" space"},
|
|
||||||
{"null", "\"null\" space"}
|
|
||||||
};
|
};
|
||||||
std::vector<std::string> OBJECT_RULE_NAMES = {"object", "array", "string", "number", "boolean", "null", "value"};
|
|
||||||
|
|
||||||
std::unordered_map<std::string, std::string> DATE_RULES = {
|
const std::string _up_to_15_digits = build_repetition("[0-9]", 0, 15);
|
||||||
{"date", "[0-9] [0-9] [0-9] [0-9] \"-\" ( \"0\" [1-9] | \"1\" [0-2] ) \"-\" ( \"0\" [1-9] | [1-2] [0-9] | \"3\" [0-1] )"},
|
|
||||||
{"time", "([01] [0-9] | \"2\" [0-3]) \":\" [0-5] [0-9] \":\" [0-5] [0-9] ( \".\" [0-9] [0-9] [0-9] )? ( \"Z\" | ( \"+\" | \"-\" ) ( [01] [0-9] | \"2\" [0-3] ) \":\" [0-5] [0-9] )"},
|
std::unordered_map<std::string, BuiltinRule> PRIMITIVE_RULES = {
|
||||||
{"date-time", "date \"T\" time"},
|
{"boolean", {"(\"true\" | \"false\") space", {}}},
|
||||||
{"date-string", "\"\\\"\" date \"\\\"\" space"},
|
{"decimal-part", {"[0-9] " + _up_to_15_digits, {}}},
|
||||||
{"time-string", "\"\\\"\" time \"\\\"\" space"},
|
{"integral-part", {"[0-9] | [1-9] " + _up_to_15_digits, {}}},
|
||||||
{"date-time-string", "\"\\\"\" date-time \"\\\"\" space"}
|
{"number", {"(\"-\"? integral-part) (\".\" decimal-part)? ([eE] [-+]? integral-part)? space", {"integral-part", "decimal-part"}}},
|
||||||
|
{"integer", {"(\"-\"? integral-part) space", {"integral-part"}}},
|
||||||
|
{"value", {"object | array | string | number | boolean | null", {"object", "array", "string", "number", "boolean", "null"}}},
|
||||||
|
{"object", {"\"{\" space ( string \":\" space value (\",\" space string \":\" space value)* )? \"}\" space", {"string", "value"}}},
|
||||||
|
{"array", {"\"[\" space ( value (\",\" space value)* )? \"]\" space", {"value"}}},
|
||||||
|
{"uuid", {"\"\\\"\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
||||||
|
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
||||||
|
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
||||||
|
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
||||||
|
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] \"\\\"\" space", {}}},
|
||||||
|
{"char", {"[^\"\\\\] | \"\\\\\" ([\"\\\\/bfnrt] | \"u\" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])", {}}},
|
||||||
|
{"string", {"\"\\\"\" char* \"\\\"\" space", {"char"}}},
|
||||||
|
{"null", {"\"null\" space", {}}},
|
||||||
|
};
|
||||||
|
|
||||||
|
std::unordered_map<std::string, BuiltinRule> STRING_FORMAT_RULES = {
|
||||||
|
{"date", {"[0-9] [0-9] [0-9] [0-9] \"-\" ( \"0\" [1-9] | \"1\" [0-2] ) \"-\" ( \"0\" [1-9] | [1-2] [0-9] | \"3\" [0-1] )", {}}},
|
||||||
|
{"time", {"([01] [0-9] | \"2\" [0-3]) \":\" [0-5] [0-9] \":\" [0-5] [0-9] ( \".\" [0-9] [0-9] [0-9] )? ( \"Z\" | ( \"+\" | \"-\" ) ( [01] [0-9] | \"2\" [0-3] ) \":\" [0-5] [0-9] )", {}}},
|
||||||
|
{"date-time", {"date \"T\" time", {"date", "time"}}},
|
||||||
|
{"date-string", {"\"\\\"\" date \"\\\"\" space", {"date"}}},
|
||||||
|
{"time-string", {"\"\\\"\" time \"\\\"\" space", {"time"}}},
|
||||||
|
{"date-time-string", {"\"\\\"\" date-time \"\\\"\" space", {"date-time"}}}
|
||||||
};
|
};
|
||||||
|
|
||||||
static bool is_reserved_name(const std::string & name) {
|
static bool is_reserved_name(const std::string & name) {
|
||||||
@ -47,7 +113,7 @@ static bool is_reserved_name(const std::string & name) {
|
|||||||
if (RESERVED_NAMES.empty()) {
|
if (RESERVED_NAMES.empty()) {
|
||||||
RESERVED_NAMES.insert("root");
|
RESERVED_NAMES.insert("root");
|
||||||
for (const auto &p : PRIMITIVE_RULES) RESERVED_NAMES.insert(p.first);
|
for (const auto &p : PRIMITIVE_RULES) RESERVED_NAMES.insert(p.first);
|
||||||
for (const auto &p : DATE_RULES) RESERVED_NAMES.insert(p.first);
|
for (const auto &p : STRING_FORMAT_RULES) RESERVED_NAMES.insert(p.first);
|
||||||
}
|
}
|
||||||
return RESERVED_NAMES.find(name) != RESERVED_NAMES.end();
|
return RESERVED_NAMES.find(name) != RESERVED_NAMES.end();
|
||||||
}
|
}
|
||||||
@ -192,7 +258,7 @@ private:
|
|||||||
if (_dotall) {
|
if (_dotall) {
|
||||||
rule = "[\\U00000000-\\U0010FFFF]";
|
rule = "[\\U00000000-\\U0010FFFF]";
|
||||||
} else {
|
} else {
|
||||||
rule = "[\\U00000000-\\x09\\x0B\\x0C\\x0E-\\U0010FFFF]";
|
rule = "[^\\x0A\\x0D]";
|
||||||
}
|
}
|
||||||
return _add_rule("dot", rule);
|
return _add_rule("dot", rule);
|
||||||
};
|
};
|
||||||
@ -308,13 +374,6 @@ private:
|
|||||||
auto &sub = last.first;
|
auto &sub = last.first;
|
||||||
auto sub_is_literal = last.second;
|
auto sub_is_literal = last.second;
|
||||||
|
|
||||||
if (min_times == 0 && max_times == std::numeric_limits<int>::max()) {
|
|
||||||
sub += "*";
|
|
||||||
} else if (min_times == 0 && max_times == 1) {
|
|
||||||
sub += "?";
|
|
||||||
} else if (min_times == 1 && max_times == std::numeric_limits<int>::max()) {
|
|
||||||
sub += "+";
|
|
||||||
} else {
|
|
||||||
if (!sub_is_literal) {
|
if (!sub_is_literal) {
|
||||||
std::string & sub_id = sub_rule_ids[sub];
|
std::string & sub_id = sub_rule_ids[sub];
|
||||||
if (sub_id.empty()) {
|
if (sub_id.empty()) {
|
||||||
@ -322,33 +381,14 @@ private:
|
|||||||
}
|
}
|
||||||
sub = sub_id;
|
sub = sub_id;
|
||||||
}
|
}
|
||||||
std::string result;
|
seq.back().first = build_repetition(
|
||||||
if (sub_is_literal && min_times > 0) {
|
sub_is_literal ? "\"" + sub + "\"" : sub,
|
||||||
result = "\"" + repeat(sub.substr(1, sub.length() - 2), min_times) + "\"";
|
min_times,
|
||||||
} else {
|
max_times,
|
||||||
for (int j = 0; j < min_times; j++) {
|
"",
|
||||||
if (j > 0) {
|
sub_is_literal
|
||||||
result += " ";
|
);
|
||||||
}
|
|
||||||
result += sub;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (min_times > 0 && min_times < max_times) {
|
|
||||||
result += " ";
|
|
||||||
}
|
|
||||||
if (max_times == std::numeric_limits<int>::max()) {
|
|
||||||
result += sub + "*";
|
|
||||||
} else {
|
|
||||||
for (int j = min_times; j < max_times; j++) {
|
|
||||||
if (j > min_times) {
|
|
||||||
result += " ";
|
|
||||||
}
|
|
||||||
result += sub + "?";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
seq.back().first = result;
|
|
||||||
seq.back().second = false;
|
seq.back().second = false;
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
std::string literal;
|
std::string literal;
|
||||||
auto is_non_literal = [&](char c) {
|
auto is_non_literal = [&](char c) {
|
||||||
@ -424,7 +464,7 @@ private:
|
|||||||
if (additional_properties.is_object() || (additional_properties.is_boolean() && additional_properties.get<bool>())) {
|
if (additional_properties.is_object() || (additional_properties.is_boolean() && additional_properties.get<bool>())) {
|
||||||
std::string sub_name = name + (name.empty() ? "" : "-") + "additional";
|
std::string sub_name = name + (name.empty() ? "" : "-") + "additional";
|
||||||
std::string value_rule = visit(additional_properties.is_object() ? additional_properties : json::object(), sub_name + "-value");
|
std::string value_rule = visit(additional_properties.is_object() ? additional_properties : json::object(), sub_name + "-value");
|
||||||
std::string kv_rule = _add_rule(sub_name + "-kv", _add_rule("string", PRIMITIVE_RULES.at("string")) + " \":\" space " + value_rule);
|
std::string kv_rule = _add_rule(sub_name + "-kv", _add_primitive("string", PRIMITIVE_RULES.at("string")) + " \":\" space " + value_rule);
|
||||||
prop_kv_rule_names["*"] = kv_rule;
|
prop_kv_rule_names["*"] = kv_rule;
|
||||||
optional_props.push_back("*");
|
optional_props.push_back("*");
|
||||||
}
|
}
|
||||||
@ -486,6 +526,25 @@ private:
|
|||||||
return rule;
|
return rule;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string _add_primitive(const std::string & name, const BuiltinRule & rule) {
|
||||||
|
auto n = _add_rule(name, rule.content);
|
||||||
|
for (const auto & dep : rule.deps) {
|
||||||
|
BuiltinRule dep_rule;
|
||||||
|
auto it = PRIMITIVE_RULES.find(dep);
|
||||||
|
if (it == PRIMITIVE_RULES.end()) {
|
||||||
|
it = STRING_FORMAT_RULES.find(dep);
|
||||||
|
if (it == STRING_FORMAT_RULES.end()) {
|
||||||
|
_errors.push_back("Rule " + dep + " not known");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (_rules.find(dep) == _rules.end()) {
|
||||||
|
_add_primitive(dep, it->second);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
SchemaConverter(
|
SchemaConverter(
|
||||||
const std::function<json(const std::string &)> & fetch_json,
|
const std::function<json(const std::string &)> & fetch_json,
|
||||||
@ -647,49 +706,33 @@ public:
|
|||||||
return _add_rule(rule_name, rule);
|
return _add_rule(rule_name, rule);
|
||||||
} else {
|
} else {
|
||||||
std::string item_rule_name = visit(items, name + (name.empty() ? "" : "-") + "item");
|
std::string item_rule_name = visit(items, name + (name.empty() ? "" : "-") + "item");
|
||||||
std::string list_item_operator = "( \",\" space " + item_rule_name + " )";
|
|
||||||
std::string successive_items;
|
|
||||||
int min_items = schema.contains("minItems") ? schema["minItems"].get<int>() : 0;
|
int min_items = schema.contains("minItems") ? schema["minItems"].get<int>() : 0;
|
||||||
json max_items_json = schema.contains("maxItems") ? schema["maxItems"] : json();
|
json max_items_json = schema.contains("maxItems") ? schema["maxItems"] : json();
|
||||||
int max_items = max_items_json.is_number_integer() ? max_items_json.get<int>() : -1;
|
int max_items = max_items_json.is_number_integer() ? max_items_json.get<int>() : std::numeric_limits<int>::max();
|
||||||
if (min_items > 0) {
|
|
||||||
successive_items += repeat(list_item_operator, min_items - 1);
|
return _add_rule(rule_name, "\"[\" space " + build_repetition(item_rule_name, min_items, max_items, "\",\" space") + " \"]\" space");
|
||||||
min_items--;
|
|
||||||
}
|
|
||||||
if (max_items >= 0 && max_items > min_items) {
|
|
||||||
successive_items += repeat(list_item_operator + "?", max_items - min_items - 1);
|
|
||||||
} else {
|
|
||||||
successive_items += list_item_operator + "*";
|
|
||||||
}
|
|
||||||
std::string rule;
|
|
||||||
if (min_items == 0) {
|
|
||||||
rule = "\"[\" space ( " + item_rule_name + " " + successive_items + " )? \"]\" space";
|
|
||||||
} else {
|
|
||||||
rule = "\"[\" space " + item_rule_name + " " + successive_items + " \"]\" space";
|
|
||||||
}
|
|
||||||
return _add_rule(rule_name, rule);
|
|
||||||
}
|
}
|
||||||
} else if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) {
|
} else if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) {
|
||||||
return _visit_pattern(schema["pattern"], rule_name);
|
return _visit_pattern(schema["pattern"], rule_name);
|
||||||
} else if ((schema_type.is_null() || schema_type == "string") && std::regex_match(schema_format, std::regex("^uuid[1-5]?$"))) {
|
} else if ((schema_type.is_null() || schema_type == "string") && std::regex_match(schema_format, std::regex("^uuid[1-5]?$"))) {
|
||||||
return _add_rule(rule_name == "root" ? "root" : schema_format, PRIMITIVE_RULES.at("uuid"));
|
return _add_primitive(rule_name == "root" ? "root" : schema_format, PRIMITIVE_RULES.at("uuid"));
|
||||||
} else if ((schema_type.is_null() || schema_type == "string") && DATE_RULES.find(schema_format) != DATE_RULES.end()) {
|
} else if ((schema_type.is_null() || schema_type == "string") && STRING_FORMAT_RULES.find(schema_format + "-string") != STRING_FORMAT_RULES.end()) {
|
||||||
for (const auto & kv : DATE_RULES) {
|
auto prim_name = schema_format + "-string";
|
||||||
_add_rule(kv.first, kv.second);
|
return _add_rule(rule_name, _add_primitive(prim_name, STRING_FORMAT_RULES.at(prim_name)));
|
||||||
}
|
} else if (schema_type == "string" && (schema.contains("minLength") || schema.contains("maxLength"))) {
|
||||||
return schema_format + "-string";
|
std::string char_rule = _add_primitive("char", PRIMITIVE_RULES.at("char"));
|
||||||
|
int min_len = schema.contains("minLength") ? schema["minLength"].get<int>() : 0;
|
||||||
|
int max_len = schema.contains("maxLength") ? schema["maxLength"].get<int>() : std::numeric_limits<int>::max();
|
||||||
|
return _add_rule(rule_name, "\"\\\"\" " + build_repetition(char_rule, min_len, max_len) + " \"\\\"\" space");
|
||||||
} else if (schema.empty() || schema_type == "object") {
|
} else if (schema.empty() || schema_type == "object") {
|
||||||
for (const auto & n : OBJECT_RULE_NAMES) {
|
return _add_rule(rule_name, _add_primitive("object", PRIMITIVE_RULES.at("object")));
|
||||||
_add_rule(n, PRIMITIVE_RULES.at(n));
|
|
||||||
}
|
|
||||||
return _add_rule(rule_name, "object");
|
|
||||||
} else {
|
} else {
|
||||||
if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get<std::string>()) == PRIMITIVE_RULES.end()) {
|
if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get<std::string>()) == PRIMITIVE_RULES.end()) {
|
||||||
_errors.push_back("Unrecognized schema: " + schema.dump());
|
_errors.push_back("Unrecognized schema: " + schema.dump());
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
// TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
|
// TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
|
||||||
return _add_rule(rule_name == "root" ? "root" : schema_type.get<std::string>(), PRIMITIVE_RULES.at(schema_type.get<std::string>()));
|
return _add_primitive(rule_name == "root" ? "root" : schema_type.get<std::string>(), PRIMITIVE_RULES.at(schema_type.get<std::string>()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6,37 +6,94 @@ import re
|
|||||||
import sys
|
import sys
|
||||||
from typing import Any, Dict, List, Set, Tuple, Union
|
from typing import Any, Dict, List, Set, Tuple, Union
|
||||||
|
|
||||||
|
def _build_repetition(item_rule, min_items, max_items, separator_rule=None, item_rule_is_literal=False):
|
||||||
|
if not separator_rule:
|
||||||
|
if min_items == 0 and max_items == 1:
|
||||||
|
return f'{item_rule}?'
|
||||||
|
elif min_items == 1 and max_items is None:
|
||||||
|
return f'{item_rule}+'
|
||||||
|
|
||||||
|
result = ''
|
||||||
|
|
||||||
|
if min_items > 0:
|
||||||
|
if item_rule_is_literal and separator_rule is None:
|
||||||
|
result = '"' + (item_rule[1:-1] * min_items) + '"'
|
||||||
|
else:
|
||||||
|
result = (f' {separator_rule} ' if separator_rule else ' ').join([item_rule] * min_items)
|
||||||
|
|
||||||
|
def opt_repetitions(up_to_n, prefix_with_sep=False):
|
||||||
|
'''
|
||||||
|
- n=4, no sep: '(a (a (a (a)?)?)?)?'
|
||||||
|
- n=4, sep=',', prefix: '("," a ("," a ("," a ("," a)?)?)?)?'
|
||||||
|
- n=4, sep=',', no prefix: '(a ("," a ("," a ("," a)?)?)?)?'
|
||||||
|
'''
|
||||||
|
|
||||||
|
content = f'{separator_rule} {item_rule}' if prefix_with_sep and separator_rule else item_rule
|
||||||
|
if up_to_n == 0:
|
||||||
|
return ''
|
||||||
|
elif up_to_n == 1:
|
||||||
|
return f'({content})?'
|
||||||
|
elif separator_rule and not prefix_with_sep:
|
||||||
|
return f'({content} {opt_repetitions(up_to_n - 1, prefix_with_sep=True)})?'
|
||||||
|
else:
|
||||||
|
return (f'({content} ' * up_to_n).rstrip() + (')?' * up_to_n)
|
||||||
|
|
||||||
|
if min_items > 0 and max_items != min_items:
|
||||||
|
result += ' '
|
||||||
|
|
||||||
|
if max_items is not None:
|
||||||
|
result += opt_repetitions(max_items - min_items, prefix_with_sep=min_items > 0)
|
||||||
|
else:
|
||||||
|
item_operator = f'({separator_rule + " " if separator_rule else ""}{item_rule})'
|
||||||
|
|
||||||
|
if min_items == 0 and separator_rule:
|
||||||
|
result = f'({item_rule} {item_operator}*)?'
|
||||||
|
else:
|
||||||
|
result += f'{item_operator}*'
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
class BuiltinRule:
    '''
        A predefined grammar rule: its body (content) plus the names of the
        other predefined rules that body refers to (deps).
    '''

    def __init__(self, content: str, deps: list = None):
        # A missing (or empty) deps argument becomes a fresh empty list per instance.
        dependencies = deps if deps else []
        self.content = content
        self.deps = dependencies
|
||||||
|
|
||||||
|
_up_to_15_digits = _build_repetition('[0-9]', 0, 15)
|
||||||
|
|
||||||
# whitespace is constrained to a single space char to prevent model "running away" in
|
# whitespace is constrained to a single space char to prevent model "running away" in
|
||||||
# whitespace. Also maybe improves generation quality?
|
# whitespace. Also maybe improves generation quality?
|
||||||
SPACE_RULE = '" "?'
|
SPACE_RULE = '" "?'
|
||||||
|
|
||||||
PRIMITIVE_RULES = {
|
PRIMITIVE_RULES = {
|
||||||
'boolean': '("true" | "false") space',
|
'boolean' : BuiltinRule('("true" | "false") space', []),
|
||||||
'number': '("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space',
|
'decimal-part' : BuiltinRule('[0-9] ' + _up_to_15_digits, []),
|
||||||
'integer': '("-"? ([0-9] | [1-9] [0-9]*)) space',
|
'integral-part': BuiltinRule('[0-9] | [1-9] ' + _up_to_15_digits, []),
|
||||||
'value' : 'object | array | string | number | boolean',
|
'number' : BuiltinRule('("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space', ['integral-part', 'decimal-part']),
|
||||||
'object' : '"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space',
|
'integer' : BuiltinRule('("-"? integral-part) space', ['integral-part']),
|
||||||
'array' : '"[" space ( value ("," space value)* )? "]" space',
|
'value' : BuiltinRule('object | array | string | number | boolean | null', ['object', 'array', 'string', 'number', 'boolean', 'null']),
|
||||||
'uuid' : '"\\"" ' + ' "-" '.join('[0-9a-fA-F]' * n for n in [8, 4, 4, 4, 12]) + ' "\\"" space',
|
'object' : BuiltinRule('"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space', ['string', 'value']),
|
||||||
'string': r''' "\"" (
|
'array' : BuiltinRule('"[" space ( value ("," space value)* )? "]" space', ['value']),
|
||||||
[^"\\] |
|
'uuid' : BuiltinRule(r'"\"" ' + ' "-" '.join('[0-9a-fA-F]' * n for n in [8, 4, 4, 4, 12]) + r' "\"" space', []),
|
||||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
'char' : BuiltinRule(r'[^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])', []),
|
||||||
)* "\"" space''',
|
'string' : BuiltinRule(r'"\"" char* "\"" space', ['char']),
|
||||||
'null': '"null" space',
|
'null' : BuiltinRule('"null" space', []),
|
||||||
}
|
}
|
||||||
OBJECT_RULE_NAMES = ['object', 'array', 'string', 'number', 'boolean', 'null', 'value']
|
|
||||||
|
|
||||||
# TODO: support "uri", "email" string formats
|
# TODO: support "uri", "email" string formats
|
||||||
DATE_RULES = {
|
STRING_FORMAT_RULES = {
|
||||||
'date' : '[0-9] [0-9] [0-9] [0-9] "-" ( "0" [1-9] | "1" [0-2] ) "-" ( \"0\" [1-9] | [1-2] [0-9] | "3" [0-1] )',
|
'date' : BuiltinRule('[0-9] [0-9] [0-9] [0-9] "-" ( "0" [1-9] | "1" [0-2] ) "-" ( \"0\" [1-9] | [1-2] [0-9] | "3" [0-1] )', []),
|
||||||
'time' : '([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )',
|
'time' : BuiltinRule('([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )', []),
|
||||||
'date-time': 'date "T" time',
|
'date-time' : BuiltinRule('date "T" time', ['date', 'time']),
|
||||||
'date-string': '"\\"" date "\\"" space',
|
'date-string' : BuiltinRule('"\\"" date "\\"" space', ['date']),
|
||||||
'time-string': '"\\"" time "\\"" space',
|
'time-string' : BuiltinRule('"\\"" time "\\"" space', ['time']),
|
||||||
'date-time-string': '"\\"" date-time "\\"" space',
|
'date-time-string': BuiltinRule('"\\"" date-time "\\"" space', ['date-time']),
|
||||||
}
|
}
|
||||||
|
|
||||||
RESERVED_NAMES = set(["root", *PRIMITIVE_RULES.keys(), *DATE_RULES.keys()])
|
DOTALL = '[\\U00000000-\\U0010FFFF]'
|
||||||
|
DOT = '[^\\x0A\\x0D]'
|
||||||
|
|
||||||
|
RESERVED_NAMES = set(["root", "dot", *PRIMITIVE_RULES.keys(), *STRING_FORMAT_RULES.keys()])
|
||||||
|
|
||||||
INVALID_RULE_CHARS_RE = re.compile(r'[^a-zA-Z0-9-]+')
|
INVALID_RULE_CHARS_RE = re.compile(r'[^a-zA-Z0-9-]+')
|
||||||
GRAMMAR_LITERAL_ESCAPE_RE = re.compile(r'[\r\n"]')
|
GRAMMAR_LITERAL_ESCAPE_RE = re.compile(r'[\r\n"]')
|
||||||
@ -46,8 +103,6 @@ GRAMMAR_LITERAL_ESCAPES = {'\r': '\\r', '\n': '\\n', '"': '\\"', '-': '\\-', ']'
|
|||||||
NON_LITERAL_SET = set('|.()[]{}*+?')
|
NON_LITERAL_SET = set('|.()[]{}*+?')
|
||||||
ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = set('[]()|{}*+?')
|
ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = set('[]()|{}*+?')
|
||||||
|
|
||||||
DATE_PATTERN = '[0-9]{4}-(0[1-9]|1[0-2])-([0-2][0-9]|3[0-1])'
|
|
||||||
TIME_PATTERN = '([01][0-9]|2[0-3])(:[0-5][0-9]){2}(\\.[0-9]{1,3})?(Z|[+-](([01][0-9]|2[0-3]):[0-5][0-9]))' # Cap millisecond precision w/ 3 digits
|
|
||||||
|
|
||||||
class SchemaConverter:
|
class SchemaConverter:
|
||||||
def __init__(self, *, prop_order, allow_fetch, dotall, raw_pattern):
|
def __init__(self, *, prop_order, allow_fetch, dotall, raw_pattern):
|
||||||
@ -55,7 +110,9 @@ class SchemaConverter:
|
|||||||
self._allow_fetch = allow_fetch
|
self._allow_fetch = allow_fetch
|
||||||
self._dotall = dotall
|
self._dotall = dotall
|
||||||
self._raw_pattern = raw_pattern
|
self._raw_pattern = raw_pattern
|
||||||
self._rules = {'space': SPACE_RULE}
|
self._rules = {
|
||||||
|
'space': SPACE_RULE,
|
||||||
|
}
|
||||||
self._refs = {}
|
self._refs = {}
|
||||||
self._refs_being_resolved = set()
|
self._refs_being_resolved = set()
|
||||||
|
|
||||||
@ -65,6 +122,29 @@ class SchemaConverter:
|
|||||||
)
|
)
|
||||||
return f'"{escaped}"'
|
return f'"{escaped}"'
|
||||||
|
|
||||||
|
def not_literal(self, literal: str, dotall: bool = True, maybe_escaped_underscores = False) -> str:
|
||||||
|
'''
|
||||||
|
Builds a grammar expression out of `literal`, one character at a time: each character contributes a negated character class, and the whole result is wrapped in a single outer pair of parentheses (no trailing '?'). An empty `literal` fails the assertion below. The `dotall` argument is not referenced anywhere in the body.
not_literal('a') -> '([^a])'
|
||||||
|
not_literal('abc') -> '([^a] | "a" ([^b] | "b" ([^c])?)?)'
|
||||||
|
'''
|
||||||
|
assert len(literal) > 0, 'Empty literal not supported'
|
||||||
|
def recurse(i: int):
|
||||||
|
c = literal[i]
|
||||||
|
if maybe_escaped_underscores and c == '_':
|
||||||
|
yield f'[^{c}\\\\]'
|
||||||
|
yield ' | '
|
||||||
|
yield f'"\\\\"? "{c}"'
|
||||||
|
else:
|
||||||
|
yield f'[^{c}]'
|
||||||
|
if i < len(literal) - 1:
|
||||||
|
yield ' | '
|
||||||
|
yield self._format_literal(c)
|
||||||
|
yield ' ('
|
||||||
|
yield from recurse(i + 1)
|
||||||
|
yield ')?'
|
||||||
|
|
||||||
|
return ''.join(('(', *recurse(0), ')'))
|
||||||
|
|
||||||
def _add_rule(self, name, rule):
|
def _add_rule(self, name, rule):
|
||||||
esc_name = INVALID_RULE_CHARS_RE.sub('-', name)
|
esc_name = INVALID_RULE_CHARS_RE.sub('-', name)
|
||||||
if esc_name not in self._rules or self._rules[esc_name] == rule:
|
if esc_name not in self._rules or self._rules[esc_name] == rule:
|
||||||
@ -169,10 +249,10 @@ class SchemaConverter:
|
|||||||
|
|
||||||
def get_dot():
|
def get_dot():
|
||||||
if self._dotall:
|
if self._dotall:
|
||||||
rule = '[\\U00000000-\\U0010FFFF]'
|
rule = DOTALL
|
||||||
else:
|
else:
|
||||||
# Accept any character... except \n and \r line break chars (\x0A and \x0D)
|
# Accept any character... except \n and \r line break chars (\x0A and \x0D)
|
||||||
rule = '[\\U00000000-\\x09\\x0B\\x0C\\x0E-\\U0010FFFF]'
|
rule = DOT
|
||||||
return self._add_rule(f'dot', rule)
|
return self._add_rule(f'dot', rule)
|
||||||
|
|
||||||
def join_seq():
|
def join_seq():
|
||||||
@ -246,13 +326,6 @@ class SchemaConverter:
|
|||||||
|
|
||||||
(sub, sub_is_literal) = seq[-1]
|
(sub, sub_is_literal) = seq[-1]
|
||||||
|
|
||||||
if min_times == 0 and max_times is None:
|
|
||||||
seq[-1] = (f'{sub}*', False)
|
|
||||||
elif min_times == 0 and max_times == 1:
|
|
||||||
seq[-1] = (f'{sub}?', False)
|
|
||||||
elif min_times == 1 and max_times is None:
|
|
||||||
seq[-1] = (f'{sub}+', False)
|
|
||||||
else:
|
|
||||||
if not sub_is_literal:
|
if not sub_is_literal:
|
||||||
id = sub_rule_ids.get(sub)
|
id = sub_rule_ids.get(sub)
|
||||||
if id is None:
|
if id is None:
|
||||||
@ -260,12 +333,7 @@ class SchemaConverter:
|
|||||||
sub_rule_ids[sub] = id
|
sub_rule_ids[sub] = id
|
||||||
sub = id
|
sub = id
|
||||||
|
|
||||||
seq[-1] = (
|
seq[-1] = (_build_repetition(f'"{sub}"' if sub_is_literal else sub, min_times, max_times, item_rule_is_literal=sub_is_literal), False)
|
||||||
' '.join(
|
|
||||||
([f'"{sub[1:-1] * min_times}"'] if sub_is_literal else [sub] * min_times) +
|
|
||||||
([f'{sub}?'] * (max_times - min_times) if max_times is not None else [f'{sub}*'])),
|
|
||||||
False
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
literal = ''
|
literal = ''
|
||||||
while i < length:
|
while i < length:
|
||||||
@ -373,49 +441,47 @@ class SchemaConverter:
|
|||||||
' "]" space')
|
' "]" space')
|
||||||
else:
|
else:
|
||||||
item_rule_name = self.visit(items, f'{name}{"-" if name else ""}item')
|
item_rule_name = self.visit(items, f'{name}{"-" if name else ""}item')
|
||||||
list_item_operator = f'( "," space {item_rule_name} )'
|
|
||||||
successive_items = ""
|
|
||||||
min_items = schema.get("minItems", 0)
|
min_items = schema.get("minItems", 0)
|
||||||
max_items = schema.get("maxItems")
|
max_items = schema.get("maxItems")
|
||||||
if min_items > 0:
|
return self._add_rule(rule_name, '"[" space ' + _build_repetition(item_rule_name, min_items, max_items, separator_rule='"," space') + ' "]" space')
|
||||||
successive_items = list_item_operator * (min_items - 1)
|
|
||||||
min_items -= 1
|
|
||||||
if max_items is not None and max_items > min_items:
|
|
||||||
successive_items += (list_item_operator + "?") * (max_items - min_items - 1)
|
|
||||||
else:
|
|
||||||
successive_items += list_item_operator + "*"
|
|
||||||
if min_items == 0:
|
|
||||||
rule = f'"[" space ( {item_rule_name} {successive_items} )? "]" space'
|
|
||||||
else:
|
|
||||||
rule = f'"[" space {item_rule_name} {successive_items} "]" space'
|
|
||||||
return self._add_rule(rule_name, rule)
|
|
||||||
|
|
||||||
elif schema_type in (None, 'string') and 'pattern' in schema:
|
elif schema_type in (None, 'string') and 'pattern' in schema:
|
||||||
return self._visit_pattern(schema['pattern'], rule_name)
|
return self._visit_pattern(schema['pattern'], rule_name)
|
||||||
|
|
||||||
elif schema_type in (None, 'string') and re.match(r'^uuid[1-5]?$', schema_format or ''):
|
elif schema_type in (None, 'string') and re.match(r'^uuid[1-5]?$', schema_format or ''):
|
||||||
return self._add_rule(
|
return self._add_primitive(
|
||||||
'root' if rule_name == 'root' else schema_format,
|
'root' if rule_name == 'root' else schema_format,
|
||||||
PRIMITIVE_RULES['uuid']
|
PRIMITIVE_RULES['uuid']
|
||||||
)
|
)
|
||||||
|
|
||||||
elif schema_type in (None, 'string') and schema_format in DATE_RULES:
|
elif schema_type in (None, 'string') and f'{schema_format}-string' in STRING_FORMAT_RULES:
|
||||||
for t, r in DATE_RULES.items():
|
prim_name = f'{schema_format}-string'
|
||||||
self._add_rule(t, r)
|
return self._add_rule(rule_name, self._add_primitive(prim_name, STRING_FORMAT_RULES[prim_name]))
|
||||||
return schema_format + '-string'
|
|
||||||
|
elif schema_type == 'string' and ('minLength' in schema or 'maxLength' in schema):
|
||||||
|
char_rule = self._add_primitive('char', PRIMITIVE_RULES['char'])
|
||||||
|
min_len = schema.get('minLength', 0)
|
||||||
|
max_len = schema.get('maxLength')
|
||||||
|
|
||||||
|
return self._add_rule(rule_name, r'"\"" ' + _build_repetition(char_rule, min_len, max_len) + r' "\"" space')
|
||||||
|
|
||||||
elif (schema_type == 'object') or (len(schema) == 0):
|
elif (schema_type == 'object') or (len(schema) == 0):
|
||||||
for n in OBJECT_RULE_NAMES:
|
return self._add_rule(rule_name, self._add_primitive('object', PRIMITIVE_RULES['object']))
|
||||||
self._add_rule(n, PRIMITIVE_RULES[n])
|
|
||||||
return self._add_rule(rule_name, 'object')
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
assert schema_type in PRIMITIVE_RULES, f'Unrecognized schema: {schema}'
|
assert schema_type in PRIMITIVE_RULES, f'Unrecognized schema: {schema}'
|
||||||
# TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
|
# TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
|
||||||
return self._add_rule(
|
return self._add_primitive('root' if rule_name == 'root' else schema_type, PRIMITIVE_RULES[schema_type])
|
||||||
'root' if rule_name == 'root' else schema_type,
|
|
||||||
PRIMITIVE_RULES[schema_type]
|
def _add_primitive(self, name: str, rule: BuiltinRule):
|
||||||
)
|
n = self._add_rule(name, rule.content)
|
||||||
|
|
||||||
|
for dep in rule.deps:
|
||||||
|
dep_rule = PRIMITIVE_RULES.get(dep) or STRING_FORMAT_RULES.get(dep)
|
||||||
|
assert dep_rule, f'Rule {dep} not known'
|
||||||
|
if dep not in self._rules:
|
||||||
|
self._add_primitive(dep, dep_rule)
|
||||||
|
return n
|
||||||
|
|
||||||
def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Union[bool, Any]):
|
def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Union[bool, Any]):
|
||||||
prop_order = self._prop_order
|
prop_order = self._prop_order
|
||||||
@ -437,7 +503,7 @@ class SchemaConverter:
|
|||||||
value_rule = self.visit({} if additional_properties == True else additional_properties, f'{sub_name}-value')
|
value_rule = self.visit({} if additional_properties == True else additional_properties, f'{sub_name}-value')
|
||||||
prop_kv_rule_names["*"] = self._add_rule(
|
prop_kv_rule_names["*"] = self._add_rule(
|
||||||
f'{sub_name}-kv',
|
f'{sub_name}-kv',
|
||||||
self._add_rule('string', PRIMITIVE_RULES['string']) + f' ":" space {value_rule}'
|
self._add_primitive('string', PRIMITIVE_RULES['string']) + f' ":" space {value_rule}'
|
||||||
)
|
)
|
||||||
optional_props.append("*")
|
optional_props.append("*")
|
||||||
|
|
@ -8,7 +8,7 @@ print(subprocess.check_output(
|
|||||||
"python",
|
"python",
|
||||||
os.path.join(
|
os.path.join(
|
||||||
os.path.dirname(os.path.realpath(__file__)),
|
os.path.dirname(os.path.realpath(__file__)),
|
||||||
"json-schema-to-grammar.py"),
|
"json_schema_to_grammar.py"),
|
||||||
*rest,
|
*rest,
|
||||||
"-",
|
"-",
|
||||||
"--raw-pattern",
|
"--raw-pattern",
|
||||||
|
@ -11,6 +11,7 @@ Set of LLM REST APIs and a simple web front end to interact with llama.cpp.
|
|||||||
* Continuous batching
|
* Continuous batching
|
||||||
* Multimodal (wip)
|
* Multimodal (wip)
|
||||||
* Monitoring endpoints
|
* Monitoring endpoints
|
||||||
|
* Schema-constrained JSON response format
|
||||||
|
|
||||||
The project is under active development, and we are [looking for feedback and contributors](https://github.com/ggerganov/llama.cpp/issues/4216).
|
The project is under active development, and we are [looking for feedback and contributors](https://github.com/ggerganov/llama.cpp/issues/4216).
|
||||||
|
|
||||||
@ -250,6 +251,8 @@ node index.js
|
|||||||
|
|
||||||
`grammar`: Set grammar for grammar-based sampling. Default: no grammar
|
`grammar`: Set grammar for grammar-based sampling. Default: no grammar
|
||||||
|
|
||||||
|
`json_schema`: Set a JSON schema for grammar-based sampling (e.g. `{"items": {"type": "string"}, "minItems": 10, "maxItems": 100}` for a list of strings, or `{}` for any JSON). See [tests](../../tests/test-json-schema-to-grammar.cpp) for supported features. Default: no JSON schema.
|
||||||
|
|
||||||
`seed`: Set the random number generator (RNG) seed. Default: `-1`, which is a random seed.
|
`seed`: Set the random number generator (RNG) seed. Default: `-1`, which is a random seed.
|
||||||
|
|
||||||
`ignore_eos`: Ignore end of stream token and continue generating. Default: `false`
|
`ignore_eos`: Ignore end of stream token and continue generating. Default: `false`
|
||||||
@ -365,6 +368,8 @@ Notice that each `probs` is an array of length `n_probs`.
|
|||||||
|
|
||||||
See [OpenAI Chat Completions API documentation](https://platform.openai.com/docs/api-reference/chat). While some OpenAI-specific features such as function calling aren't supported, llama.cpp `/completion`-specific features such as `mirostat` are supported.
|
See [OpenAI Chat Completions API documentation](https://platform.openai.com/docs/api-reference/chat). While some OpenAI-specific features such as function calling aren't supported, llama.cpp `/completion`-specific features such as `mirostat` are supported.
|
||||||
|
|
||||||
|
The `response_format` parameter supports both plain JSON output (e.g. `{"type": "json_object"}`) and schema-constrained JSON (e.g. `{"type": "json_object", "schema": {"type": "string", "minLength": 10, "maxLength": 100}}`), similar to other OpenAI-inspired API providers.
|
||||||
|
|
||||||
*Examples:*
|
*Examples:*
|
||||||
|
|
||||||
You can use either Python `openai` library with appropriate checkpoints:
|
You can use either Python `openai` library with appropriate checkpoints:
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -1,33 +1,95 @@
|
|||||||
// WARNING: This file was ported from json-schema-to-grammar.py, please fix bugs / add features there first.
|
// WARNING: This file was ported from json_schema_to_grammar.py, please fix bugs / add features there first.
|
||||||
const SPACE_RULE = '" "?';
|
const SPACE_RULE = '" "?';
|
||||||
|
|
||||||
|
/**
 * Builds a grammar expression matching `itemRule` repeated between `minItems`
 * and `maxItems` times, optionally interleaved with a separator.
 *
 * Optional repetitions are emitted as a nested chain, `(x (x (x)?)?)?`, rather
 * than the flat `x? x? x?` form.
 *
 * @param {string} itemRule  Rule name or expression to repeat.
 * @param {number} minItems  Minimum number of repetitions (>= 0).
 * @param {number|undefined} maxItems  Maximum number of repetitions. Both
 *   `undefined` and `Infinity` mean "no upper bound".
 * @param {{separatorRule?: string, itemRuleIsLiteral?: boolean}} [opts]
 *   `separatorRule`: expression placed between consecutive items ('' = none).
 *   `itemRuleIsLiteral`: `itemRule` is a quoted literal, so the mandatory
 *   repetitions can be merged into a single quoted string.
 * @returns {string} The grammar expression.
 */
function _buildRepetition(itemRule, minItems, maxItems, opts={}) {
  const separatorRule = opts.separatorRule ?? '';
  const itemRuleIsLiteral = opts.itemRuleIsLiteral ?? false;
  const hasSeparator = separatorRule !== '';

  // Regex quantifiers such as `{2,}` reach this function with Infinity as the
  // upper bound; treating it as a finite count would ask Array.from for an
  // infinitely long array and throw a RangeError.
  const unbounded = maxItems === undefined || maxItems === Infinity;

  // Shorthand operators are only valid when nothing sits between the items.
  if (!hasSeparator) {
    if (minItems === 0 && maxItems === 1) {
      return `${itemRule}?`;
    }
    if (minItems === 1 && unbounded) {
      return `${itemRule}+`;
    }
  }

  // Mandatory part: exactly `minItems` occurrences.
  let result = '';
  if (minItems > 0) {
    if (itemRuleIsLiteral && !hasSeparator) {
      // Strip the surrounding quotes and merge: "a" x3 -> "aaa".
      result = `"${itemRule.slice(1, -1).repeat(minItems)}"`;
    } else {
      result = Array.from({ length: minItems }, () => itemRule)
        .join(hasSeparator ? ` ${separatorRule} ` : ' ');
    }
  }

  // Optional part: up to `upToN` further occurrences, nested so that each
  // extra item is only reachable once the previous one has matched.
  const optRepetitions = (upToN, prefixWithSep=false) => {
    const content = hasSeparator && prefixWithSep ? `${separatorRule} ${itemRule}` : itemRule;
    if (upToN === 0) {
      return '';
    } else if (upToN === 1) {
      return `(${content})?`;
    } else if (hasSeparator && !prefixWithSep) {
      // The very first item carries no leading separator; all later ones do.
      return `(${content} ${optRepetitions(upToN - 1, true)})?`;
    } else {
      return Array.from({ length: upToN }, () => `(${content}`).join(' ').trim() + Array.from({ length: upToN }, () => ')?').join('');
    }
  };

  if (minItems > 0 && maxItems !== minItems) {
    result += ' ';
  }

  if (!unbounded) {
    result += optRepetitions(maxItems - minItems, minItems > 0);
  } else {
    const itemOperator = `(${hasSeparator ? separatorRule + ' ' : ''}${itemRule})`;

    if (minItems === 0 && hasSeparator) {
      // An empty list must not start with a separator, so the first item is
      // spelled out and the whole group made optional.
      result = `(${itemRule} ${itemOperator}*)?`;
    } else {
      result += `${itemOperator}*`;
    }
  }

  return result;
}
|
||||||
|
|
||||||
|
/**
 * A predefined grammar rule: its body text plus the names of the other
 * builtin rules that body refers to.
 */
class BuiltinRule {
  /**
   * @param {string} content  Rule body.
   * @param {string[]} [deps]  Names of rules referenced by `content`; a falsy
   *   value (e.g. omitted) is stored as an empty list.
   */
  constructor(content, deps) {
    Object.assign(this, { content, deps: deps ? deps : [] });
  }
}
|
||||||
|
|
||||||
|
const UP_TO_15_DIGITS = _buildRepetition('[0-9]', 0, 15);
|
||||||
|
|
||||||
const PRIMITIVE_RULES = {
|
const PRIMITIVE_RULES = {
|
||||||
boolean: '("true" | "false") space',
|
boolean : new BuiltinRule('("true" | "false") space', []),
|
||||||
number: '("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space',
|
'decimal-part' : new BuiltinRule('[0-9] ' + UP_TO_15_DIGITS, []),
|
||||||
integer: '("-"? ([0-9] | [1-9] [0-9]*)) space',
|
'integral-part': new BuiltinRule('[0-9] | [1-9] ' + UP_TO_15_DIGITS, []),
|
||||||
value: 'object | array | string | number | boolean',
|
number : new BuiltinRule('("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space', ['integral-part', 'decimal-part']),
|
||||||
object: '"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space',
|
integer : new BuiltinRule('("-"? integral-part) space', ['integral-part']),
|
||||||
array: '"[" space ( value ("," space value)* )? "]" space',
|
value : new BuiltinRule('object | array | string | number | boolean | null', ['object', 'array', 'string', 'number', 'boolean', 'null']),
|
||||||
uuid: '"\\"" ' + [8, 4, 4, 4, 12].map(n => [...new Array(n)].map(_ => '[0-9a-fA-F]').join('')).join(' "-" ') + ' "\\"" space',
|
object : new BuiltinRule('"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space', ['string', 'value']),
|
||||||
string: ` "\\"" (
|
array : new BuiltinRule('"[" space ( value ("," space value)* )? "]" space', ['value']),
|
||||||
[^"\\\\] |
|
uuid : new BuiltinRule('"\\"" ' + [8, 4, 4, 4, 12].map(n => [...new Array(n)].map(_ => '[0-9a-fA-F]').join('')).join(' "-" ') + ' "\\"" space', []),
|
||||||
"\\\\" (["\\\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
char : new BuiltinRule(`[^"\\\\] | "\\\\" (["\\\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])`, []),
|
||||||
)* "\\"" space`,
|
string : new BuiltinRule(`"\\"" char* "\\"" space`, ['char']),
|
||||||
null: '"null" space',
|
null : new BuiltinRule('"null" space', []),
|
||||||
};
|
};
|
||||||
const OBJECT_RULE_NAMES = ['object', 'array', 'string', 'number', 'boolean', 'null', 'value'];
|
|
||||||
|
|
||||||
// TODO: support "uri", "email" string formats
|
// TODO: support "uri", "email" string formats
|
||||||
const DATE_RULES = {
|
const STRING_FORMAT_RULES = {
|
||||||
'date' : '[0-9] [0-9] [0-9] [0-9] "-" ( "0" [1-9] | "1" [0-2] ) "-" ( \"0\" [1-9] | [1-2] [0-9] | "3" [0-1] )',
|
'date' : new BuiltinRule('[0-9] [0-9] [0-9] [0-9] "-" ( "0" [1-9] | "1" [0-2] ) "-" ( \"0\" [1-9] | [1-2] [0-9] | "3" [0-1] )', []),
|
||||||
'time' : '([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )',
|
'time' : new BuiltinRule('([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )', []),
|
||||||
'date-time': 'date "T" time',
|
'date-time' : new BuiltinRule('date "T" time', ['date', 'time']),
|
||||||
'date-string': '"\\"" date "\\"" space',
|
'date-string' : new BuiltinRule('"\\"" date "\\"" space', ['date']),
|
||||||
'time-string': '"\\"" time "\\"" space',
|
'time-string' : new BuiltinRule('"\\"" time "\\"" space', ['time']),
|
||||||
'date-time-string': '"\\"" date-time "\\"" space',
|
'date-time-string': new BuiltinRule('"\\"" date-time "\\"" space', ['date-time']),
|
||||||
};
|
}
|
||||||
|
|
||||||
const RESERVED_NAMES = {'root': true, ...PRIMITIVE_RULES, ...DATE_RULES};
|
const RESERVED_NAMES = {'root': true, ...PRIMITIVE_RULES, ...STRING_FORMAT_RULES};
|
||||||
|
|
||||||
const INVALID_RULE_CHARS_RE = /[^\dA-Za-z-]+/g;
|
const INVALID_RULE_CHARS_RE = /[^\dA-Za-z-]+/g;
|
||||||
const GRAMMAR_LITERAL_ESCAPE_RE = /[\n\r"]/g;
|
const GRAMMAR_LITERAL_ESCAPE_RE = /[\n\r"]/g;
|
||||||
@ -158,7 +220,7 @@ export class SchemaConverter {
|
|||||||
rule = '[\\U00000000-\\U0010FFFF]';
|
rule = '[\\U00000000-\\U0010FFFF]';
|
||||||
} else {
|
} else {
|
||||||
// Accept any character... except \n and \r line break chars (\x0A and \x0D)
|
// Accept any character... except \n and \r line break chars (\x0A and \x0D)
|
||||||
rule = '[\\U00000000-\\x09\\x0B\\x0C\\x0E-\\U0010FFFF]';
|
rule = '[^\\x0A\\x0D]';
|
||||||
}
|
}
|
||||||
return this._addRule('dot', rule);
|
return this._addRule('dot', rule);
|
||||||
};
|
};
|
||||||
@ -259,13 +321,6 @@ export class SchemaConverter {
|
|||||||
|
|
||||||
let [sub, subIsLiteral] = seq[seq.length - 1];
|
let [sub, subIsLiteral] = seq[seq.length - 1];
|
||||||
|
|
||||||
if (minTimes === 0 && maxTimes === Infinity) {
|
|
||||||
seq[seq.length - 1] = [`${sub}*`, false];
|
|
||||||
} else if (minTimes === 0 && maxTimes === 1) {
|
|
||||||
seq[seq.length - 1] = [`${sub}?`, false];
|
|
||||||
} else if (minTimes === 1 && maxTimes === Infinity) {
|
|
||||||
seq[seq.length - 1] = [`${sub}+`, false];
|
|
||||||
} else {
|
|
||||||
if (!subIsLiteral) {
|
if (!subIsLiteral) {
|
||||||
let id = subRuleIds[sub];
|
let id = subRuleIds[sub];
|
||||||
if (id === undefined) {
|
if (id === undefined) {
|
||||||
@ -275,10 +330,10 @@ export class SchemaConverter {
|
|||||||
sub = id;
|
sub = id;
|
||||||
}
|
}
|
||||||
|
|
||||||
const repeatedSub = Array.from({ length: minTimes }, () => subIsLiteral ? `"${sub.slice(1, -1).repeat(minTimes)}"` : sub);
|
seq[seq.length - 1] = [
|
||||||
const optionalSub = maxTimes !== undefined ? Array.from({ length: maxTimes - minTimes }, () => `${sub}?`) : [`${sub}*`];
|
_buildRepetition(subIsLiteral ? `"${sub}"` : sub, minTimes, maxTimes, {itemRuleIsLiteral: subIsLiteral}),
|
||||||
seq[seq.length - 1] = [repeatedSub.concat(optionalSub).join(' '), false];
|
false
|
||||||
}
|
];
|
||||||
} else {
|
} else {
|
||||||
let literal = '';
|
let literal = '';
|
||||||
while (i < length) {
|
while (i < length) {
|
||||||
@ -394,49 +449,50 @@ export class SchemaConverter {
|
|||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
const itemRuleName = this.visit(items, `${name ?? ''}${name ? '-' : ''}item`);
|
const itemRuleName = this.visit(items, `${name ?? ''}${name ? '-' : ''}item`);
|
||||||
const listItemOperator = `( "," space ${itemRuleName} )`;
|
const minItems = schema.minItems || 0;
|
||||||
let successiveItems = '';
|
|
||||||
let minItems = schema.minItems || 0;
|
|
||||||
const maxItems = schema.maxItems;
|
const maxItems = schema.maxItems;
|
||||||
if (minItems > 0) {
|
return this._addRule(ruleName, '"[" space ' + _buildRepetition(itemRuleName, minItems, maxItems, {separatorRule: '"," space'}) + ' "]" space');
|
||||||
successiveItems = listItemOperator.repeat(minItems - 1);
|
|
||||||
minItems--;
|
|
||||||
}
|
|
||||||
if (maxItems !== undefined && maxItems > minItems) {
|
|
||||||
successiveItems += `${listItemOperator}?`.repeat(maxItems - minItems - 1);
|
|
||||||
} else {
|
|
||||||
successiveItems += `${listItemOperator}*`;
|
|
||||||
}
|
|
||||||
const rule = minItems === 0
|
|
||||||
? `"[" space ( ${itemRuleName} ${successiveItems} )? "]" space`
|
|
||||||
: `"[" space ${itemRuleName} ${successiveItems} "]" space`;
|
|
||||||
return this._addRule(ruleName, rule);
|
|
||||||
}
|
}
|
||||||
} else if ((schemaType === undefined || schemaType === 'string') && 'pattern' in schema) {
|
} else if ((schemaType === undefined || schemaType === 'string') && 'pattern' in schema) {
|
||||||
return this._visitPattern(schema.pattern, ruleName);
|
return this._visitPattern(schema.pattern, ruleName);
|
||||||
} else if ((schemaType === undefined || schemaType === 'string') && /^uuid[1-5]?$/.test(schema.format || '')) {
|
} else if ((schemaType === undefined || schemaType === 'string') && /^uuid[1-5]?$/.test(schema.format || '')) {
|
||||||
return this._addRule(
|
return this._addPrimitive(
|
||||||
ruleName === 'root' ? 'root' : schemaFormat,
|
ruleName === 'root' ? 'root' : schemaFormat,
|
||||||
PRIMITIVE_RULES['uuid'])
|
PRIMITIVE_RULES['uuid']
|
||||||
} else if ((schemaType === undefined || schemaType === 'string') && schema.format in DATE_RULES) {
|
);
|
||||||
for (const [t, r] of Object.entries(DATE_RULES)) {
|
} else if ((schemaType === undefined || schemaType === 'string') && `${schema.format}-string` in STRING_FORMAT_RULES) {
|
||||||
this._addRule(t, r);
|
const primName = `${schema.format}-string`
|
||||||
}
|
return this._addRule(ruleName, this._addPrimitive(primName, STRING_FORMAT_RULES[primName]));
|
||||||
return schemaFormat + '-string';
|
} else if (schemaType === 'string' && ('minLength' in schema || 'maxLength' in schema)) {
|
||||||
|
const charRuleName = this._addPrimitive('char', PRIMITIVE_RULES['char']);
|
||||||
|
const minLen = schema.minLength || 0;
|
||||||
|
const maxLen = schema.maxLength;
|
||||||
|
return this._addRule(ruleName, '"\\\"" ' + _buildRepetition(charRuleName, minLen, maxLen) + ' "\\\"" space');
|
||||||
} else if ((schemaType === 'object') || (Object.keys(schema).length === 0)) {
|
} else if ((schemaType === 'object') || (Object.keys(schema).length === 0)) {
|
||||||
for (const n of OBJECT_RULE_NAMES) {
|
return this._addRule(ruleName, this._addPrimitive('object', PRIMITIVE_RULES['object']));
|
||||||
this._addRule(n, PRIMITIVE_RULES[n]);
|
|
||||||
}
|
|
||||||
return this._addRule(ruleName, 'object');
|
|
||||||
} else {
|
} else {
|
||||||
if (!(schemaType in PRIMITIVE_RULES)) {
|
if (!(schemaType in PRIMITIVE_RULES)) {
|
||||||
throw new Error(`Unrecognized schema: ${JSON.stringify(schema)}`);
|
throw new Error(`Unrecognized schema: ${JSON.stringify(schema)}`);
|
||||||
}
|
}
|
||||||
// TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
|
// TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
|
||||||
return this._addRule(ruleName === 'root' ? 'root' : schemaType, PRIMITIVE_RULES[schemaType]);
|
return this._addPrimitive(ruleName === 'root' ? 'root' : schemaType, PRIMITIVE_RULES[schemaType]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_addPrimitive(name, rule) {
|
||||||
|
let n = this._addRule(name, rule.content);
|
||||||
|
for (const dep of rule.deps) {
|
||||||
|
const depRule = PRIMITIVE_RULES[dep] || STRING_FORMAT_RULES[dep];
|
||||||
|
if (!depRule) {
|
||||||
|
throw new Error(`Rule ${dep} not known`);
|
||||||
|
}
|
||||||
|
if (!(dep in this._rules)) {
|
||||||
|
this._addPrimitive(dep, depRule);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
_buildObjectRule(properties, required, name, additionalProperties) {
|
_buildObjectRule(properties, required, name, additionalProperties) {
|
||||||
const propOrder = this._propOrder;
|
const propOrder = this._propOrder;
|
||||||
// sort by position in prop_order (if specified) then by original order
|
// sort by position in prop_order (if specified) then by original order
|
||||||
@ -462,7 +518,7 @@ export class SchemaConverter {
|
|||||||
const valueRule = this.visit(additionalProperties === true ? {} : additionalProperties, `${subName}-value`);
|
const valueRule = this.visit(additionalProperties === true ? {} : additionalProperties, `${subName}-value`);
|
||||||
propKvRuleNames['*'] = this._addRule(
|
propKvRuleNames['*'] = this._addRule(
|
||||||
`${subName}-kv`,
|
`${subName}-kv`,
|
||||||
`${this._addRule('string', PRIMITIVE_RULES['string'])} ":" space ${valueRule}`);
|
`${this._addPrimitive('string', PRIMITIVE_RULES['string'])} ":" space ${valueRule}`);
|
||||||
optionalProps.push('*');
|
optionalProps.push('*');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -859,7 +859,7 @@ struct server_context {
|
|||||||
slot.sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
|
slot.sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
|
||||||
|
|
||||||
// process "json_schema" and "grammar"
|
// process "json_schema" and "grammar"
|
||||||
if (data.contains("json_schema") && data.contains("grammar")) {
|
if (data.contains("json_schema") && !data["json_schema"].is_null() && data.contains("grammar") && !data["grammar"].is_null()) {
|
||||||
send_error(task, "Either \"json_schema\" or \"grammar\" can be specified, but not both", ERROR_TYPE_INVALID_REQUEST);
|
send_error(task, "Either \"json_schema\" or \"grammar\" can be specified, but not both", ERROR_TYPE_INVALID_REQUEST);
|
||||||
return false;
|
return false;
|
||||||
} else if (data.contains("json_schema") && !data.contains("grammar")) {
|
} else if (data.contains("json_schema") && !data.contains("grammar")) {
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
#
|
#
|
||||||
# ./examples/ts-type-to-grammar.sh "{a:string,b:string,c?:string}"
|
# ./examples/ts-type-to-grammar.sh "{a:string,b:string,c?:string}"
|
||||||
# python examples/json-schema-to-grammar.py https://json.schemastore.org/tsconfig.json
|
# python examples/json_schema_to_grammar.py https://json.schemastore.org/tsconfig.json
|
||||||
#
|
#
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
@ -25,4 +25,4 @@ npx ts-json-schema-generator --unstable --no-top-ref --path "$DTS_FILE" --type M
|
|||||||
# https://github.com/YousefED/typescript-json-schema
|
# https://github.com/YousefED/typescript-json-schema
|
||||||
# npx typescript-json-schema --defaultProps --required "$DTS_FILE" MyType | tee "$SCHEMA_FILE" >&2
|
# npx typescript-json-schema --defaultProps --required "$DTS_FILE" MyType | tee "$SCHEMA_FILE" >&2
|
||||||
|
|
||||||
./examples/json-schema-to-grammar.py "$SCHEMA_FILE"
|
./examples/json_schema_to_grammar.py "$SCHEMA_FILE"
|
||||||
|
@ -89,3 +89,13 @@ This guide provides a brief overview. Check out the GBNF files in this directory
|
|||||||
```
|
```
|
||||||
./main -m <model> --grammar-file grammars/some-grammar.gbnf -p 'Some prompt'
|
./main -m <model> --grammar-file grammars/some-grammar.gbnf -p 'Some prompt'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
Grammars currently have performance gotchas (see https://github.com/ggerganov/llama.cpp/issues/4218).
|
||||||
|
|
||||||
|
### Efficient optional repetitions
|
||||||
|
|
||||||
|
A common pattern is to allow repetitions of a pattern `x` up to N times.
|
||||||
|
|
||||||
|
While semantically correct, the syntax `x? x? x?.... x?` (with N repetitions) will result in extremely slow inference. Instead, you can write `(x (x (x ... (x)?...)?)?)?` (with N-deep nesting).
|
||||||
|
@ -104,16 +104,16 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||||||
R"""(
|
R"""(
|
||||||
array ::= "[" space ( value ("," space value)* )? "]" space
|
array ::= "[" space ( value ("," space value)* )? "]" space
|
||||||
boolean ::= ("true" | "false") space
|
boolean ::= ("true" | "false") space
|
||||||
|
char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||||
|
decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
|
||||||
|
integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
|
||||||
null ::= "null" space
|
null ::= "null" space
|
||||||
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
|
||||||
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
|
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
|
||||||
root ::= object
|
root ::= object
|
||||||
space ::= " "?
|
space ::= " "?
|
||||||
string ::= "\"" (
|
string ::= "\"" char* "\"" space
|
||||||
[^"\\] |
|
value ::= object | array | string | number | boolean | null
|
||||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
|
||||||
)* "\"" space
|
|
||||||
value ::= object | array | string | number | boolean
|
|
||||||
)"""
|
)"""
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -133,10 +133,13 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||||||
date-string ::= "\"" date "\"" space
|
date-string ::= "\"" date "\"" space
|
||||||
date-time ::= date "T" time
|
date-time ::= date "T" time
|
||||||
date-time-string ::= "\"" date-time "\"" space
|
date-time-string ::= "\"" date-time "\"" space
|
||||||
root ::= "[" space date-string "," space uuid "," space time-string "," space date-time-string "]" space
|
root ::= "[" space tuple-0 "," space uuid "," space tuple-2 "," space tuple-3 "]" space
|
||||||
space ::= " "?
|
space ::= " "?
|
||||||
time ::= ([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )
|
time ::= ([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )
|
||||||
time-string ::= "\"" time "\"" space
|
time-string ::= "\"" time "\"" space
|
||||||
|
tuple-0 ::= date-string
|
||||||
|
tuple-2 ::= time-string
|
||||||
|
tuple-3 ::= date-time-string
|
||||||
uuid ::= "\"" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "\"" space
|
uuid ::= "\"" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "\"" space
|
||||||
)"""
|
)"""
|
||||||
});
|
});
|
||||||
@ -148,10 +151,65 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||||||
"type": "string"
|
"type": "string"
|
||||||
})""",
|
})""",
|
||||||
R"""(
|
R"""(
|
||||||
root ::= "\"" (
|
char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||||
[^"\\] |
|
root ::= "\"" char* "\"" space
|
||||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
space ::= " "?
|
||||||
)* "\"" space
|
)"""
|
||||||
|
});
|
||||||
|
|
||||||
|
test({
|
||||||
|
SUCCESS,
|
||||||
|
"string w/ min length 1",
|
||||||
|
R"""({
|
||||||
|
"type": "string",
|
||||||
|
"minLength": 1
|
||||||
|
})""",
|
||||||
|
R"""(
|
||||||
|
char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||||
|
root ::= "\"" char+ "\"" space
|
||||||
|
space ::= " "?
|
||||||
|
)"""
|
||||||
|
});
|
||||||
|
|
||||||
|
test({
|
||||||
|
SUCCESS,
|
||||||
|
"string w/ min length 3",
|
||||||
|
R"""({
|
||||||
|
"type": "string",
|
||||||
|
"minLength": 3
|
||||||
|
})""",
|
||||||
|
R"""(
|
||||||
|
char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||||
|
root ::= "\"" char char char (char)* "\"" space
|
||||||
|
space ::= " "?
|
||||||
|
)"""
|
||||||
|
});
|
||||||
|
|
||||||
|
test({
|
||||||
|
SUCCESS,
|
||||||
|
"string w/ max length",
|
||||||
|
R"""({
|
||||||
|
"type": "string",
|
||||||
|
"maxLength": 3
|
||||||
|
})""",
|
||||||
|
R"""(
|
||||||
|
char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||||
|
root ::= "\"" (char (char (char)?)?)? "\"" space
|
||||||
|
space ::= " "?
|
||||||
|
)"""
|
||||||
|
});
|
||||||
|
|
||||||
|
test({
|
||||||
|
SUCCESS,
|
||||||
|
"string w/ min & max length",
|
||||||
|
R"""({
|
||||||
|
"type": "string",
|
||||||
|
"minLength": 1,
|
||||||
|
"maxLength": 4
|
||||||
|
})""",
|
||||||
|
R"""(
|
||||||
|
char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||||
|
root ::= "\"" char (char (char (char)?)?)? "\"" space
|
||||||
space ::= " "?
|
space ::= " "?
|
||||||
)"""
|
)"""
|
||||||
});
|
});
|
||||||
@ -175,7 +233,8 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||||||
"type": "integer"
|
"type": "integer"
|
||||||
})""",
|
})""",
|
||||||
R"""(
|
R"""(
|
||||||
root ::= ("-"? ([0-9] | [1-9] [0-9]*)) space
|
integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
|
||||||
|
root ::= ("-"? integral-part) space
|
||||||
space ::= " "?
|
space ::= " "?
|
||||||
)"""
|
)"""
|
||||||
});
|
});
|
||||||
@ -223,12 +282,10 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||||||
"prefixItems": [{ "type": "string" }]
|
"prefixItems": [{ "type": "string" }]
|
||||||
})""",
|
})""",
|
||||||
R"""(
|
R"""(
|
||||||
|
char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||||
root ::= "[" space string "]" space
|
root ::= "[" space string "]" space
|
||||||
space ::= " "?
|
space ::= " "?
|
||||||
string ::= "\"" (
|
string ::= "\"" char* "\"" space
|
||||||
[^"\\] |
|
|
||||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
|
||||||
)* "\"" space
|
|
||||||
)"""
|
)"""
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -239,13 +296,13 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||||||
"prefixItems": [{ "type": "string" }, { "type": "number" }]
|
"prefixItems": [{ "type": "string" }, { "type": "number" }]
|
||||||
})""",
|
})""",
|
||||||
R"""(
|
R"""(
|
||||||
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||||
|
decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
|
||||||
|
integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
|
||||||
|
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
|
||||||
root ::= "[" space string "," space number "]" space
|
root ::= "[" space string "," space number "]" space
|
||||||
space ::= " "?
|
space ::= " "?
|
||||||
string ::= "\"" (
|
string ::= "\"" char* "\"" space
|
||||||
[^"\\] |
|
|
||||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
|
||||||
)* "\"" space
|
|
||||||
)"""
|
)"""
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -256,7 +313,9 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||||||
"type": "number"
|
"type": "number"
|
||||||
})""",
|
})""",
|
||||||
R"""(
|
R"""(
|
||||||
root ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
|
||||||
|
integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
|
||||||
|
root ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
|
||||||
space ::= " "?
|
space ::= " "?
|
||||||
)"""
|
)"""
|
||||||
});
|
});
|
||||||
@ -272,7 +331,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||||||
})""",
|
})""",
|
||||||
R"""(
|
R"""(
|
||||||
boolean ::= ("true" | "false") space
|
boolean ::= ("true" | "false") space
|
||||||
root ::= "[" space boolean ( "," space boolean )( "," space boolean )* "]" space
|
root ::= "[" space boolean "," space boolean ("," space boolean)* "]" space
|
||||||
space ::= " "?
|
space ::= " "?
|
||||||
)"""
|
)"""
|
||||||
});
|
});
|
||||||
@ -288,7 +347,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||||||
})""",
|
})""",
|
||||||
R"""(
|
R"""(
|
||||||
boolean ::= ("true" | "false") space
|
boolean ::= ("true" | "false") space
|
||||||
root ::= "[" space ( boolean )? "]" space
|
root ::= "[" space (boolean)? "]" space
|
||||||
space ::= " "?
|
space ::= " "?
|
||||||
)"""
|
)"""
|
||||||
});
|
});
|
||||||
@ -304,7 +363,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||||||
})""",
|
})""",
|
||||||
R"""(
|
R"""(
|
||||||
boolean ::= ("true" | "false") space
|
boolean ::= ("true" | "false") space
|
||||||
root ::= "[" space ( boolean ( "," space boolean )? )? "]" space
|
root ::= "[" space (boolean ("," space boolean)?)? "]" space
|
||||||
space ::= " "?
|
space ::= " "?
|
||||||
)"""
|
)"""
|
||||||
});
|
});
|
||||||
@ -320,10 +379,12 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||||||
"maxItems": 5
|
"maxItems": 5
|
||||||
})""",
|
})""",
|
||||||
R"""(
|
R"""(
|
||||||
integer ::= ("-"? ([0-9] | [1-9] [0-9]*)) space
|
decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
|
||||||
|
integer ::= ("-"? integral-part) space
|
||||||
|
integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
|
||||||
item ::= number | integer
|
item ::= number | integer
|
||||||
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
|
||||||
root ::= "[" space item ( "," space item )( "," space item )( "," space item )?( "," space item )? "]" space
|
root ::= "[" space item "," space item "," space item ("," space item ("," space item)?)? "]" space
|
||||||
space ::= " "?
|
space ::= " "?
|
||||||
)"""
|
)"""
|
||||||
});
|
});
|
||||||
@ -372,11 +433,11 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||||||
"regexp",
|
"regexp",
|
||||||
R"""({
|
R"""({
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"pattern": "^(\\([0-9]{1,3}\\))?[0-9]{3}-[0-9]{4} and...$"
|
"pattern": "^(\\([0-9]{1,3}\\))?[0-9]{3}-[0-9]{4} a{3,5}nd...$"
|
||||||
})""",
|
})""",
|
||||||
R"""(
|
R"""(
|
||||||
dot ::= [\U00000000-\x09\x0B\x0C\x0E-\U0010FFFF]
|
dot ::= [^\x0A\x0D]
|
||||||
root ::= "\"" ("(" root-1 root-1? root-1? ")")? root-1 root-1 root-1 "-" root-1 root-1 root-1 root-1 " and" dot dot dot "\"" space
|
root ::= "\"" ("(" root-1 (root-1 (root-1)?)? ")")? root-1 root-1 root-1 "-" root-1 root-1 root-1 root-1 " " "aaa" ("a" ("a")?)? "nd" dot dot dot "\"" space
|
||||||
root-1 ::= [0-9]
|
root-1 ::= [0-9]
|
||||||
space ::= " "?
|
space ::= " "?
|
||||||
)"""
|
)"""
|
||||||
@ -404,12 +465,10 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||||||
a-kv ::= "\"a\"" space ":" space string
|
a-kv ::= "\"a\"" space ":" space string
|
||||||
b-kv ::= "\"b\"" space ":" space string
|
b-kv ::= "\"b\"" space ":" space string
|
||||||
c-kv ::= "\"c\"" space ":" space string
|
c-kv ::= "\"c\"" space ":" space string
|
||||||
|
char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||||
root ::= "{" space b-kv "," space c-kv "," space a-kv "}" space
|
root ::= "{" space b-kv "," space c-kv "," space a-kv "}" space
|
||||||
space ::= " "?
|
space ::= " "?
|
||||||
string ::= "\"" (
|
string ::= "\"" char* "\"" space
|
||||||
[^"\\] |
|
|
||||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
|
||||||
)* "\"" space
|
|
||||||
)"""
|
)"""
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -426,12 +485,10 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||||||
})""",
|
})""",
|
||||||
R"""(
|
R"""(
|
||||||
a-kv ::= "\"a\"" space ":" space string
|
a-kv ::= "\"a\"" space ":" space string
|
||||||
|
char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||||
root ::= "{" space (a-kv )? "}" space
|
root ::= "{" space (a-kv )? "}" space
|
||||||
space ::= " "?
|
space ::= " "?
|
||||||
string ::= "\"" (
|
string ::= "\"" char* "\"" space
|
||||||
[^"\\] |
|
|
||||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
|
||||||
)* "\"" space
|
|
||||||
)"""
|
)"""
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -452,12 +509,10 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||||||
b-kv ::= "\"b\"" space ":" space string
|
b-kv ::= "\"b\"" space ":" space string
|
||||||
b-rest ::= ( "," space c-kv )?
|
b-rest ::= ( "," space c-kv )?
|
||||||
c-kv ::= "\"c\"" space ":" space string
|
c-kv ::= "\"c\"" space ":" space string
|
||||||
|
char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||||
root ::= "{" space (a-kv a-rest | b-kv b-rest | c-kv )? "}" space
|
root ::= "{" space (a-kv a-rest | b-kv b-rest | c-kv )? "}" space
|
||||||
space ::= " "?
|
space ::= " "?
|
||||||
string ::= "\"" (
|
string ::= "\"" char* "\"" space
|
||||||
[^"\\] |
|
|
||||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
|
||||||
)* "\"" space
|
|
||||||
)"""
|
)"""
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -478,14 +533,12 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||||||
a-kv ::= "\"a\"" space ":" space string
|
a-kv ::= "\"a\"" space ":" space string
|
||||||
b-kv ::= "\"b\"" space ":" space string
|
b-kv ::= "\"b\"" space ":" space string
|
||||||
c-kv ::= "\"c\"" space ":" space string
|
c-kv ::= "\"c\"" space ":" space string
|
||||||
|
char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||||
d-kv ::= "\"d\"" space ":" space string
|
d-kv ::= "\"d\"" space ":" space string
|
||||||
d-rest ::= ( "," space c-kv )?
|
d-rest ::= ( "," space c-kv )?
|
||||||
root ::= "{" space b-kv "," space a-kv ( "," space ( d-kv d-rest | c-kv ) )? "}" space
|
root ::= "{" space b-kv "," space a-kv ( "," space ( d-kv d-rest | c-kv ) )? "}" space
|
||||||
space ::= " "?
|
space ::= " "?
|
||||||
string ::= "\"" (
|
string ::= "\"" char* "\"" space
|
||||||
[^"\\] |
|
|
||||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
|
||||||
)* "\"" space
|
|
||||||
)"""
|
)"""
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -499,14 +552,14 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||||||
R"""(
|
R"""(
|
||||||
additional-kv ::= string ":" space additional-value
|
additional-kv ::= string ":" space additional-value
|
||||||
additional-kvs ::= additional-kv ( "," space additional-kv )*
|
additional-kvs ::= additional-kv ( "," space additional-kv )*
|
||||||
additional-value ::= "[" space ( number ( "," space number )* )? "]" space
|
additional-value ::= "[" space (number ("," space number)*)? "]" space
|
||||||
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||||
|
decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
|
||||||
|
integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
|
||||||
|
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
|
||||||
root ::= "{" space (additional-kvs )? "}" space
|
root ::= "{" space (additional-kvs )? "}" space
|
||||||
space ::= " "?
|
space ::= " "?
|
||||||
string ::= "\"" (
|
string ::= "\"" char* "\"" space
|
||||||
[^"\\] |
|
|
||||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
|
||||||
)* "\"" space
|
|
||||||
)"""
|
)"""
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -520,16 +573,16 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||||||
R"""(
|
R"""(
|
||||||
array ::= "[" space ( value ("," space value)* )? "]" space
|
array ::= "[" space ( value ("," space value)* )? "]" space
|
||||||
boolean ::= ("true" | "false") space
|
boolean ::= ("true" | "false") space
|
||||||
|
char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||||
|
decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
|
||||||
|
integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
|
||||||
null ::= "null" space
|
null ::= "null" space
|
||||||
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
|
||||||
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
|
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
|
||||||
root ::= object
|
root ::= object
|
||||||
space ::= " "?
|
space ::= " "?
|
||||||
string ::= "\"" (
|
string ::= "\"" char* "\"" space
|
||||||
[^"\\] |
|
value ::= object | array | string | number | boolean | null
|
||||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
|
||||||
)* "\"" space
|
|
||||||
value ::= object | array | string | number | boolean
|
|
||||||
)"""
|
)"""
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -542,16 +595,16 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||||||
R"""(
|
R"""(
|
||||||
array ::= "[" space ( value ("," space value)* )? "]" space
|
array ::= "[" space ( value ("," space value)* )? "]" space
|
||||||
boolean ::= ("true" | "false") space
|
boolean ::= ("true" | "false") space
|
||||||
|
char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||||
|
decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
|
||||||
|
integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
|
||||||
null ::= "null" space
|
null ::= "null" space
|
||||||
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
|
||||||
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
|
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
|
||||||
root ::= object
|
root ::= object
|
||||||
space ::= " "?
|
space ::= " "?
|
||||||
string ::= "\"" (
|
string ::= "\"" char* "\"" space
|
||||||
[^"\\] |
|
value ::= object | array | string | number | boolean | null
|
||||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
|
||||||
)* "\"" space
|
|
||||||
value ::= object | array | string | number | boolean
|
|
||||||
)"""
|
)"""
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -583,13 +636,13 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||||||
a-kv ::= "\"a\"" space ":" space number
|
a-kv ::= "\"a\"" space ":" space number
|
||||||
additional-kv ::= string ":" space string
|
additional-kv ::= string ":" space string
|
||||||
additional-kvs ::= additional-kv ( "," space additional-kv )*
|
additional-kvs ::= additional-kv ( "," space additional-kv )*
|
||||||
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||||
|
decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
|
||||||
|
integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
|
||||||
|
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
|
||||||
root ::= "{" space a-kv ( "," space ( additional-kvs ) )? "}" space
|
root ::= "{" space a-kv ( "," space ( additional-kvs ) )? "}" space
|
||||||
space ::= " "?
|
space ::= " "?
|
||||||
string ::= "\"" (
|
string ::= "\"" char* "\"" space
|
||||||
[^"\\] |
|
|
||||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
|
||||||
)* "\"" space
|
|
||||||
)"""
|
)"""
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -608,13 +661,13 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||||||
a-rest ::= additional-kvs
|
a-rest ::= additional-kvs
|
||||||
additional-kv ::= string ":" space number
|
additional-kv ::= string ":" space number
|
||||||
additional-kvs ::= additional-kv ( "," space additional-kv )*
|
additional-kvs ::= additional-kv ( "," space additional-kv )*
|
||||||
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||||
|
decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
|
||||||
|
integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
|
||||||
|
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
|
||||||
root ::= "{" space (a-kv a-rest | additional-kvs )? "}" space
|
root ::= "{" space (a-kv a-rest | additional-kvs )? "}" space
|
||||||
space ::= " "?
|
space ::= " "?
|
||||||
string ::= "\"" (
|
string ::= "\"" char* "\"" space
|
||||||
[^"\\] |
|
|
||||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
|
||||||
)* "\"" space
|
|
||||||
)"""
|
)"""
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -636,13 +689,13 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||||||
additional-kvs ::= additional-kv ( "," space additional-kv )*
|
additional-kvs ::= additional-kv ( "," space additional-kv )*
|
||||||
b-kv ::= "\"b\"" space ":" space number
|
b-kv ::= "\"b\"" space ":" space number
|
||||||
b-rest ::= additional-kvs
|
b-rest ::= additional-kvs
|
||||||
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||||
|
decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
|
||||||
|
integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
|
||||||
|
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
|
||||||
root ::= "{" space a-kv ( "," space ( b-kv b-rest | additional-kvs ) )? "}" space
|
root ::= "{" space a-kv ( "," space ( b-kv b-rest | additional-kvs ) )? "}" space
|
||||||
space ::= " "?
|
space ::= " "?
|
||||||
string ::= "\"" (
|
string ::= "\"" char* "\"" space
|
||||||
[^"\\] |
|
|
||||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
|
||||||
)* "\"" space
|
|
||||||
)"""
|
)"""
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -650,9 +703,9 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||||||
SUCCESS,
|
SUCCESS,
|
||||||
"top-level $ref",
|
"top-level $ref",
|
||||||
R"""({
|
R"""({
|
||||||
"$ref": "#/definitions/MyType",
|
"$ref": "#/definitions/foo",
|
||||||
"definitions": {
|
"definitions": {
|
||||||
"MyType": {
|
"foo": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"a": {
|
"a": {
|
||||||
@ -667,14 +720,12 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||||||
}
|
}
|
||||||
})""",
|
})""",
|
||||||
R"""(
|
R"""(
|
||||||
MyType ::= "{" space MyType-a-kv "}" space
|
char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||||
MyType-a-kv ::= "\"a\"" space ":" space string
|
foo ::= "{" space foo-a-kv "}" space
|
||||||
root ::= MyType
|
foo-a-kv ::= "\"a\"" space ":" space string
|
||||||
|
root ::= foo
|
||||||
space ::= " "?
|
space ::= " "?
|
||||||
string ::= "\"" (
|
string ::= "\"" char* "\"" space
|
||||||
[^"\\] |
|
|
||||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
|
||||||
)* "\"" space
|
|
||||||
)"""
|
)"""
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -701,9 +752,11 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||||||
alternative-1 ::= bar
|
alternative-1 ::= bar
|
||||||
bar ::= "{" space (bar-b-kv )? "}" space
|
bar ::= "{" space (bar-b-kv )? "}" space
|
||||||
bar-b-kv ::= "\"b\"" space ":" space number
|
bar-b-kv ::= "\"b\"" space ":" space number
|
||||||
|
decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
|
||||||
foo ::= "{" space (foo-a-kv )? "}" space
|
foo ::= "{" space (foo-a-kv )? "}" space
|
||||||
foo-a-kv ::= "\"a\"" space ":" space number
|
foo-a-kv ::= "\"a\"" space ":" space number
|
||||||
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
|
||||||
|
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
|
||||||
root ::= alternative-0 | alternative-1
|
root ::= alternative-0 | alternative-1
|
||||||
space ::= " "?
|
space ::= " "?
|
||||||
)"""
|
)"""
|
||||||
@ -745,7 +798,9 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||||||
c-kv ::= "\"c\"" space ":" space number
|
c-kv ::= "\"c\"" space ":" space number
|
||||||
d-kv ::= "\"d\"" space ":" space number
|
d-kv ::= "\"d\"" space ":" space number
|
||||||
d-rest ::= ( "," space c-kv )?
|
d-rest ::= ( "," space c-kv )?
|
||||||
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
|
||||||
|
integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
|
||||||
|
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
|
||||||
root ::= "{" space a-kv "," space b-kv ( "," space ( d-kv d-rest | c-kv ) )? "}" space
|
root ::= "{" space a-kv "," space b-kv ( "," space ( d-kv d-rest | c-kv ) )? "}" space
|
||||||
space ::= " "?
|
space ::= " "?
|
||||||
)"""
|
)"""
|
||||||
@ -786,7 +841,9 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||||||
"definitions": {}
|
"definitions": {}
|
||||||
})""",
|
})""",
|
||||||
R"""(
|
R"""(
|
||||||
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
|
||||||
|
integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
|
||||||
|
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
|
||||||
number- ::= "{" space number-number-kv "}" space
|
number- ::= "{" space number-number-kv "}" space
|
||||||
number-kv ::= "\"number\"" space ":" space number-
|
number-kv ::= "\"number\"" space ":" space number-
|
||||||
number-number ::= "{" space number-number-root-kv "}" space
|
number-number ::= "{" space number-number-root-kv "}" space
|
||||||
@ -816,7 +873,7 @@ int main() {
|
|||||||
test_all("Python", [](const TestCase & tc) {
|
test_all("Python", [](const TestCase & tc) {
|
||||||
write("test-json-schema-input.tmp", tc.schema);
|
write("test-json-schema-input.tmp", tc.schema);
|
||||||
tc.verify_status(std::system(
|
tc.verify_status(std::system(
|
||||||
"python ./examples/json-schema-to-grammar.py test-json-schema-input.tmp > test-grammar-output.tmp") == 0 ? SUCCESS : FAILURE);
|
"python ./examples/json_schema_to_grammar.py test-json-schema-input.tmp > test-grammar-output.tmp") == 0 ? SUCCESS : FAILURE);
|
||||||
tc.verify(read("test-grammar-output.tmp"));
|
tc.verify(read("test-grammar-output.tmp"));
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
|
Loading…
Reference in New Issue
Block a user