json-schema-to-grammar improvements (+ added to server) (#5978)
* json: fix arrays (disallow `[,1]`)
* json: support tuple types (`[number, string]`)
* json: support additionalProperties (`{[k: string]: [string,number][]}`)
* json: support required / optional properties
* json: add support for pattern
* json: resolve $ref (and support https schema urls)
* json: fix $ref resolution
* json: support union types (mostly for nullable types I think)
* json: support allOf + nested anyOf
* json: support any (`{}` or `{type: object}`)
* json: fix merge
* json: temp fix for escapes
* json: spaces in output and unrestricted output spaces
* json: add typings
* json: fix typo
* Create ts-type-to-grammar.sh
* json: fix _format_literal (json.dumps already escapes quotes)
* json: merge lit sequences and handle negatives
{"type": "string", "pattern": "^({\"question\": \"[^\"]+\", \"response\": \"[^\"]+\"}\\n)+$"}
* json: handle pattern repetitions
* Update json-schema-to-grammar.mjs
* Create regex-to-grammar.py
* json: extract repeated regexp patterns to subrule
* Update json-schema-to-grammar.py
* Update json-schema-to-grammar.py
* Update json-schema-to-grammar.py
* json: handle schema from pydantic Optional fields
* Update json-schema-to-grammar.py
* Update json-schema-to-grammar.py
* Update ts-type-to-grammar.sh
* Update ts-type-to-grammar.sh
* json: simplify nullable fields handling
* json: accept duplicate identical rules
* json: revert space to 1 at most
* json: reuse regexp pattern subrules
* json: handle uuid string format
* json: fix literal escapes
* json: add --allow-fetch
* json: simplify range escapes
* json: support negative ranges in patterns
* Delete commit.txt
* json: custom regex parser, adds dot support & JS-portable
* json: rm trailing spaces
* Update json-schema-to-grammar.mjs
* json: updated server & chat `( cd examples/server && ./deps.sh )`
* json: port fixes from mjs to python
* Update ts-type-to-grammar.sh
* json: support prefixItems alongside array items
* json: add date format + fix uuid
* json: add date, time, date-time formats
* json: preserve order of props from TS defs
* json: port schema converter to C++, wire in ./server
* json: nits
* Update json-schema-to-grammar.cpp
* Update json-schema-to-grammar.cpp
* Update json-schema-to-grammar.cpp
* json: fix mjs implementation + align outputs
* Update json-schema-to-grammar.mjs.hpp
* json: test C++, JS & Python versions
* json: nits + regen deps
* json: cleanup test
* json: revert from c++17 to 11
* json: nit fixes
* json: dirty include for test
* json: fix zig build
* json: pass static command to std::system in tests (fixed temp files)
* json: fix top-level $refs
* json: don't use c++20 designated initializers
* nit
* json: basic support for reserved names `{number:{number:{root:number}}}`
* Revamp test cmake to allow args (WORKING_DIRECTORY needed for JSON test)
* json: re-ran server deps.sh
* json: simplify test
* json: support mix of additional props & required/optional
* json: add tests for some expected failures
* json: fix type=const in c++, add failure expectations for non-str const&enum
* json: test (& simplify output of) empty schema
* json: check parsing in test + fix value & string refs
* json: add server tests for OAI JSON response_format
* json: test/fix top-level anyOf
* json: improve grammar parsing failures
* json: test/fix additional props corner cases
* json: fix string patterns (was missing quotes)
* json: ws nit
* json: fix json handling in server when there's no response_format
* json: catch schema conversion errors in server
* json: don't complain about unknown format type in server if unset
* json: cleaner build of test
* json: create examples/json-schema-pydantic-example.py
* json: fix date pattern
* json: move json.hpp & json-schema-to-grammar.{cpp,h} to common
* json: indent 4 spaces
* json: fix naming of top-level c++ function (+ drop unused one)
* json: avoid using namespace std
* json: fix zig build
* Update server.feature
* json: iostream -> fprintf
* json: space before & refs for consistency
* json: nits
2024-03-21 11:50:43 +00:00
|
|
|
#ifdef NDEBUG
|
|
|
|
#undef NDEBUG
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include <fstream>
|
|
|
|
#include <sstream>
|
|
|
|
#include <regex>
|
|
|
|
|
|
|
|
#include "json-schema-to-grammar.h"
|
|
|
|
#include "grammar-parser.h"
|
|
|
|
|
|
|
|
// Normalises a grammar string for comparison: drops leading and trailing
// whitespace of the whole text, then removes the indentation (spaces/tabs)
// found at the start of the text and after every newline.
static std::string trim(const std::string & source) {
    const char * const blanks = " \n\r\t";

    const std::string::size_type first = source.find_first_not_of(blanks);
    if (first == std::string::npos) {
        // Nothing but whitespace (or empty input): the result is empty.
        return std::string();
    }
    const std::string::size_type last = source.find_last_not_of(blanks);
    const std::string core = source.substr(first, last - first + 1);

    const std::regex line_indent("(^|\n)[ \t]+");
    return std::regex_replace(core, line_indent, "$1");
}
|
|
|
|
|
|
|
|
// Outcome of a schema conversion; used both for the expected and the actual result.
enum TestCaseStatus {
    SUCCESS = 0, // the conversion completed
    FAILURE = 1  // the conversion reported an error
};
|
|
|
|
|
|
|
|
struct TestCase {
|
|
|
|
TestCaseStatus expected_status;
|
|
|
|
std::string name;
|
|
|
|
std::string schema;
|
|
|
|
std::string expected_grammar;
|
|
|
|
|
|
|
|
void _print_failure_header() const {
|
|
|
|
fprintf(stderr, "#\n# Test '%s' failed.\n#\n%s\n", name.c_str(), schema.c_str());
|
|
|
|
}
|
|
|
|
void verify(const std::string & actual_grammar) const {
|
|
|
|
if (trim(actual_grammar) != trim(expected_grammar)) {
|
|
|
|
_print_failure_header();
|
|
|
|
fprintf(stderr, "# EXPECTED:\n%s\n# ACTUAL:\n%s\n", expected_grammar.c_str(), actual_grammar.c_str());
|
|
|
|
assert(false);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
void verify_expectation_parseable() const {
|
|
|
|
try {
|
|
|
|
auto state = grammar_parser::parse(expected_grammar.c_str());
|
|
|
|
if (state.symbol_ids.find("root") == state.symbol_ids.end()) {
|
|
|
|
throw std::runtime_error("Grammar failed to parse:\n" + expected_grammar);
|
|
|
|
}
|
|
|
|
} catch (const std::runtime_error & ex) {
|
|
|
|
_print_failure_header();
|
|
|
|
fprintf(stderr, "# GRAMMAR ERROR: %s\n", ex.what());
|
|
|
|
assert(false);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
void verify_status(TestCaseStatus status) const {
|
|
|
|
if (status != expected_status) {
|
|
|
|
_print_failure_header();
|
|
|
|
fprintf(stderr, "# EXPECTED STATUS: %s\n", expected_status == SUCCESS ? "SUCCESS" : "FAILURE");
|
|
|
|
fprintf(stderr, "# ACTUAL STATUS: %s\n", status == SUCCESS ? "SUCCESS" : "FAILURE");
|
|
|
|
assert(false);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
// Writes `content` to the file at path `file`, replacing any previous contents.
//
// The whole string is streamed (not its C-string view), so content containing
// embedded NUL bytes is written in full instead of being cut at the first NUL.
// The stream is closed by its destructor. As before, a file that cannot be
// opened is not reported to the caller.
static void write(const std::string & file, const std::string & content) {
    std::ofstream f(file.c_str());
    f << content;
}
|
|
|
|
|
|
|
|
// Returns the full contents of the file at path `file` as a string.
// A file that cannot be opened yields an empty string.
static std::string read(const std::string & file) {
    std::ifstream input(file.c_str());
    std::ostringstream buffer;
    buffer << input.rdbuf();
    return buffer.str();
}
|
|
|
|
|
|
|
|
static void test_all(const std::string & lang, std::function<void(const TestCase &)> runner) {
|
|
|
|
fprintf(stderr, "#\n# Testing JSON schema conversion (%s)\n#\n", lang.c_str());
|
|
|
|
auto test = [&](const TestCase & tc) {
|
|
|
|
fprintf(stderr, "- %s%s\n", tc.name.c_str(), tc.expected_status == FAILURE ? " (failure expected)" : "");
|
|
|
|
runner(tc);
|
|
|
|
};
|
|
|
|
|
|
|
|
test({
|
|
|
|
FAILURE,
|
|
|
|
"unknown type",
|
|
|
|
R"""({
|
|
|
|
"type": "kaboom"
|
|
|
|
})""",
|
|
|
|
""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
FAILURE,
|
|
|
|
"invalid type type",
|
|
|
|
R"""({
|
|
|
|
"type": 123
|
|
|
|
})""",
|
|
|
|
""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
SUCCESS,
|
|
|
|
"empty schema (object)",
|
|
|
|
"{}",
|
|
|
|
R"""(
|
|
|
|
array ::= "[" space ( value ("," space value)* )? "]" space
|
|
|
|
boolean ::= ("true" | "false") space
|
|
|
|
null ::= "null" space
|
|
|
|
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
|
|
|
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
|
|
|
|
root ::= object
|
|
|
|
space ::= " "?
|
|
|
|
string ::= "\"" (
|
|
|
|
[^"\\] |
|
|
|
|
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
|
|
|
)* "\"" space
|
|
|
|
value ::= object | array | string | number | boolean
|
|
|
|
)"""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
SUCCESS,
|
|
|
|
"exotic formats",
|
|
|
|
R"""({
|
|
|
|
"items": [
|
|
|
|
{ "format": "date" },
|
|
|
|
{ "format": "uuid" },
|
|
|
|
{ "format": "time" },
|
|
|
|
{ "format": "date-time" }
|
|
|
|
]
|
|
|
|
})""",
|
|
|
|
R"""(
|
|
|
|
date ::= [0-9] [0-9] [0-9] [0-9] "-" ( "0" [1-9] | "1" [0-2] ) "-" ( "0" [1-9] | [1-2] [0-9] | "3" [0-1] )
|
|
|
|
date-string ::= "\"" date "\"" space
|
|
|
|
date-time ::= date "T" time
|
|
|
|
date-time-string ::= "\"" date-time "\"" space
|
|
|
|
root ::= "[" space date-string "," space uuid "," space time-string "," space date-time-string "]" space
|
|
|
|
space ::= " "?
|
|
|
|
time ::= ([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )
|
|
|
|
time-string ::= "\"" time "\"" space
|
|
|
|
uuid ::= "\"" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "\"" space
|
|
|
|
)"""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
SUCCESS,
|
|
|
|
"string",
|
|
|
|
R"""({
|
|
|
|
"type": "string"
|
|
|
|
})""",
|
|
|
|
R"""(
|
|
|
|
root ::= "\"" (
|
|
|
|
[^"\\] |
|
|
|
|
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
|
|
|
)* "\"" space
|
|
|
|
space ::= " "?
|
|
|
|
)"""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
SUCCESS,
|
|
|
|
"boolean",
|
|
|
|
R"""({
|
|
|
|
"type": "boolean"
|
|
|
|
})""",
|
|
|
|
R"""(
|
|
|
|
root ::= ("true" | "false") space
|
|
|
|
space ::= " "?
|
|
|
|
)"""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
SUCCESS,
|
|
|
|
"integer",
|
|
|
|
R"""({
|
|
|
|
"type": "integer"
|
|
|
|
})""",
|
|
|
|
R"""(
|
|
|
|
root ::= ("-"? ([0-9] | [1-9] [0-9]*)) space
|
|
|
|
space ::= " "?
|
|
|
|
)"""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
SUCCESS,
|
|
|
|
"string const",
|
|
|
|
R"""({
|
|
|
|
"const": "foo"
|
|
|
|
})""",
|
|
|
|
R"""(
|
|
|
|
root ::= "\"foo\""
|
|
|
|
space ::= " "?
|
|
|
|
)"""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
FAILURE,
|
|
|
|
"non-string const",
|
|
|
|
R"""({
|
|
|
|
"const": 123
|
|
|
|
})""",
|
|
|
|
""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
FAILURE,
|
|
|
|
"non-string enum",
|
|
|
|
R"""({
|
|
|
|
"enum": [123]
|
|
|
|
})""",
|
|
|
|
""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
SUCCESS,
|
|
|
|
"tuple1",
|
|
|
|
R"""({
|
|
|
|
"prefixItems": [{ "type": "string" }]
|
|
|
|
})""",
|
|
|
|
R"""(
|
|
|
|
root ::= "[" space string "]" space
|
|
|
|
space ::= " "?
|
|
|
|
string ::= "\"" (
|
|
|
|
[^"\\] |
|
|
|
|
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
|
|
|
)* "\"" space
|
|
|
|
)"""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
SUCCESS,
|
|
|
|
"tuple2",
|
|
|
|
R"""({
|
|
|
|
"prefixItems": [{ "type": "string" }, { "type": "number" }]
|
|
|
|
})""",
|
|
|
|
R"""(
|
|
|
|
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
|
|
|
root ::= "[" space string "," space number "]" space
|
|
|
|
space ::= " "?
|
|
|
|
string ::= "\"" (
|
|
|
|
[^"\\] |
|
|
|
|
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
|
|
|
)* "\"" space
|
|
|
|
)"""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
SUCCESS,
|
|
|
|
"number",
|
|
|
|
R"""({
|
|
|
|
"type": "number"
|
|
|
|
})""",
|
|
|
|
R"""(
|
|
|
|
root ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
|
|
|
space ::= " "?
|
|
|
|
)"""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
SUCCESS,
|
|
|
|
"minItems",
|
|
|
|
R"""({
|
|
|
|
"items": {
|
|
|
|
"type": "boolean"
|
|
|
|
},
|
|
|
|
"minItems": 2
|
|
|
|
})""",
|
|
|
|
R"""(
|
|
|
|
boolean ::= ("true" | "false") space
|
|
|
|
root ::= "[" space boolean ( "," space boolean )( "," space boolean )* "]" space
|
|
|
|
space ::= " "?
|
|
|
|
)"""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
SUCCESS,
|
|
|
|
"maxItems 1",
|
|
|
|
R"""({
|
|
|
|
"items": {
|
|
|
|
"type": "boolean"
|
|
|
|
},
|
|
|
|
"maxItems": 1
|
|
|
|
})""",
|
|
|
|
R"""(
|
|
|
|
boolean ::= ("true" | "false") space
|
|
|
|
root ::= "[" space ( boolean )? "]" space
|
|
|
|
space ::= " "?
|
|
|
|
)"""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
SUCCESS,
|
|
|
|
"maxItems 2",
|
|
|
|
R"""({
|
|
|
|
"items": {
|
|
|
|
"type": "boolean"
|
|
|
|
},
|
|
|
|
"maxItems": 2
|
|
|
|
})""",
|
|
|
|
R"""(
|
|
|
|
boolean ::= ("true" | "false") space
|
|
|
|
root ::= "[" space ( boolean ( "," space boolean )? )? "]" space
|
|
|
|
space ::= " "?
|
|
|
|
)"""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
SUCCESS,
|
|
|
|
"min + maxItems",
|
|
|
|
R"""({
|
|
|
|
"items": {
|
|
|
|
"type": ["number", "integer"]
|
|
|
|
},
|
|
|
|
"minItems": 3,
|
|
|
|
"maxItems": 5
|
|
|
|
})""",
|
|
|
|
R"""(
|
|
|
|
integer ::= ("-"? ([0-9] | [1-9] [0-9]*)) space
|
|
|
|
item ::= number | integer
|
|
|
|
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
|
|
|
root ::= "[" space item ( "," space item )( "," space item )( "," space item )?( "," space item )? "]" space
|
|
|
|
space ::= " "?
|
|
|
|
)"""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
SUCCESS,
|
|
|
|
"simple regexp",
|
|
|
|
R"""({
|
|
|
|
"type": "string",
|
|
|
|
"pattern": "^abc?d*efg+(hij)?kl$"
|
|
|
|
})""",
|
|
|
|
R"""(
|
|
|
|
root ::= "\"" "ab" "c"? "d"* "ef" "g"+ ("hij")? "kl" "\"" space
|
|
|
|
space ::= " "?
|
|
|
|
)"""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
SUCCESS,
|
|
|
|
"regexp escapes",
|
|
|
|
R"""({
|
|
|
|
"type": "string",
|
|
|
|
"pattern": "^\\[\\]\\{\\}\\(\\)\\|\\+\\*\\?$"
|
|
|
|
})""",
|
|
|
|
R"""(
|
|
|
|
root ::= "\"" "[]{}()|+*?" "\"" space
|
|
|
|
space ::= " "?
|
|
|
|
)"""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
SUCCESS,
|
|
|
|
"regexp quote",
|
|
|
|
R"""({
|
|
|
|
"type": "string",
|
|
|
|
"pattern": "^\"$"
|
|
|
|
})""",
|
|
|
|
R"""(
|
|
|
|
root ::= "\"" "\"" "\"" space
|
|
|
|
space ::= " "?
|
|
|
|
)"""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
SUCCESS,
|
|
|
|
"regexp",
|
|
|
|
R"""({
|
|
|
|
"type": "string",
|
|
|
|
"pattern": "^(\\([0-9]{1,3}\\))?[0-9]{3}-[0-9]{4} and...$"
|
|
|
|
})""",
|
|
|
|
R"""(
|
|
|
|
dot ::= [\U00000000-\x09\x0B\x0C\x0E-\U0010FFFF]
|
|
|
|
root ::= "\"" ("(" root-1 root-1? root-1? ")")? root-1 root-1 root-1 "-" root-1 root-1 root-1 root-1 " and" dot dot dot "\"" space
|
|
|
|
root-1 ::= [0-9]
|
|
|
|
space ::= " "?
|
|
|
|
)"""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
SUCCESS,
|
|
|
|
"required props",
|
|
|
|
R"""({
|
|
|
|
"type": "object",
|
|
|
|
"properties": {
|
|
|
|
"a": {
|
|
|
|
"type": "string"
|
|
|
|
},
|
|
|
|
"b": {
|
|
|
|
"type": "string"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"required": [
|
|
|
|
"a",
|
|
|
|
"b"
|
|
|
|
],
|
|
|
|
"additionalProperties": false,
|
|
|
|
"definitions": {}
|
|
|
|
})""",
|
|
|
|
R"""(
|
|
|
|
a-kv ::= "\"a\"" space ":" space string
|
|
|
|
b-kv ::= "\"b\"" space ":" space string
|
|
|
|
root ::= "{" space a-kv "," space b-kv "}" space
|
|
|
|
space ::= " "?
|
|
|
|
string ::= "\"" (
|
|
|
|
[^"\\] |
|
|
|
|
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
|
|
|
)* "\"" space
|
|
|
|
)"""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
SUCCESS,
|
|
|
|
"1 optional prop",
|
|
|
|
R"""({
|
|
|
|
"properties": {
|
|
|
|
"a": {
|
|
|
|
"type": "string"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"additionalProperties": false
|
|
|
|
})""",
|
|
|
|
R"""(
|
|
|
|
a-kv ::= "\"a\"" space ":" space string
|
|
|
|
root ::= "{" space (a-kv )? "}" space
|
|
|
|
space ::= " "?
|
|
|
|
string ::= "\"" (
|
|
|
|
[^"\\] |
|
|
|
|
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
|
|
|
)* "\"" space
|
|
|
|
)"""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
SUCCESS,
|
|
|
|
"N optional props",
|
|
|
|
R"""({
|
|
|
|
"properties": {
|
|
|
|
"a": {"type": "string"},
|
|
|
|
"b": {"type": "string"},
|
|
|
|
"c": {"type": "string"}
|
|
|
|
},
|
|
|
|
"additionalProperties": false
|
|
|
|
})""",
|
|
|
|
R"""(
|
|
|
|
a-kv ::= "\"a\"" space ":" space string
|
|
|
|
a-rest ::= ( "," space b-kv )? b-rest
|
|
|
|
b-kv ::= "\"b\"" space ":" space string
|
|
|
|
b-rest ::= ( "," space c-kv )?
|
|
|
|
c-kv ::= "\"c\"" space ":" space string
|
|
|
|
root ::= "{" space (a-kv a-rest | b-kv b-rest | c-kv )? "}" space
|
|
|
|
space ::= " "?
|
|
|
|
string ::= "\"" (
|
|
|
|
[^"\\] |
|
|
|
|
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
|
|
|
)* "\"" space
|
|
|
|
)"""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
SUCCESS,
|
|
|
|
"required + optional props",
|
|
|
|
R"""({
|
|
|
|
"properties": {
|
|
|
|
"a": {"type": "string"},
|
|
|
|
"b": {"type": "string"},
|
|
|
|
"c": {"type": "string"},
|
|
|
|
"d": {"type": "string"}
|
|
|
|
},
|
|
|
|
"required": ["a", "b"],
|
|
|
|
"additionalProperties": false
|
|
|
|
})""",
|
|
|
|
R"""(
|
|
|
|
a-kv ::= "\"a\"" space ":" space string
|
|
|
|
b-kv ::= "\"b\"" space ":" space string
|
|
|
|
c-kv ::= "\"c\"" space ":" space string
|
|
|
|
c-rest ::= ( "," space d-kv )?
|
|
|
|
d-kv ::= "\"d\"" space ":" space string
|
|
|
|
root ::= "{" space a-kv "," space b-kv ( "," space ( c-kv c-rest | d-kv ) )? "}" space
|
|
|
|
space ::= " "?
|
|
|
|
string ::= "\"" (
|
|
|
|
[^"\\] |
|
|
|
|
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
|
|
|
)* "\"" space
|
|
|
|
)"""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
SUCCESS,
|
|
|
|
"additional props",
|
|
|
|
R"""({
|
|
|
|
"type": "object",
|
|
|
|
"additionalProperties": {"type": "array", "items": {"type": "number"}}
|
|
|
|
})""",
|
|
|
|
R"""(
|
|
|
|
additional-kv ::= string ":" space additional-value
|
|
|
|
additional-kvs ::= additional-kv ( "," space additional-kv )*
|
|
|
|
additional-value ::= "[" space ( number ( "," space number )* )? "]" space
|
|
|
|
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
|
|
|
root ::= "{" space (additional-kvs )? "}" space
|
|
|
|
space ::= " "?
|
|
|
|
string ::= "\"" (
|
|
|
|
[^"\\] |
|
|
|
|
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
|
|
|
)* "\"" space
|
|
|
|
)"""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
SUCCESS,
|
|
|
|
"additional props (true)",
|
|
|
|
R"""({
|
|
|
|
"type": "object",
|
|
|
|
"additionalProperties": true
|
|
|
|
})""",
|
|
|
|
R"""(
|
|
|
|
array ::= "[" space ( value ("," space value)* )? "]" space
|
|
|
|
boolean ::= ("true" | "false") space
|
|
|
|
null ::= "null" space
|
|
|
|
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
|
|
|
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
|
|
|
|
root ::= object
|
|
|
|
space ::= " "?
|
|
|
|
string ::= "\"" (
|
|
|
|
[^"\\] |
|
|
|
|
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
|
|
|
)* "\"" space
|
|
|
|
value ::= object | array | string | number | boolean
|
|
|
|
)"""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
SUCCESS,
|
|
|
|
"additional props (implicit)",
|
|
|
|
R"""({
|
|
|
|
"type": "object"
|
|
|
|
})""",
|
|
|
|
R"""(
|
|
|
|
array ::= "[" space ( value ("," space value)* )? "]" space
|
|
|
|
boolean ::= ("true" | "false") space
|
|
|
|
null ::= "null" space
|
|
|
|
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
|
|
|
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
|
|
|
|
root ::= object
|
|
|
|
space ::= " "?
|
|
|
|
string ::= "\"" (
|
|
|
|
[^"\\] |
|
|
|
|
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
|
|
|
)* "\"" space
|
|
|
|
value ::= object | array | string | number | boolean
|
|
|
|
)"""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
SUCCESS,
|
|
|
|
"empty w/o additional props",
|
|
|
|
R"""({
|
|
|
|
"type": "object",
|
|
|
|
"additionalProperties": false
|
|
|
|
})""",
|
|
|
|
R"""(
|
|
|
|
root ::= "{" space "}" space
|
|
|
|
space ::= " "?
|
|
|
|
)"""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
SUCCESS,
|
|
|
|
"required + additional props",
|
|
|
|
R"""({
|
|
|
|
"type": "object",
|
|
|
|
"properties": {
|
|
|
|
"a": {"type": "number"}
|
|
|
|
},
|
|
|
|
"required": ["a"],
|
|
|
|
"additionalProperties": {"type": "string"}
|
|
|
|
})""",
|
|
|
|
R"""(
|
|
|
|
a-kv ::= "\"a\"" space ":" space number
|
|
|
|
additional-kv ::= string ":" space string
|
|
|
|
additional-kvs ::= additional-kv ( "," space additional-kv )*
|
|
|
|
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
|
|
|
root ::= "{" space a-kv ( "," space ( additional-kvs ) )? "}" space
|
|
|
|
space ::= " "?
|
|
|
|
string ::= "\"" (
|
|
|
|
[^"\\] |
|
|
|
|
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
|
|
|
)* "\"" space
|
|
|
|
)"""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
SUCCESS,
|
|
|
|
"optional + additional props",
|
|
|
|
R"""({
|
|
|
|
"type": "object",
|
|
|
|
"properties": {
|
|
|
|
"a": {"type": "number"}
|
|
|
|
},
|
|
|
|
"additionalProperties": {"type": "number"}
|
|
|
|
})""",
|
|
|
|
R"""(
|
|
|
|
a-kv ::= "\"a\"" space ":" space number
|
|
|
|
a-rest ::= additional-kvs
|
|
|
|
additional-kv ::= string ":" space number
|
|
|
|
additional-kvs ::= additional-kv ( "," space additional-kv )*
|
|
|
|
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
|
|
|
root ::= "{" space (a-kv a-rest | additional-kvs )? "}" space
|
|
|
|
space ::= " "?
|
|
|
|
string ::= "\"" (
|
|
|
|
[^"\\] |
|
|
|
|
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
|
|
|
)* "\"" space
|
|
|
|
)"""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
SUCCESS,
|
|
|
|
"required + optional + additional props",
|
|
|
|
R"""({
|
|
|
|
"type": "object",
|
|
|
|
"properties": {
|
|
|
|
"a": {"type": "number"},
|
|
|
|
"b": {"type": "number"}
|
|
|
|
},
|
|
|
|
"required": ["a"],
|
|
|
|
"additionalProperties": {"type": "number"}
|
|
|
|
})""",
|
|
|
|
R"""(
|
|
|
|
a-kv ::= "\"a\"" space ":" space number
|
|
|
|
additional-kv ::= string ":" space number
|
|
|
|
additional-kvs ::= additional-kv ( "," space additional-kv )*
|
|
|
|
b-kv ::= "\"b\"" space ":" space number
|
|
|
|
b-rest ::= additional-kvs
|
|
|
|
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
|
|
|
root ::= "{" space a-kv ( "," space ( b-kv b-rest | additional-kvs ) )? "}" space
|
|
|
|
space ::= " "?
|
|
|
|
string ::= "\"" (
|
|
|
|
[^"\\] |
|
|
|
|
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
|
|
|
)* "\"" space
|
|
|
|
)"""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
SUCCESS,
|
|
|
|
"top-level $ref",
|
|
|
|
R"""({
|
|
|
|
"$ref": "#/definitions/MyType",
|
|
|
|
"definitions": {
|
|
|
|
"MyType": {
|
|
|
|
"type": "object",
|
|
|
|
"properties": {
|
|
|
|
"a": {
|
|
|
|
"type": "string"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"required": [
|
|
|
|
"a"
|
|
|
|
],
|
|
|
|
"additionalProperties": false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
})""",
|
|
|
|
R"""(
|
|
|
|
MyType ::= "{" space MyType-a-kv "}" space
|
|
|
|
MyType-a-kv ::= "\"a\"" space ":" space string
|
|
|
|
root ::= MyType
|
|
|
|
space ::= " "?
|
|
|
|
string ::= "\"" (
|
|
|
|
[^"\\] |
|
|
|
|
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
|
|
|
)* "\"" space
|
|
|
|
)"""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
SUCCESS,
|
|
|
|
"anyOf",
|
|
|
|
R"""({
|
|
|
|
"anyOf": [
|
|
|
|
{"$ref": "#/definitions/foo"},
|
|
|
|
{"$ref": "#/definitions/bar"}
|
|
|
|
],
|
|
|
|
"definitions": {
|
|
|
|
"foo": {
|
|
|
|
"properties": {"a": {"type": "number"}}
|
|
|
|
},
|
|
|
|
"bar": {
|
|
|
|
"properties": {"b": {"type": "number"}}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"type": "object"
|
|
|
|
})""",
|
|
|
|
R"""(
|
|
|
|
alternative-0 ::= foo
|
|
|
|
alternative-1 ::= bar
|
|
|
|
bar ::= "{" space (bar-b-kv )? "}" space
|
|
|
|
bar-b-kv ::= "\"b\"" space ":" space number
|
|
|
|
foo ::= "{" space (foo-a-kv )? "}" space
|
|
|
|
foo-a-kv ::= "\"a\"" space ":" space number
|
|
|
|
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
|
|
|
root ::= alternative-0 | alternative-1
|
|
|
|
space ::= " "?
|
|
|
|
)"""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
SUCCESS,
|
|
|
|
"mix of allOf, anyOf and $ref (similar to https://json.schemastore.org/tsconfig.json)",
|
|
|
|
R"""({
|
|
|
|
"allOf": [
|
|
|
|
{"$ref": "#/definitions/foo"},
|
|
|
|
{"$ref": "#/definitions/bar"},
|
|
|
|
{
|
|
|
|
"anyOf": [
|
|
|
|
{"$ref": "#/definitions/baz"},
|
|
|
|
{"$ref": "#/definitions/bam"}
|
|
|
|
]
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"definitions": {
|
|
|
|
"foo": {
|
|
|
|
"properties": {"a": {"type": "number"}}
|
|
|
|
},
|
|
|
|
"bar": {
|
|
|
|
"properties": {"b": {"type": "number"}}
|
|
|
|
},
|
|
|
|
"bam": {
|
|
|
|
"properties": {"c": {"type": "number"}}
|
|
|
|
},
|
|
|
|
"baz": {
|
|
|
|
"properties": {"d": {"type": "number"}}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"type": "object"
|
|
|
|
})""",
|
|
|
|
R"""(
|
|
|
|
a-kv ::= "\"a\"" space ":" space number
|
|
|
|
b-kv ::= "\"b\"" space ":" space number
|
|
|
|
c-kv ::= "\"c\"" space ":" space number
|
|
|
|
d-kv ::= "\"d\"" space ":" space number
|
|
|
|
d-rest ::= ( "," space c-kv )?
|
|
|
|
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
|
|
|
root ::= "{" space a-kv "," space b-kv ( "," space ( d-kv d-rest | c-kv ) )? "}" space
|
|
|
|
space ::= " "?
|
|
|
|
)"""
|
|
|
|
});
|
|
|
|
|
|
|
|
test({
|
|
|
|
SUCCESS,
|
|
|
|
"conflicting names",
|
|
|
|
R"""({
|
|
|
|
"type": "object",
|
|
|
|
"properties": {
|
|
|
|
"number": {
|
|
|
|
"type": "object",
|
|
|
|
"properties": {
|
|
|
|
"number": {
|
|
|
|
"type": "object",
|
|
|
|
"properties": {
|
|
|
|
"root": {
|
|
|
|
"type": "number"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"required": [
|
|
|
|
"root"
|
|
|
|
],
|
|
|
|
"additionalProperties": false
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"required": [
|
|
|
|
"number"
|
|
|
|
],
|
|
|
|
"additionalProperties": false
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"required": [
|
|
|
|
"number"
|
|
|
|
],
|
|
|
|
"additionalProperties": false,
|
|
|
|
"definitions": {}
|
|
|
|
})""",
|
|
|
|
R"""(
|
|
|
|
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
|
|
|
number- ::= "{" space number-number-kv "}" space
|
|
|
|
number-kv ::= "\"number\"" space ":" space number-
|
|
|
|
number-number ::= "{" space number-number-root-kv "}" space
|
|
|
|
number-number-kv ::= "\"number\"" space ":" space number-number
|
|
|
|
number-number-root-kv ::= "\"root\"" space ":" space number
|
|
|
|
root ::= "{" space number-kv "}" space
|
|
|
|
space ::= " "?
|
|
|
|
)"""
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
int main() {
|
|
|
|
test_all("C++", [](const TestCase & tc) {
|
|
|
|
try {
|
|
|
|
tc.verify(json_schema_to_grammar(nlohmann::json::parse(tc.schema)));
|
|
|
|
tc.verify_status(SUCCESS);
|
|
|
|
} catch (const std::runtime_error & ex) {
|
|
|
|
fprintf(stderr, "Error: %s\n", ex.what());
|
|
|
|
tc.verify_status(FAILURE);
|
|
|
|
}
|
|
|
|
});
|
2024-03-21 14:20:05 +00:00
|
|
|
//test_all("Python", [](const TestCase & tc) {
|
|
|
|
// write("test-json-schema-input.tmp", tc.schema);
|
|
|
|
// tc.verify_status(std::system(
|
|
|
|
// "python ./examples/json-schema-to-grammar.py test-json-schema-input.tmp > test-grammar-output.tmp") == 0 ? SUCCESS : FAILURE);
|
|
|
|
// tc.verify(read("test-grammar-output.tmp"));
|
|
|
|
//});
|
|
|
|
//test_all("JavaScript", [](const TestCase & tc) {
|
|
|
|
// write("test-json-schema-input.tmp", tc.schema);
|
|
|
|
// tc.verify_status(std::system(
|
|
|
|
// "node ./tests/run-json-schema-to-grammar.mjs test-json-schema-input.tmp > test-grammar-output.tmp") == 0 ? SUCCESS : FAILURE);
|
|
|
|
// tc.verify(read("test-grammar-output.tmp"));
|
|
|
|
//});
|
json-schema-to-grammar improvements (+ added to server) (#5978)
* json: fix arrays (disallow `[,1]`)
* json: support tuple types (`[number, string]`)
* json: support additionalProperties (`{[k: string]: [string,number][]}`)
* json: support required / optional properties
* json: add support for pattern
* json: resolve $ref (and support https schema urls)
* json: fix $ref resolution
* join: support union types (mostly for nullable types I think)
* json: support allOf + nested anyOf
* json: support any (`{}` or `{type: object}`)
* json: fix merge
* json: temp fix for escapes
* json: spaces in output and unrestricted output spaces
* json: add typings
* json:fix typo
* Create ts-type-to-grammar.sh
* json: fix _format_literal (json.dumps already escapes quotes)
* json: merge lit sequences and handle negatives
{"type": "string", "pattern": "^({\"question\": \"[^\"]+\", \"response\": \"[^\"]+\"}\\n)+$"}
* json: handle pattern repetitions
* Update json-schema-to-grammar.mjs
* Create regex-to-grammar.py
* json: extract repeated regexp patterns to subrule
* Update json-schema-to-grammar.py
* Update json-schema-to-grammar.py
* Update json-schema-to-grammar.py
* json: handle schema from pydantic Optional fields
* Update json-schema-to-grammar.py
* Update json-schema-to-grammar.py
* Update ts-type-to-grammar.sh
* Update ts-type-to-grammar.sh
* json: simplify nullable fields handling
* json: accept duplicate identical rules
* json: revert space to 1 at most
* json: reuse regexp pattern subrules
* json: handle uuid string format
* json: fix literal escapes
* json: add --allow-fetch
* json: simplify range escapes
* json: support negative ranges in patterns
* Delete commit.txt
* json: custom regex parser, adds dot support & JS-portable
* json: rm trailing spaces
* Update json-schema-to-grammar.mjs
* json: updated server & chat `( cd examples/server && ./deps.sh )`
* json: port fixes from mjs to python
* Update ts-type-to-grammar.sh
* json: support prefixItems alongside array items
* json: add date format + fix uuid
* json: add date, time, date-time formats
* json: preserve order of props from TS defs
* json: port schema converter to C++, wire in ./server
* json: nits
* Update json-schema-to-grammar.cpp
* Update json-schema-to-grammar.cpp
* Update json-schema-to-grammar.cpp
* json: fix mjs implementation + align outputs
* Update json-schema-to-grammar.mjs.hpp
* json: test C++, JS & Python versions
* json: nits + regen deps
* json: cleanup test
* json: revert from c++17 to 11
* json: nit fixes
* json: dirty include for test
* json: fix zig build
* json: pass static command to std::system in tests (fixed temp files)
* json: fix top-level $refs
* json: don't use c++20 designated initializers
* nit
* json: basic support for reserved names `{number:{number:{root:number}}}`
* Revamp test cmake to allow args (WORKING_DIRECTORY needed for JSON test)
* json: re-ran server deps.sh
* json: simplify test
* json: support mix of additional props & required/optional
* json: add tests for some expected failures
* json: fix type=const in c++, add failure expectations for non-str const&enum
* json: test (& simplify output of) empty schema
* json: check parsing in test + fix value & string refs
* json: add server tests for OAI JSON response_format
* json: test/fix top-level anyOf
* json: improve grammar parsing failures
* json: test/fix additional props corner cases
* json: fix string patterns (was missing quotes)
* json: ws nit
* json: fix json handling in server when there's no response_format
* json: catch schema conversion errors in server
* json: don't complain about unknown format type in server if unset
* json: cleaner build of test
* json: create examples/json-schema-pydantic-example.py
* json: fix date pattern
* json: move json.hpp & json-schema-to-grammar.{cpp,h} to common
* json: indent 4 spaces
* json: fix naming of top-level c++ function (+ drop unused one)
* json: avoid using namespace std
* json: fix zig build
* Update server.feature
* json: iostream -> fprintf
* json: space before & refs for consistency
* json: nits
2024-03-21 11:50:43 +00:00
|
|
|
|
|
|
|
test_all("Check Expectations Validity", [](const TestCase & tc) {
|
|
|
|
if (tc.expected_status == SUCCESS) {
|
|
|
|
tc.verify_expectation_parseable();
|
|
|
|
}
|
|
|
|
});
|
|
|
|
}
|