json: refine constraint for whitespace to avoid runaways yet allow pretty print (#7866)

This commit is contained in:
Olivier Chafik 2024-06-11 02:22:57 +01:00 committed by GitHub
parent 396b18dfec
commit b61eb9644d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 44 additions and 45 deletions

View File

@ -40,7 +40,7 @@ static std::string build_repetition(const std::string & item_rule, int min_items
return result;
}
const std::string SPACE_RULE = "\" \"?";
const std::string SPACE_RULE = "| \" \" | \"\\n\" [ \\t]{0,20}";
struct BuiltinRule {
std::string content;

View File

@ -29,9 +29,8 @@ class BuiltinRule:
self.content = content
self.deps = deps or []
# whitespace is constrained to a single space char to prevent model "running away" in
# whitespace. Also maybe improves generation quality?
SPACE_RULE = '" "?'
# Constraining spaces to prevent model "running away".
SPACE_RULE = '| " " | "\\n" [ \\t]{0,20}'
PRIMITIVE_RULES = {
'boolean' : BuiltinRule('("true" | "false") space', []),

View File

@ -1,5 +1,5 @@
// WARNING: This file was ported from json_schema_to_grammar.py, please fix bugs / add features there first.
const SPACE_RULE = '" "?';
const SPACE_RULE = '| " " | "\\n" [ \\t]{0,20}';
function _buildRepetition(itemRule, minItems, maxItems, opts={}) {
if (minItems === 0 && maxItems === 1) {

View File

@ -22,4 +22,4 @@ string ::=
number ::= ("-"? ([0-9] | [1-9] [0-9]{0,15})) ("." [0-9]+)? ([eE] [-+]? [0-9] [1-9]{0,15})? ws
# Optional space: by convention, applied in this grammar after literal chars when allowed
ws ::= [ \t\n]{0,20}
ws ::= | " " | "\n" [ \t]{0,20}

View File

@ -31,4 +31,4 @@ string ::=
number ::= ("-"? ([0-9] | [1-9] [0-9]{0,15})) ("." [0-9]+)? ([eE] [-+]? [1-9] [0-9]{0,15})? ws
# Optional space: by convention, applied in this grammar after literal chars when allowed
ws ::= [ \t\n]{0,20}
ws ::= | " " | "\n" [ \t]{0,20}

View File

@ -112,7 +112,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
root ::= object
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
value ::= object | array | string | number | boolean | null
)"""
@ -135,7 +135,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
date-time ::= date "T" time
date-time-string ::= "\"" date-time "\"" space
root ::= "[" space tuple-0 "," space uuid "," space tuple-2 "," space tuple-3 "]" space
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
time ::= ([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9]{3} )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )
time-string ::= "\"" time "\"" space
tuple-0 ::= date-string
@ -154,7 +154,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
R"""(
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
root ::= "\"" char* "\"" space
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
@ -168,7 +168,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
R"""(
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
root ::= "\"" char+ "\"" space
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
@ -182,7 +182,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
R"""(
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
root ::= "\"" char{3,} "\"" space
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
@ -196,7 +196,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
R"""(
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
root ::= "\"" char{0,3} "\"" space
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
@ -211,7 +211,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
R"""(
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
root ::= "\"" char{1,4} "\"" space
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
@ -223,7 +223,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
})""",
R"""(
root ::= ("true" | "false") space
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
@ -236,7 +236,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
R"""(
integral-part ::= [0] | [1-9] [0-9]{0,15}
root ::= ("-"? integral-part) space
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
@ -248,7 +248,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
})""",
R"""(
root ::= "\"foo\""
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
@ -260,7 +260,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
})""",
R"""(
root ::= "123"
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
@ -272,7 +272,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
})""",
R"""(
root ::= "\"red\"" | "\"amber\"" | "\"green\"" | "null" | "42" | "[\"foo\"]"
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
@ -285,7 +285,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
R"""(
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
root ::= "[" space string "]" space
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
)"""
});
@ -302,7 +302,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
integral-part ::= [0] | [1-9] [0-9]{0,15}
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
root ::= "[" space string "," space number "]" space
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
)"""
});
@ -317,7 +317,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
decimal-part ::= [0-9]{1,16}
integral-part ::= [0] | [1-9] [0-9]{0,15}
root ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
@ -333,7 +333,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
R"""(
boolean ::= ("true" | "false") space
root ::= "[" space boolean ("," space boolean)+ "]" space
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
@ -349,7 +349,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
R"""(
boolean ::= ("true" | "false") space
root ::= "[" space boolean? "]" space
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
@ -365,7 +365,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
R"""(
boolean ::= ("true" | "false") space
root ::= "[" space (boolean ("," space boolean)?)? "]" space
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
@ -386,7 +386,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
item ::= number | integer
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
root ::= "[" space item ("," space item){2,4} "]" space
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
@ -399,7 +399,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
})""",
R"""(
root ::= "\"" "ab" "c"? "d"* "ef" "g"+ ("hij")? "kl" "\"" space
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
@ -412,7 +412,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
})""",
R"""(
root ::= "\"" "[]{}()|+*?" "\"" space
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
@ -425,7 +425,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
})""",
R"""(
root ::= "\"" "\"" "\"" space
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
@ -440,7 +440,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
dot ::= [^\x0A\x0D]
root ::= "\"" ("(" root-1{1,3} ")")? root-1{3,3} "-" root-1{4,4} " " "a"{3,5} "nd" dot dot dot "\"" space
root-1 ::= [0-9]
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
@ -468,7 +468,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
c-kv ::= "\"c\"" space ":" space string
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
root ::= "{" space b-kv "," space c-kv "," space a-kv "}" space
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
)"""
});
@ -488,7 +488,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
a-kv ::= "\"a\"" space ":" space string
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
root ::= "{" space (a-kv )? "}" space
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
)"""
});
@ -512,7 +512,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
c-kv ::= "\"c\"" space ":" space string
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
root ::= "{" space (a-kv a-rest | b-kv b-rest | c-kv )? "}" space
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
)"""
});
@ -538,7 +538,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
d-kv ::= "\"d\"" space ":" space string
d-rest ::= ( "," space c-kv )?
root ::= "{" space b-kv "," space a-kv ( "," space ( d-kv d-rest | c-kv ) )? "}" space
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
)"""
});
@ -559,7 +559,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
integral-part ::= [0] | [1-9] [0-9]{0,15}
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
root ::= "{" space (additional-kvs )? "}" space
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
)"""
});
@ -581,7 +581,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
root ::= object
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
value ::= object | array | string | number | boolean | null
)"""
@ -603,7 +603,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
root ::= object
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
value ::= object | array | string | number | boolean | null
)"""
@ -618,7 +618,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
})""",
R"""(
root ::= "{" space "}" space
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
@ -642,7 +642,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
integral-part ::= [0] | [1-9] [0-9]{0,15}
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
root ::= "{" space a-kv ( "," space ( additional-kvs ) )? "}" space
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
)"""
});
@ -667,7 +667,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
integral-part ::= [0] | [1-9] [0-9]{0,15}
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
root ::= "{" space (a-kv a-rest | additional-kvs )? "}" space
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
)"""
});
@ -695,7 +695,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
integral-part ::= [0] | [1-9] [0-9]{0,15}
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
root ::= "{" space a-kv ( "," space ( b-kv b-rest | additional-kvs ) )? "}" space
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
)"""
});
@ -725,7 +725,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
foo ::= "{" space foo-a-kv "}" space
foo-a-kv ::= "\"a\"" space ":" space string
root ::= foo
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
)"""
});
@ -759,7 +759,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
integral-part ::= [0] | [1-9] [0-9]{0,15}
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
root ::= alternative-0 | alternative-1
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
@ -803,7 +803,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
integral-part ::= [0] | [1-9] [0-9]{0,15}
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
root ::= "{" space a-kv "," space b-kv ( "," space ( d-kv d-rest | c-kv ) )? "}" space
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
@ -851,7 +851,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
number-number-kv ::= "\"number\"" space ":" space number-number
number-number-root-kv ::= "\"root\"" space ":" space number
root ::= "{" space number-kv "}" space
space ::= " "?
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
}