import inspect import json from copy import copy from inspect import isclass, getdoc from types import NoneType from pydantic import BaseModel, create_model, Field from typing import Any, Type, List, get_args, get_origin, Tuple, Union, Optional, _GenericAlias from enum import Enum from typing import get_type_hints, Callable import re class PydanticDataType(Enum): """ Defines the data types supported by the grammar_generator. Attributes: STRING (str): Represents a string data type. BOOLEAN (str): Represents a boolean data type. INTEGER (str): Represents an integer data type. FLOAT (str): Represents a float data type. OBJECT (str): Represents an object data type. ARRAY (str): Represents an array data type. ENUM (str): Represents an enum data type. CUSTOM_CLASS (str): Represents a custom class data type. """ STRING = "string" TRIPLE_QUOTED_STRING = "triple_quoted_string" MARKDOWN_STRING = "markdown_string" BOOLEAN = "boolean" INTEGER = "integer" FLOAT = "float" OBJECT = "object" ARRAY = "array" ENUM = "enum" ANY = "any" NULL = "null" CUSTOM_CLASS = "custom-class" CUSTOM_DICT = "custom-dict" SET = "set" def map_pydantic_type_to_gbnf(pydantic_type: Type[Any]) -> str: if isclass(pydantic_type) and issubclass(pydantic_type, str): return PydanticDataType.STRING.value elif isclass(pydantic_type) and issubclass(pydantic_type, bool): return PydanticDataType.BOOLEAN.value elif isclass(pydantic_type) and issubclass(pydantic_type, int): return PydanticDataType.INTEGER.value elif isclass(pydantic_type) and issubclass(pydantic_type, float): return PydanticDataType.FLOAT.value elif isclass(pydantic_type) and issubclass(pydantic_type, Enum): return PydanticDataType.ENUM.value elif isclass(pydantic_type) and issubclass(pydantic_type, BaseModel): return format_model_and_field_name(pydantic_type.__name__) elif get_origin(pydantic_type) == list: element_type = get_args(pydantic_type)[0] return f"{map_pydantic_type_to_gbnf(element_type)}-list" elif get_origin(pydantic_type) == set: element_type = get_args(pydantic_type)[0] return f"{map_pydantic_type_to_gbnf(element_type)}-set" elif get_origin(pydantic_type) == Union: union_types = get_args(pydantic_type) union_rules = [map_pydantic_type_to_gbnf(ut) for ut in union_types] return f"union-{'-or-'.join(union_rules)}" elif get_origin(pydantic_type) == Optional: element_type = get_args(pydantic_type)[0] return f"optional-{map_pydantic_type_to_gbnf(element_type)}" elif isclass(pydantic_type): return f"{PydanticDataType.CUSTOM_CLASS.value}-{format_model_and_field_name(pydantic_type.__name__)}" elif get_origin(pydantic_type) == dict: key_type, value_type = get_args(pydantic_type) return f"custom-dict-key-type-{format_model_and_field_name(map_pydantic_type_to_gbnf(key_type))}-value-type-{format_model_and_field_name(map_pydantic_type_to_gbnf(value_type))}" else: return "unknown" def format_model_and_field_name(model_name: str) -> str: parts = re.findall('[A-Z][^A-Z]*', model_name) if not parts: # Check if the list is empty return model_name.lower().replace("_", "-") return '-'.join(part.lower().replace("_", "-") for part in parts) def generate_list_rule(element_type): """ Generate a GBNF rule for a list of a given element type. :param element_type: The type of the elements in the list (e.g., 'string'). :return: A string representing the GBNF rule for a list of the given type. """ rule_name = f"{map_pydantic_type_to_gbnf(element_type)}-list" element_rule = map_pydantic_type_to_gbnf(element_type) list_rule = fr'{rule_name} ::= "[" {element_rule} ("," {element_rule})* "]"' return list_rule def get_members_structure(cls, rule_name): if issubclass(cls, Enum): # Handle Enum types members = [f'\"\\\"{member.value}\\\"\"' for name, member in cls.__members__.items()] return f"{cls.__name__.lower()} ::= " + " | ".join(members) if cls.__annotations__ and cls.__annotations__ != {}: result = f'{rule_name} ::= "{{"' type_list_rules = [] # Modify this comprehension members = [f' \"\\\"{name}\\\"\" ":" {map_pydantic_type_to_gbnf(param_type)}' for name, param_type in cls.__annotations__.items() if name != 'self'] result += '"," '.join(members) result += ' "}"' return result, type_list_rules elif rule_name == "custom-class-any": result = f'{rule_name} ::= ' result += 'value' type_list_rules = [] return result, type_list_rules else: init_signature = inspect.signature(cls.__init__) parameters = init_signature.parameters result = f'{rule_name} ::= "{{"' type_list_rules = [] # Modify this comprehension too members = [f' \"\\\"{name}\\\"\" ":" {map_pydantic_type_to_gbnf(param.annotation)}' for name, param in parameters.items() if name != 'self' and param.annotation != inspect.Parameter.empty] result += '", "'.join(members) result += ' "}"' return result, type_list_rules def regex_to_gbnf(regex_pattern: str) -> str: """ Translate a basic regex pattern to a GBNF rule. Note: This function handles only a subset of simple regex patterns. """ gbnf_rule = regex_pattern # Translate common regex components to GBNF gbnf_rule = gbnf_rule.replace('\\d', '[0-9]') gbnf_rule = gbnf_rule.replace('\\s', '[ \t\n]') # Handle quantifiers and other regex syntax that is similar in GBNF # (e.g., '*', '+', '?', character classes) return gbnf_rule def generate_gbnf_integer_rules(max_digit=None, min_digit=None): """ Generate GBNF Integer Rules Generates GBNF (Generalized Backus-Naur Form) rules for integers based on the given maximum and minimum digits. Parameters: max_digit (int): The maximum number of digits for the integer. Default is None. min_digit (int): The minimum number of digits for the integer. Default is None. Returns: integer_rule (str): The identifier for the integer rule generated. additional_rules (list): A list of additional rules generated based on the given maximum and minimum digits. """ additional_rules = [] # Define the rule identifier based on max_digit and min_digit integer_rule = "integer-part" if max_digit is not None: integer_rule += f"-max{max_digit}" if min_digit is not None: integer_rule += f"-min{min_digit}" # Handling Integer Rules if max_digit is not None or min_digit is not None: # Start with an empty rule part integer_rule_part = '' # Add mandatory digits as per min_digit if min_digit is not None: integer_rule_part += '[0-9] ' * min_digit # Add optional digits up to max_digit if max_digit is not None: optional_digits = max_digit - (min_digit if min_digit is not None else 0) integer_rule_part += ''.join(['[0-9]? ' for _ in range(optional_digits)]) # Trim the rule part and append it to additional rules integer_rule_part = integer_rule_part.strip() if integer_rule_part: additional_rules.append(f'{integer_rule} ::= {integer_rule_part}') return integer_rule, additional_rules def generate_gbnf_float_rules(max_digit=None, min_digit=None, max_precision=None, min_precision=None): """ Generate GBNF float rules based on the given constraints. :param max_digit: Maximum number of digits in the integer part (default: None) :param min_digit: Minimum number of digits in the integer part (default: None) :param max_precision: Maximum number of digits in the fractional part (default: None) :param min_precision: Minimum number of digits in the fractional part (default: None) :return: A tuple containing the float rule and additional rules as a list Example Usage: max_digit = 3 min_digit = 1 max_precision = 2 min_precision = 1 generate_gbnf_float_rules(max_digit, min_digit, max_precision, min_precision) Output: ('float-3-1-2-1', ['integer-part-max3-min1 ::= [0-9] [0-9] [0-9]?', 'fractional-part-max2-min1 ::= [0-9] [0-9]?', 'float-3-1-2-1 ::= integer-part-max3-min1 "." fractional-part-max2-min *1']) Note: GBNF stands for Generalized Backus-Naur Form, which is a notation technique to specify the syntax of programming languages or other formal grammars. """ additional_rules = [] # Define the integer part rule integer_part_rule = "integer-part" + (f"-max{max_digit}" if max_digit is not None else "") + ( f"-min{min_digit}" if min_digit is not None else "") # Define the fractional part rule based on precision constraints fractional_part_rule = "fractional-part" fractional_rule_part = '' if max_precision is not None or min_precision is not None: fractional_part_rule += (f"-max{max_precision}" if max_precision is not None else "") + ( f"-min{min_precision}" if min_precision is not None else "") # Minimum number of digits fractional_rule_part = '[0-9]' * (min_precision if min_precision is not None else 1) # Optional additional digits fractional_rule_part += ''.join([' [0-9]?'] * ( (max_precision - (min_precision if min_precision is not None else 1)) if max_precision is not None else 0)) additional_rules.append(f'{fractional_part_rule} ::= {fractional_rule_part}') # Define the float rule float_rule = f"float-{max_digit if max_digit is not None else 'X'}-{min_digit if min_digit is not None else 'X'}-{max_precision if max_precision is not None else 'X'}-{min_precision if min_precision is not None else 'X'}" additional_rules.append(f'{float_rule} ::= {integer_part_rule} "." {fractional_part_rule}') # Generating the integer part rule definition, if necessary if max_digit is not None or min_digit is not None: integer_rule_part = '[0-9]' if min_digit is not None and min_digit > 1: integer_rule_part += ' [0-9]' * (min_digit - 1) if max_digit is not None: integer_rule_part += ''.join([' [0-9]?'] * (max_digit - (min_digit if min_digit is not None else 1))) additional_rules.append(f'{integer_part_rule} ::= {integer_rule_part.strip()}') return float_rule, additional_rules def generate_gbnf_rule_for_type(model_name, field_name, field_type, is_optional, processed_models, created_rules, field_info=None) -> \ Tuple[str, list]: """ Generate GBNF rule for a given field type. :param model_name: Name of the model. :param field_name: Name of the field. :param field_type: Type of the field. :param is_optional: Whether the field is optional. :param processed_models: List of processed models. :param created_rules: List of created rules. :param field_info: Additional information about the field (optional). :return: Tuple containing the GBNF type and a list of additional rules. :rtype: Tuple[str, list] """ rules = [] field_name = format_model_and_field_name(field_name) gbnf_type = map_pydantic_type_to_gbnf(field_type) if isclass(field_type) and issubclass(field_type, BaseModel): nested_model_name = format_model_and_field_name(field_type.__name__) nested_model_rules = generate_gbnf_grammar(field_type, processed_models, created_rules) rules.extend(nested_model_rules) gbnf_type, rules = nested_model_name, rules elif isclass(field_type) and issubclass(field_type, Enum): enum_values = [f'\"\\\"{e.value}\\\"\"' for e in field_type] # Adding escaped quotes enum_rule = f"{model_name}-{field_name} ::= {' | '.join(enum_values)}" rules.append(enum_rule) gbnf_type, rules = model_name + "-" + field_name, rules elif get_origin(field_type) == list or field_type == list: # Array element_type = get_args(field_type)[0] element_rule_name, additional_rules = generate_gbnf_rule_for_type(model_name, f"{field_name}-element", element_type, is_optional, processed_models, created_rules) rules.extend(additional_rules) array_rule = f"""{model_name}-{field_name} ::= "[" ws {element_rule_name} ("," ws {element_rule_name})* "]" """ rules.append(array_rule) gbnf_type, rules = model_name + "-" + field_name, rules elif get_origin(field_type) == set or field_type == set: # Array element_type = get_args(field_type)[0] element_rule_name, additional_rules = generate_gbnf_rule_for_type(model_name, f"{field_name}-element", element_type, is_optional, processed_models, created_rules) rules.extend(additional_rules) array_rule = f"""{model_name}-{field_name} ::= "[" ws {element_rule_name} ("," ws {element_rule_name})* "]" """ rules.append(array_rule) gbnf_type, rules = model_name + "-" + field_name, rules elif gbnf_type.startswith("custom-class-"): nested_model_rules, field_types = get_members_structure(field_type, gbnf_type) rules.append(nested_model_rules) elif gbnf_type.startswith("custom-dict-"): key_type, value_type = get_args(field_type) additional_key_type, additional_key_rules = generate_gbnf_rule_for_type(model_name, f"{field_name}-key-type", key_type, is_optional, processed_models, created_rules) additional_value_type, additional_value_rules = generate_gbnf_rule_for_type(model_name, f"{field_name}-value-type", value_type, is_optional, processed_models, created_rules) gbnf_type = fr'{gbnf_type} ::= "{{" ( {additional_key_type} ":" {additional_value_type} ("," {additional_key_type} ":" {additional_value_type})* )? "}}" ' rules.extend(additional_key_rules) rules.extend(additional_value_rules) elif gbnf_type.startswith("union-"): union_types = get_args(field_type) union_rules = [] for union_type in union_types: if isinstance(union_type, _GenericAlias): union_gbnf_type, union_rules_list = generate_gbnf_rule_for_type(model_name, field_name, union_type, False, processed_models, created_rules) union_rules.append(union_gbnf_type) rules.extend(union_rules_list) elif not issubclass(union_type, NoneType): union_gbnf_type, union_rules_list = generate_gbnf_rule_for_type(model_name, field_name, union_type, False, processed_models, created_rules) union_rules.append(union_gbnf_type) rules.extend(union_rules_list) # Defining the union grammar rule separately if len(union_rules) == 1: union_grammar_rule = f"{model_name}-{field_name}-optional ::= {' | '.join(union_rules)} | null" else: union_grammar_rule = f"{model_name}-{field_name}-union ::= {' | '.join(union_rules)}" rules.append(union_grammar_rule) if len(union_rules) == 1: gbnf_type = f"{model_name}-{field_name}-optional" else: gbnf_type = f"{model_name}-{field_name}-union" elif isclass(field_type) and issubclass(field_type, str): if field_info and hasattr(field_info, 'json_schema_extra') and field_info.json_schema_extra is not None: triple_quoted_string = field_info.json_schema_extra.get('triple_quoted_string', False) markdown_string = field_info.json_schema_extra.get('markdown_string', False) gbnf_type = PydanticDataType.TRIPLE_QUOTED_STRING.value if triple_quoted_string else PydanticDataType.STRING.value gbnf_type = PydanticDataType.MARKDOWN_STRING.value if markdown_string else gbnf_type elif field_info and hasattr(field_info, 'pattern'): # Convert regex pattern to grammar rule regex_pattern = field_info.regex.pattern gbnf_type = f"pattern-{field_name} ::= {regex_to_gbnf(regex_pattern)}" else: gbnf_type = PydanticDataType.STRING.value elif isclass(field_type) and issubclass(field_type, float) and field_info and hasattr(field_info, 'json_schema_extra') and field_info.json_schema_extra is not None: # Retrieve precision attributes for floats max_precision = field_info.json_schema_extra.get('max_precision') if field_info and hasattr(field_info, 'json_schema_extra') else None min_precision = field_info.json_schema_extra.get('min_precision') if field_info and hasattr(field_info, 'json_schema_extra') else None max_digits = field_info.json_schema_extra.get('max_digit') if field_info and hasattr(field_info, 'json_schema_extra') else None min_digits = field_info.json_schema_extra.get('min_digit') if field_info and hasattr(field_info, 'json_schema_extra') else None # Generate GBNF rule for float with given attributes gbnf_type, rules = generate_gbnf_float_rules(max_digit=max_digits, min_digit=min_digits, max_precision=max_precision, min_precision=min_precision) elif isclass(field_type) and issubclass(field_type, int) and field_info and hasattr(field_info, 'json_schema_extra') and field_info.json_schema_extra is not None: # Retrieve digit attributes for integers max_digits = field_info.json_schema_extra.get('max_digit') if field_info and hasattr(field_info, 'json_schema_extra') else None min_digits = field_info.json_schema_extra.get('min_digit') if field_info and hasattr(field_info, 'json_schema_extra') else None # Generate GBNF rule for integer with given attributes gbnf_type, rules = generate_gbnf_integer_rules(max_digit=max_digits, min_digit=min_digits) else: gbnf_type, rules = gbnf_type, [] if gbnf_type not in created_rules: return gbnf_type, rules else: if gbnf_type in created_rules: return gbnf_type, rules def generate_gbnf_grammar(model: Type[BaseModel], processed_models: set, created_rules: dict) -> (list, bool, bool): """ Generate GBnF Grammar Generates a GBnF grammar for a given model. :param model: A Pydantic model class to generate the grammar for. Must be a subclass of BaseModel. :param processed_models: A set of already processed models to prevent infinite recursion. :param created_rules: A dict containing already created rules to prevent duplicates. :return: A list of GBnF grammar rules in string format. And two booleans indicating if an extra markdown or triple quoted string is in the grammar. Example Usage: ``` model = MyModel processed_models = set() created_rules = dict() gbnf_grammar = generate_gbnf_grammar(model, processed_models, created_rules) ``` """ if model in processed_models: return [] processed_models.add(model) model_name = format_model_and_field_name(model.__name__) if not issubclass(model, BaseModel): # For non-Pydantic classes, generate model_fields from __annotations__ or __init__ if hasattr(model, '__annotations__') and model.__annotations__: model_fields = {name: (typ, ...) for name, typ in model.__annotations__.items()} else: init_signature = inspect.signature(model.__init__) parameters = init_signature.parameters model_fields = {name: (param.annotation, param.default) for name, param in parameters.items() if name != 'self'} else: # For Pydantic models, use model_fields and check for ellipsis (required fields) model_fields = model.__annotations__ model_rule_parts = [] nested_rules = [] has_markdown_code_block = False has_triple_quoted_string = False look_for_markdown_code_block = False look_for_triple_quoted_string = False for field_name, field_info in model_fields.items(): if not issubclass(model, BaseModel): field_type, default_value = field_info # Check if the field is optional (not required) is_optional = (default_value is not inspect.Parameter.empty) and (default_value is not Ellipsis) else: field_type = field_info field_info = model.model_fields[field_name] is_optional = field_info.is_required is False and get_origin(field_type) is Optional rule_name, additional_rules = generate_gbnf_rule_for_type(model_name, format_model_and_field_name(field_name), field_type, is_optional, processed_models, created_rules, field_info) look_for_markdown_code_block = True if rule_name == "markdown_string" else False look_for_triple_quoted_string = True if rule_name == "triple_quoted_string" else False if not look_for_markdown_code_block and not look_for_triple_quoted_string: if rule_name not in created_rules: created_rules[rule_name] = additional_rules model_rule_parts.append(f' ws \"\\\"{field_name}\\\"\" ": " {rule_name}') # Adding escaped quotes nested_rules.extend(additional_rules) else: has_triple_quoted_string = look_for_markdown_code_block has_markdown_code_block = look_for_triple_quoted_string fields_joined = r' "," "\n" '.join(model_rule_parts) model_rule = fr'{model_name} ::= "{{" "\n" {fields_joined} "\n" ws "}}"' if look_for_markdown_code_block or look_for_triple_quoted_string: model_rule += ' ws "}"' if has_triple_quoted_string: model_rule += '"\\n" triple-quoted-string' if has_markdown_code_block: model_rule += '"\\n" markdown-code-block' all_rules = [model_rule] + nested_rules return all_rules, has_markdown_code_block, has_triple_quoted_string def generate_gbnf_grammar_from_pydantic_models(models: List[Type[BaseModel]], outer_object_name: str = None, outer_object_content: str = None, list_of_outputs: bool = False) -> str: """ Generate GBNF Grammar from Pydantic Models. This method takes a list of Pydantic models and uses them to generate a GBNF grammar string. The generated grammar string can be used for parsing and validating data using the generated * grammar. Parameters: models (List[Type[BaseModel]]): A list of Pydantic models to generate the grammar from. outer_object_name (str): Outer object name for the GBNF grammar. If None, no outer object will be generated. Eg. "function" for function calling. outer_object_content (str): Content for the outer rule in the GBNF grammar. Eg. "function_parameters" or "params" for function calling. list_of_outputs (str, optional): Allows a list of output objects Returns: str: The generated GBNF grammar string. Examples: models = [UserModel, PostModel] grammar = generate_gbnf_grammar_from_pydantic(models) print(grammar) # Output: # root ::= UserModel | PostModel # ... """ processed_models = set() all_rules = [] created_rules = {} if outer_object_name is None: for model in models: model_rules, _, _ = generate_gbnf_grammar(model, processed_models, created_rules) all_rules.extend(model_rules) if list_of_outputs: root_rule = r'root ::= ws "[" grammar-models ("," grammar-models)* "]"' + "\n" else: root_rule = r'root ::= ws grammar-models' + "\n" root_rule += "grammar-models ::= " + " | ".join( [format_model_and_field_name(model.__name__) for model in models]) all_rules.insert(0, root_rule) return "\n".join(all_rules) elif outer_object_name is not None: if list_of_outputs: root_rule = fr'root ::= ws "[" {format_model_and_field_name(outer_object_name)} ("," {format_model_and_field_name(outer_object_name)})* "]"' + "\n" else: root_rule = f"root ::= {format_model_and_field_name(outer_object_name)}\n" model_rule = fr'{format_model_and_field_name(outer_object_name)} ::= ws "{{" ws "\"{outer_object_name}\"" ": " grammar-models' fields_joined = " | ".join( [fr'{format_model_and_field_name(model.__name__)}-grammar-model' for model in models]) grammar_model_rules = f'\ngrammar-models ::= {fields_joined}' mod_rules = [] for model in models: mod_rule = fr'{format_model_and_field_name(model.__name__)}-grammar-model ::= ws' mod_rule += fr'"\"{format_model_and_field_name(model.__name__)}\"" "," ws "\"{outer_object_content}\"" ws ":" ws {format_model_and_field_name(model.__name__)}' + '\n' mod_rules.append(mod_rule) grammar_model_rules += "\n" + "\n".join(mod_rules) look_for_markdown_code_block = False look_for_triple_quoted_string = False for model in models: model_rules, markdown_block, triple_quoted_string = generate_gbnf_grammar(model, processed_models, created_rules) all_rules.extend(model_rules) if markdown_block: look_for_markdown_code_block = True if triple_quoted_string: look_for_triple_quoted_string = True if not look_for_markdown_code_block and not look_for_triple_quoted_string: model_rule += ' ws "}"' all_rules.insert(0, root_rule + model_rule + grammar_model_rules) return "\n".join(all_rules) def get_primitive_grammar(grammar): """ Returns the needed GBNF primitive grammar for a given GBNF grammar string. Args: grammar (str): The string containing the GBNF grammar. Returns: str: GBNF primitive grammar string. """ type_list = [] if "string-list" in grammar: type_list.append(str) if "boolean-list" in grammar: type_list.append(bool) if "integer-list" in grammar: type_list.append(int) if "float-list" in grammar: type_list.append(float) additional_grammar = [generate_list_rule(t) for t in type_list] primitive_grammar = r""" boolean ::= "true" | "false" null ::= "null" string ::= "\"" ( [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) )* "\"" ws ws ::= ([ \t\n] ws)? float ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws integer ::= [0-9]+""" any_block = "" if "custom-class-any" in grammar: any_block = ''' value ::= object | array | string | number | boolean | null object ::= "{" ws ( string ":" ws value ("," ws string ":" ws value)* )? "}" ws array ::= "[" ws ( value ("," ws value)* )? "]" ws number ::= integer | float''' markdown_code_block_grammar = "" if "markdown-code-block" in grammar: markdown_code_block_grammar = r''' markdown-code-block ::= opening-triple-ticks markdown-code-block-content closing-triple-ticks markdown-code-block-content ::= ( [^`] | "`" [^`] | "`" "`" [^`] )* opening-triple-ticks ::= "```" "python" "\n" | "```" "c" "\n" | "```" "cpp" "\n" | "```" "txt" "\n" | "```" "text" "\n" | "```" "json" "\n" | "```" "javascript" "\n" | "```" "css" "\n" | "```" "html" "\n" | "```" "markdown" "\n" closing-triple-ticks ::= "```" "\n"''' if "triple-quoted-string" in grammar: markdown_code_block_grammar = r""" triple-quoted-string ::= triple-quotes triple-quoted-string-content triple-quotes triple-quoted-string-content ::= ( [^'] | "'" [^'] | "'" "'" [^'] )* triple-quotes ::= "'''" """ return "\n" + '\n'.join(additional_grammar) + any_block + primitive_grammar + markdown_code_block_grammar def generate_field_markdown(field_name: str, field_type: Type[Any], model: Type[BaseModel], depth=1) -> str: indent = ' ' * depth field_markdown = f"{indent}- **{field_name}** (`{field_type.__name__}`): " # Extracting field description from Pydantic Field using __model_fields__ field_info = model.model_fields.get(field_name) field_description = field_info.description if field_info and field_info.description else "No description available." field_markdown += field_description + '\n' # Handling nested BaseModel fields if isclass(field_type) and issubclass(field_type, BaseModel): field_markdown += f"{indent} - Details:\n" for name, type_ in field_type.__annotations__.items(): field_markdown += generate_field_markdown(name, type_, field_type, depth + 2) return field_markdown def generate_markdown_report(pydantic_models: List[Type[BaseModel]]) -> str: markdown = "" for model in pydantic_models: markdown += f"### {format_model_and_field_name(model.__name__)}\n" # Check if the model's docstring is different from BaseModel's docstring class_doc = getdoc(model) base_class_doc = getdoc(BaseModel) class_description = class_doc if class_doc and class_doc != base_class_doc else "No specific description available." markdown += f"{class_description}\n\n" markdown += "#### Fields\n" if isclass(model) and issubclass(model, BaseModel): for name, field_type in model.__annotations__.items(): markdown += generate_field_markdown(format_model_and_field_name(name), field_type, model) markdown += "\n" return markdown def format_json_example(example: dict, depth: int) -> str: """ Format a JSON example into a readable string with indentation. Args: example (dict): JSON example to be formatted. depth (int): Indentation depth. Returns: str: Formatted JSON example string. """ indent = ' ' * depth formatted_example = '{\n' for key, value in example.items(): value_text = f"'{value}'" if isinstance(value, str) else value formatted_example += f"{indent}{key}: {value_text},\n" formatted_example = formatted_example.rstrip(',\n') + '\n' + indent + '}' return formatted_example def generate_text_documentation(pydantic_models: List[Type[BaseModel]], model_prefix="Model", fields_prefix="Fields", documentation_with_field_description=True) -> str: """ Generate text documentation for a list of Pydantic models. Args: pydantic_models (List[Type[BaseModel]]): List of Pydantic model classes. model_prefix (str): Prefix for the model section. fields_prefix (str): Prefix for the fields section. documentation_with_field_description (bool): Include field descriptions in the documentation. Returns: str: Generated text documentation. """ documentation = "" pyd_models = [(model, True) for model in pydantic_models] for model, add_prefix in pyd_models: if add_prefix: documentation += f"{model_prefix}: {format_model_and_field_name(model.__name__)}\n" else: documentation += f"Model: {format_model_and_field_name(model.__name__)}\n" # Handling multi-line model description with proper indentation class_doc = getdoc(model) base_class_doc = getdoc(BaseModel) class_description = class_doc if class_doc and class_doc != base_class_doc else "" if class_description != "": documentation += " Description: " documentation += "\n" + format_multiline_description(class_description, 2) + "\n" if add_prefix: # Indenting the fields section documentation += f" {fields_prefix}:\n" else: documentation += f" Fields:\n" if isclass(model) and issubclass(model, BaseModel): for name, field_type in model.__annotations__.items(): # if name == "markdown_code_block": # continue if get_origin(field_type) == list: element_type = get_args(field_type)[0] if isclass(element_type) and issubclass(element_type, BaseModel): pyd_models.append((element_type, False)) if get_origin(field_type) == Union: element_types = get_args(field_type) for element_type in element_types: if isclass(element_type) and issubclass(element_type, BaseModel): pyd_models.append((element_type, False)) documentation += generate_field_text(name, field_type, model, documentation_with_field_description=documentation_with_field_description) documentation += "\n" if hasattr(model, 'Config') and hasattr(model.Config, 'json_schema_extra') and 'example' in model.Config.json_schema_extra: documentation += f" Expected Example Output for {format_model_and_field_name(model.__name__)}:\n" json_example = json.dumps(model.Config.json_schema_extra['example']) documentation += format_multiline_description(json_example, 2) + "\n" return documentation def generate_field_text(field_name: str, field_type: Type[Any], model: Type[BaseModel], depth=1, documentation_with_field_description=True) -> str: """ Generate text documentation for a Pydantic model field. Args: field_name (str): Name of the field. field_type (Type[Any]): Type of the field. model (Type[BaseModel]): Pydantic model class. depth (int): Indentation depth in the documentation. documentation_with_field_description (bool): Include field descriptions in the documentation. Returns: str: Generated text documentation for the field. """ indent = ' ' * depth field_info = model.model_fields.get(field_name) field_description = field_info.description if field_info and field_info.description else "" if get_origin(field_type) == list: element_type = get_args(field_type)[0] field_text = f"{indent}{field_name} ({format_model_and_field_name(field_type.__name__)} of {format_model_and_field_name(element_type.__name__)})" if field_description != "": field_text += ":\n" else: field_text += "\n" elif get_origin(field_type) == Union: element_types = get_args(field_type) types = [] for element_type in element_types: types.append(format_model_and_field_name(element_type.__name__)) field_text = f"{indent}{field_name} ({' or '.join(types)})" if field_description != "": field_text += ":\n" else: field_text += "\n" else: field_text = f"{indent}{field_name} ({format_model_and_field_name(field_type.__name__)})" if field_description != "": field_text += ":\n" else: field_text += "\n" if not documentation_with_field_description: return field_text if field_description != "": field_text += f"{indent} Description: " + field_description + "\n" # Check for and include field-specific examples if available if hasattr(model, 'Config') and hasattr(model.Config, 'json_schema_extra') and 'example' in model.Config.json_schema_extra: field_example = model.Config.json_schema_extra['example'].get(field_name) if field_example is not None: example_text = f"'{field_example}'" if isinstance(field_example, str) else field_example field_text += f"{indent} Example: {example_text}\n" if isclass(field_type) and issubclass(field_type, BaseModel): field_text += f"{indent} Details:\n" for name, type_ in field_type.__annotations__.items(): field_text += generate_field_text(name, type_, field_type, depth + 2) return field_text def format_multiline_description(description: str, indent_level: int) -> str: """ Format a multiline description with proper indentation. Args: description (str): Multiline description. indent_level (int): Indentation level. Returns: str: Formatted multiline description. """ indent = ' ' * indent_level return indent + description.replace('\n', '\n' + indent) def save_gbnf_grammar_and_documentation(grammar, documentation, grammar_file_path="./grammar.gbnf", documentation_file_path="./grammar_documentation.md"): """ Save GBNF grammar and documentation to specified files. Args: grammar (str): GBNF grammar string. documentation (str): Documentation string. grammar_file_path (str): File path to save the GBNF grammar. documentation_file_path (str): File path to save the documentation. Returns: None """ try: with open(grammar_file_path, 'w') as file: file.write(grammar + get_primitive_grammar(grammar)) print(f"Grammar successfully saved to {grammar_file_path}") except IOError as e: print(f"An error occurred while saving the grammar file: {e}") try: with open(documentation_file_path, 'w') as file: file.write(documentation) print(f"Documentation successfully saved to {documentation_file_path}") except IOError as e: print(f"An error occurred while saving the documentation file: {e}") def remove_empty_lines(string): """ Remove empty lines from a string. Args: string (str): Input string. Returns: str: String with empty lines removed. """ lines = string.splitlines() non_empty_lines = [line for line in lines if line.strip() != ""] string_no_empty_lines = "\n".join(non_empty_lines) return string_no_empty_lines def generate_and_save_gbnf_grammar_and_documentation(pydantic_model_list, grammar_file_path="./generated_grammar.gbnf", documentation_file_path="./generated_grammar_documentation.md", outer_object_name: str = None, outer_object_content: str = None, model_prefix: str = "Output Model", fields_prefix: str = "Output Fields", list_of_outputs: bool = False, documentation_with_field_description=True): """ Generate GBNF grammar and documentation, and save them to specified files. Args: pydantic_model_list: List of Pydantic model classes. grammar_file_path (str): File path to save the generated GBNF grammar. documentation_file_path (str): File path to save the generated documentation. outer_object_name (str): Outer object name for the GBNF grammar. If None, no outer object will be generated. Eg. "function" for function calling. outer_object_content (str): Content for the outer rule in the GBNF grammar. Eg. "function_parameters" or "params" for function calling. model_prefix (str): Prefix for the model section in the documentation. fields_prefix (str): Prefix for the fields section in the documentation. list_of_outputs (bool): Whether the output is a list of items. documentation_with_field_description (bool): Include field descriptions in the documentation. Returns: None """ documentation = generate_text_documentation(pydantic_model_list, model_prefix, fields_prefix, documentation_with_field_description=documentation_with_field_description) grammar = generate_gbnf_grammar_from_pydantic_models(pydantic_model_list, outer_object_name, outer_object_content, list_of_outputs) grammar = remove_empty_lines(grammar) save_gbnf_grammar_and_documentation(grammar, documentation, grammar_file_path, documentation_file_path) def generate_gbnf_grammar_and_documentation(pydantic_model_list, outer_object_name: str = None, outer_object_content: str = None, model_prefix: str = "Output Model", fields_prefix: str = "Output Fields", list_of_outputs: bool = False, documentation_with_field_description=True): """ Generate GBNF grammar and documentation for a list of Pydantic models. Args: pydantic_model_list: List of Pydantic model classes. outer_object_name (str): Outer object name for the GBNF grammar. If None, no outer object will be generated. Eg. "function" for function calling. outer_object_content (str): Content for the outer rule in the GBNF grammar. Eg. "function_parameters" or "params" for function calling. model_prefix (str): Prefix for the model section in the documentation. fields_prefix (str): Prefix for the fields section in the documentation. list_of_outputs (bool): Whether the output is a list of items. documentation_with_field_description (bool): Include field descriptions in the documentation. Returns: tuple: GBNF grammar string, documentation string. """ documentation = generate_text_documentation(copy(pydantic_model_list), model_prefix, fields_prefix, documentation_with_field_description=documentation_with_field_description) grammar = generate_gbnf_grammar_from_pydantic_models(pydantic_model_list, outer_object_name, outer_object_content, list_of_outputs) grammar = remove_empty_lines(grammar + get_primitive_grammar(grammar)) return grammar, documentation def generate_gbnf_grammar_and_documentation_from_dictionaries(dictionaries: List[dict], outer_object_name: str = None, outer_object_content: str = None, model_prefix: str = "Output Model", fields_prefix: str = "Output Fields", list_of_outputs: bool = False, documentation_with_field_description=True): """ Generate GBNF grammar and documentation from a list of dictionaries. Args: dictionaries (List[dict]): List of dictionaries representing Pydantic models. outer_object_name (str): Outer object name for the GBNF grammar. If None, no outer object will be generated. Eg. "function" for function calling. outer_object_content (str): Content for the outer rule in the GBNF grammar. Eg. "function_parameters" or "params" for function calling. model_prefix (str): Prefix for the model section in the documentation. fields_prefix (str): Prefix for the fields section in the documentation. list_of_outputs (bool): Whether the output is a list of items. documentation_with_field_description (bool): Include field descriptions in the documentation. Returns: tuple: GBNF grammar string, documentation string. """ pydantic_model_list = create_dynamic_models_from_dictionaries(dictionaries) documentation = generate_text_documentation(copy(pydantic_model_list), model_prefix, fields_prefix, documentation_with_field_description=documentation_with_field_description) grammar = generate_gbnf_grammar_from_pydantic_models(pydantic_model_list, outer_object_name, outer_object_content, list_of_outputs) grammar = remove_empty_lines(grammar + get_primitive_grammar(grammar)) return grammar, documentation def create_dynamic_model_from_function(func: Callable): """ Creates a dynamic Pydantic model from a given function's type hints and adds the function as a 'run' method. Args: func (Callable): A function with type hints from which to create the model. Returns: A dynamic Pydantic model class with the provided function as a 'run' method. """ # Extracting type hints from the provided function type_hints = get_type_hints(func) type_hints.pop('return', None) # Handling default values and annotations dynamic_fields = {} defaults = getattr(func, '__defaults__', ()) or () defaults_index = len(type_hints) - len(defaults) for index, (name, typ) in enumerate(type_hints.items()): if index >= defaults_index: default_value = defaults[index - defaults_index] dynamic_fields[name] = (typ, default_value) else: dynamic_fields[name] = (typ, ...) # Creating the dynamic model dynamicModel = create_model(f'{func.__name__}', **dynamic_fields) dynamicModel.__doc__ = getdoc(func) # Wrapping the original function to handle instance 'self' def run_method_wrapper(self): func_args = {name: getattr(self, name) for name in type_hints} return func(**func_args) # Adding the wrapped function as a 'run' method setattr(dynamicModel, 'run', run_method_wrapper) return dynamicModel def add_run_method_to_dynamic_model(model: Type[BaseModel], func: Callable): """ Add a 'run' method to a dynamic Pydantic model, using the provided function. Args: - model (Type[BaseModel]): Dynamic Pydantic model class. - func (Callable): Function to be added as a 'run' method to the model. Returns: - Type[BaseModel]: Pydantic model class with the added 'run' method. """ def run_method_wrapper(self): func_args = {name: getattr(self, name) for name in model.model_fields} return func(**func_args) # Adding the wrapped function as a 'run' method setattr(model, 'run', run_method_wrapper) return model def create_dynamic_models_from_dictionaries(dictionaries: List[dict]): """ Create a list of dynamic Pydantic model classes from a list of dictionaries. Args: - dictionaries (List[dict]): List of dictionaries representing model structures. Returns: - List[Type[BaseModel]]: List of generated dynamic Pydantic model classes. """ dynamic_models = [] for func in dictionaries: model_name = format_model_and_field_name(func.get("name", "")) dyn_model = convert_dictionary_to_to_pydantic_model(func, model_name) dynamic_models.append(dyn_model) return dynamic_models def map_grammar_names_to_pydantic_model_class(pydantic_model_list): output = {} for model in pydantic_model_list: output[format_model_and_field_name(model.__name__)] = model return output from enum import Enum def json_schema_to_python_types(schema): type_map = { 'any': Any, 'string': str, 'number': float, 'integer': int, 'boolean': bool, 'array': list, } return type_map[schema] def list_to_enum(enum_name, values): return Enum(enum_name, {value: value for value in values}) def convert_dictionary_to_to_pydantic_model(dictionary: dict, model_name: str = 'CustomModel') -> Type[BaseModel]: """ Convert a dictionary to a Pydantic model class. Args: - dictionary (dict): Dictionary representing the model structure. - model_name (str): Name of the generated Pydantic model. Returns: - Type[BaseModel]: Generated Pydantic model class. """ fields = {} if "properties" in dictionary: for field_name, field_data in dictionary.get("properties", {}).items(): if field_data == 'object': submodel = convert_dictionary_to_to_pydantic_model(dictionary, f'{model_name}_{field_name}') fields[field_name] = (submodel, ...) else: field_type = field_data.get('type', 'str') if field_data.get("enum", []): fields[field_name] = (list_to_enum(field_name, field_data.get("enum", [])), ...) if field_type == "array": items = field_data.get("items", {}) if items != {}: array = {"properties": items} array_type = convert_dictionary_to_to_pydantic_model(array, f'{model_name}_{field_name}_items') fields[field_name] = (List[array_type], ...) else: fields[field_name] = (list, ...) elif field_type == 'object': submodel = convert_dictionary_to_to_pydantic_model(field_data, f'{model_name}_{field_name}') fields[field_name] = (submodel, ...) else: field_type = json_schema_to_python_types(field_type) fields[field_name] = (field_type, ...) if "function" in dictionary: for field_name, field_data in dictionary.get("function", {}).items(): if field_name == "name": model_name = field_data elif field_name == "description": fields["__doc__"] = field_data elif field_name == "parameters": return convert_dictionary_to_to_pydantic_model(field_data, f'{model_name}') if "parameters" in dictionary: field_data = {"function": dictionary} return convert_dictionary_to_to_pydantic_model(field_data, f'{model_name}') custom_model = create_model(model_name, **fields) return custom_model