Spaces:
Running
Running
File size: 6,354 Bytes
5301c48 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 |
from typing import Any, Dict, List, Literal, Optional
from pydantic import BaseModel, Field, field_validator
from starfish.llm.parser.json_parser import JSONParser
# Pydantic models for field definitions
class SimpleField(BaseModel):
    """Pydantic model for simple field definitions."""

    # Name under which the field appears in the schema (required).
    name: str = Field(..., description="Name of the field")
    # Type tag of the field; checked by ``validate_field_type`` below.
    type: str = Field(..., description="Type of the field (str, int, float, bool, list, dict)")
    # Optional free-text description; defaults to the empty string.
    description: str = Field("", description="Description of the field")
    # Whether the field is required; defaults to True.
    required: bool = Field(True, description="Whether the field is required")

    # Pydantic v2 field validators must be class methods: a validator whose
    # first parameter is named ``self`` is rejected when the class is defined.
    @field_validator("type")
    @classmethod
    def validate_field_type(cls, v: str) -> str:
        """Check that ``type`` is one of the supported type tags.

        Args:
            v: The value supplied for the ``type`` field.

        Returns:
            The value unchanged when it is a supported type tag.

        Raises:
            ValueError: If ``v`` is not one of the supported type tags.
        """
        # "null" is accepted in addition to the tags named in the field description.
        valid_types = ["str", "int", "float", "bool", "list", "dict", "null"]
        if v not in valid_types:
            raise ValueError(f"Field type must be one of {valid_types}")
        return v
class ArrayField(BaseModel):
    """Pydantic model for array field definitions."""

    # Name under which the array field appears in the schema (required).
    name: str = Field(
        default=...,
        description="Name of the field",
    )
    # Fixed discriminator: only the literal "list" is accepted.
    type: Literal["list"] = Field(
        default="list",
        description="Type is always 'list' for array fields",
    )
    # Free-form definition of the array's items (required).
    items: Dict[str, Any] = Field(
        default=...,
        description="Definition of array items",
    )
    # Optional free-text description; defaults to the empty string.
    description: str = Field(
        default="",
        description="Description of the field",
    )
    # Whether the field is required; defaults to True.
    required: bool = Field(
        default=True,
        description="Whether the field is required",
    )
class NestedObjectField(BaseModel):
    """Pydantic model for nested object field definitions."""

    # Name under which the nested object appears in the schema (required).
    name: str = Field(
        default=...,
        description="Name of the field",
    )
    # Fixed discriminator: only the literal "dict" is accepted.
    type: Literal["dict"] = Field(
        default="dict",
        description="Type is always 'dict' for nested objects",
    )
    # Mapping of property name to that property's definition (required).
    properties: Dict[str, Dict[str, Any]] = Field(
        default=...,
        description="Dictionary of property definitions",
    )
    # Optional free-text description; defaults to the empty string.
    description: str = Field(
        default="",
        description="Description of the field",
    )
    # Whether the nested object itself is required; defaults to True.
    required: bool = Field(
        default=True,
        description="Whether this field is required",
    )
    # Names of properties that are required inside the object; None when unspecified.
    required_props: Optional[List[str]] = Field(
        default=None,
        description="List of required properties in the nested object",
    )
class JsonSchemaBuilder:
    """A utility class to build JSON schemas programmatically.

    This can be used directly through function calls or as a backend for a UI.
    Enhanced with Pydantic validation: every ``add_*`` method validates its
    arguments through the matching Pydantic model and stores the dumped
    (plain ``dict``) form in ``self.fields``.
    """

    def __init__(self):
        """Initialize an empty schema builder."""
        # Validated field definitions, in insertion order.
        self.fields: List[Dict[str, Any]] = []

    def add_simple_field(self, name: str, field_type: str, description: str = "", required: bool = True) -> None:
        """Add a simple field to the schema. Validated with Pydantic.

        Args:
            name: Field name
            field_type: Field type (str, int, float, bool, list, dict, null)
            description: Field description
            required: Whether the field is required

        Raises:
            ValidationError: If the field definition is invalid
        """
        # Validate with Pydantic
        field = SimpleField(name=name, type=field_type, description=description, required=required)
        # Add validated field to the schema
        self.fields.append(field.model_dump())

    def add_nested_object(
        self,
        name: str,
        properties: Dict[str, Dict[str, Any]],
        description: str = "",
        required: bool = True,
        required_props: Optional[List[str]] = None,
    ) -> None:
        """Add a nested object field to the schema. Validated with Pydantic.

        Args:
            name: Field name
            properties: Dictionary of property definitions
            description: Field description
            required: Whether this field is required
            required_props: List of required properties in the nested object,
                or None when not specified

        Raises:
            ValidationError: If the field definition is invalid
        """
        # Validate with Pydantic
        field = NestedObjectField(
            name=name,
            properties=properties,
            description=description,
            required=required,
            required_props=required_props,
        )
        # Add validated field to the schema
        self.fields.append(field.model_dump())

    def add_array_field(self, name: str, items: Dict[str, Any], description: str = "", required: bool = True) -> None:
        """Add an array field to the schema. Validated with Pydantic.

        Args:
            name: Field name
            items: Definition of array items
            description: Field description
            required: Whether this field is required

        Raises:
            ValidationError: If the field definition is invalid
        """
        # Validate with Pydantic
        field = ArrayField(name=name, items=items, description=description, required=required)
        # Add validated field to the schema
        self.fields.append(field.model_dump())

    def get_schema(self) -> List[Dict[str, Any]]:
        """Get the built schema as a list of field definitions.

        Returns:
            The schema as a list of field definitions. This is the builder's
            own list, not a copy.
        """
        return self.fields

    def get_json_schema(self) -> Dict[str, Any]:
        """Get the schema as a JSON schema object.

        Returns:
            The result of ``JSONParser.convert_to_schema`` applied to the
            current field definitions
        """
        return JSONParser.convert_to_schema(self.fields)

    def preview_schema_format(self) -> str:
        """Preview the schema format instructions.

        Returns:
            The result of ``JSONParser.get_format_instructions`` applied to
            the current JSON schema
        """
        json_schema = self.get_json_schema()
        return JSONParser.get_format_instructions(json_schema)

    def clear(self) -> None:
        """Clear the schema builder."""
        # Rebind rather than empty in place, so a list previously handed out
        # by get_schema() keeps its contents.
        self.fields = []
### Example usage
# # Creating a schema with JsonSchemaBuilder
# from starfish.common.json_schema_utils import JsonSchemaBuilder
# # Create a builder
# builder = JsonSchemaBuilder()
# # Add fields
# builder.add_simple_field("name", "str", "Customer's full name")
# builder.add_simple_field("age", "int", "Age in years")
# builder.add_simple_field("is_active", "bool", "Whether the account is active")
# # Add a nested object
# builder.add_nested_object(
# name="address",
# properties={
# "street": {"type": "string", "description": "Street address"},
# "city": {"type": "string", "description": "City name"},
# "zip": {"type": "string", "description": "ZIP code"}
# },
# description="Customer address"
# )
# # Get the schema
# schema = builder.get_json_schema()
# # Preview the schema format instructions
# instructions = builder.preview_schema_format()
|