# File size: 8,392 Bytes
# cfeb3a6
from pydantic import BaseModel, Field
from typing import List, Dict, Any, Optional
from datetime import datetime
class FileInfo(BaseModel):
    """Information about the file being processed."""

    # Every field below is required: none of them declares a default.
    name: str = Field(
        description="File name",
    )
    # NOTE: inside this class body the field name shadows the builtin `type`.
    type: str = Field(
        description="File type/extension",
    )
    size_mb: float = Field(
        description="File size in MB",
    )
    path: str = Field(
        description="Full file path",
    )
class SimplifiedAgentConfig(BaseModel):
    """Simplified configuration for agent creation without complex nesting."""

    # Only `instructions` is required; the other two fields have defaults.
    instructions: str = Field(
        description="Single string instructions",
    )
    requirement_type: str = Field(
        default="standard",
        description="Type of requirements",
    )
    # default_factory gives each instance its own fresh empty list.
    custom_notes: List[str] = Field(
        default_factory=list,
        description="Simple notes",
    )
class ProcessingPlan(BaseModel):
    """Simplified processing plan for document analysis."""

    # --- Core plan description -------------------------------------------
    # `complexity` is the only field in this group with a default.
    document_type: str = Field(
        description="Document type (financial, legal, technical, etc.)",
    )
    analysis_objective: str = Field(
        description="Primary analysis objective",
    )
    complexity: str = Field(
        default="moderate",
        description="Complexity level",
    )
    processing_strategy: str = Field(
        description="Overall processing strategy",
    )

    # --- Agent configuration, flattened to string -> string --------------
    agent_configs: Dict[str, str] = Field(
        default_factory=dict,
        description="Simple agent configuration summaries",
    )

    # --- Schema hints expressed as plain string lists ---------------------
    # `data_fields` is required; the two lists after it default to empty.
    data_fields: List[str] = Field(
        description="List of suggested data fields to extract",
    )
    validation_rules: List[str] = Field(
        default_factory=list,
        description="Validation rules",
    )
    output_formats: List[str] = Field(
        default_factory=list,
        description="Required output formats",
    )

    # --- Free-form requirements and notes (both optional) -----------------
    requirements: List[str] = Field(
        default_factory=list,
        description="Processing requirements",
    )
    notes: str = Field(
        default="",
        description="Additional notes",
    )
class AgentConfiguration(BaseModel):
    """Configuration for a dynamically created agent."""

    # Unlike SimplifiedAgentConfig.instructions (a single str), this is a list.
    instructions: List[str] = Field(
        description="Specific instructions for this agent",
    )
    # Optional[...] means None is accepted, but the default is "" rather
    # than None.
    custom_prompt_template: Optional[str] = Field(
        default="",
        description="Custom prompt template for this agent",
    )
    special_requirements: List[str] = Field(
        default_factory=list,
        description="Special requirements or constraints",
    )
class DataPoint(BaseModel):
    """Individual data point extracted from document."""

    # Required: field_name, value, confidence_score.
    # The Optional[...] fields accept None, yet each defaults to "" (not None).
    field_name: str = Field(
        description="Name of the data field",
    )
    # The value is always carried as a string; `data_type` describes it.
    value: str = Field(
        description="Value of the field",
    )
    data_type: Optional[str] = Field(
        default="",
        description="Type of data (text, number, date, etc.)",
    )
    category: Optional[str] = Field(
        default="",
        description="Category or section this data belongs to",
    )
    unit: Optional[str] = Field(
        default="",
        description="Unit of measurement if applicable",
    )
    period: Optional[str] = Field(
        default="",
        description="Time period if applicable",
    )
    # NOTE(review): the 0-1 range is stated in the description only; no
    # ge/le constraint is declared, so out-of-range floats are not rejected.
    confidence_score: float = Field(
        description="Confidence score for the extraction (0-1)",
    )
    source_location: Optional[str] = Field(
        default="",
        description="Location in document where data was found",
    )
class ExtractedData(BaseModel):
    """Structured data extracted from the document."""

    # Required: data_points and confidence_score.
    data_points: List[DataPoint] = Field(
        description="List of extracted data points",
    )
    extraction_notes: str = Field(
        default="",
        description="Notes about the extraction process",
    )
    # No numeric bounds are declared for this score.
    confidence_score: float = Field(
        description="Overall confidence score for the extraction",
    )
    # When omitted, the default is produced per instance by calling
    # datetime.now() with no tz argument, i.e. a naive local timestamp.
    extraction_timestamp: datetime = Field(
        default_factory=datetime.now,
        description="When extraction was performed",
    )
    # Accepts None, but defaults to "" rather than None.
    document_summary: Optional[str] = Field(
        default="",
        description="Brief summary of the document content",
    )
class DataInsight(BaseModel):
    """Individual insight from data analysis."""

    # All fields are required. `insight_type` and `importance_level` are
    # plain strings; the values listed in their descriptions are not enforced.
    insight_type: str = Field(
        description="Type of insight (trend, comparison, etc.)",
    )
    description: str = Field(
        description="Description of the insight",
    )
    supporting_data: List[str] = Field(
        description="Data points that support this insight",
    )
    importance_level: str = Field(
        description="Importance level (high, medium, low)",
    )
class DataCategory(BaseModel):
    """A category of organized data."""

    # Both fields are required.
    category_name: str = Field(
        description="Name of the data category",
    )
    # A flat string -> string mapping; this is not a list of DataPoint models.
    data_points: Dict[str, str] = Field(
        description="Key-value pairs of data in this category",
    )
class ArrangedData(BaseModel):
    """Organized and analyzed data."""

    # All four fields are required, including `arrangement_notes`.
    organized_categories: List[DataCategory] = Field(
        description="Data organized into logical categories",
    )
    insights: List[DataInsight] = Field(
        description="Insights generated from the data",
    )
    summary: str = Field(
        description="Summary of the arranged data",
    )
    arrangement_notes: str = Field(
        description="Notes about the arrangement process",
    )
class CodeGenerationResult(BaseModel):
    """Result of code generation and execution."""

    # Everything except `error_messages` is required.
    generated_code: str = Field(
        description="The generated Python code",
    )
    execution_result: str = Field(
        description="Result of code execution",
    )
    output_files: List[str] = Field(
        description="List of output files created",
    )
    execution_success: bool = Field(
        description="Whether code execution was successful",
    )
    # Defaults to a fresh empty list per instance.
    error_messages: List[str] = Field(
        default_factory=list,
        description="Any error messages encountered",
    )
class DocumentAnalysisResult(BaseModel):
    """Complete result of document analysis team workflow."""

    # --- What was analysed and how ----------------------------------------
    document_type: str = Field(
        description="Type of document analyzed",
    )
    analysis_objective: str = Field(
        description="Original analysis objective",
    )
    processing_summary: str = Field(
        description="Summary of the entire processing workflow",
    )

    # --- Per-stage summaries ----------------------------------------------
    # Each stage is recorded as a plain string, not as a nested model.
    planning_notes: str = Field(
        description="Notes from the planning stage",
    )
    prompts_created: str = Field(
        description="Summary of prompts and schemas created",
    )
    data_extracted: str = Field(
        description="Summary of data extraction results",
    )
    data_arranged: str = Field(
        description="Summary of data arrangement and insights",
    )
    code_generated: str = Field(
        description="Summary of code generation and execution",
    )

    # --- Final outputs ----------------------------------------------------
    # `recommendations` is the only field in this model with a default.
    key_findings: List[str] = Field(
        description="Key findings from the analysis",
    )
    output_files_created: List[str] = Field(
        description="List of output files created",
    )
    success: bool = Field(
        description="Whether the analysis completed successfully",
    )
    recommendations: List[str] = Field(
        default_factory=list,
        description="Recommendations based on analysis",
    )
class ExtractionField(BaseModel):
    """Individual field specification for data extraction."""

    # The first three fields are required.
    field_name: str = Field(
        description="Name of the field to extract",
    )
    field_type: str = Field(
        description="Type of data (text, number, date, etc.)",
    )
    description: str = Field(
        description="Description of what this field represents",
    )
    # A field spec is treated as required unless explicitly set to False.
    required: bool = Field(
        default=True,
        description="Whether this field is required",
    )
class AgentPrompt(BaseModel):
    """Prompt configuration for a specific agent."""

    # All fields are required; apart from the name, each is a list of strings.
    agent_name: str = Field(
        description="Name of the agent",
    )
    specialized_instructions: List[str] = Field(
        description="Specialized instructions for this agent",
    )
    input_requirements: List[str] = Field(
        description="What input this agent needs",
    )
    output_requirements: List[str] = Field(
        description="What output this agent should produce",
    )
    success_criteria: List[str] = Field(
        description="Criteria for successful completion",
    )
class PromptsAndSchemas(BaseModel):
    """Prompts and schemas for all agents in the workflow."""

    # No field in this model declares a default, so all are required.

    # --- Extraction-specific settings -------------------------------------
    extraction_prompt: str = Field(
        description="Optimized prompt for data extraction",
    )
    extraction_fields: List[ExtractionField] = Field(
        description="List of fields to extract from the document",
    )
    arrangement_rules: List[str] = Field(
        description="Rules for organizing extracted data",
    )
    validation_criteria: List[str] = Field(
        description="Criteria for validating extracted data",
    )

    # --- Prompts and guidance covering every agent ------------------------
    agent_prompts: List[AgentPrompt] = Field(
        description="Specialized prompts for each agent",
    )
    workflow_coordination: List[str] = Field(
        description="Instructions for coordinating between agents",
    )
    quality_assurance: List[str] = Field(
        description="Quality assurance guidelines for all agents",
    )