# File size: 8,392 Bytes
# cfeb3a6
from pydantic import BaseModel, Field
from typing import List, Dict, Any, Optional
from datetime import datetime


class FileInfo(BaseModel):
    """Information about the file being processed."""

    # The docstring and every `description` string are kept verbatim because
    # pydantic publishes them in the model's generated JSON schema.
    # None of the fields declares a default, so all four are required.
    name: str = Field(
        description="File name",
    )
    type: str = Field(
        description="File type/extension",
    )
    size_mb: float = Field(
        description="File size in MB",
    )
    path: str = Field(
        description="Full file path",
    )


class SimplifiedAgentConfig(BaseModel):
    """Simplified configuration for agent creation without complex nesting."""

    # Docstring and descriptions are runtime-visible (JSON schema), so they
    # are left untouched.

    # Required: the only field here without a default.
    instructions: str = Field(
        description="Single string instructions",
    )

    # Optional inputs with defaults.
    requirement_type: str = Field(
        description="Type of requirements",
        default="standard",
    )
    custom_notes: List[str] = Field(
        description="Simple notes",
        # A factory gives each instance its own fresh list.
        default_factory=list,
    )


class ProcessingPlan(BaseModel):
    """Simplified processing plan for document analysis."""

    # Docstring and descriptions are runtime-visible (JSON schema), so they
    # are left untouched. Field order is preserved as well.

    # --- Core plan description -------------------------------------------
    document_type: str = Field(
        description="Document type (financial, legal, technical, etc.)",
    )
    analysis_objective: str = Field(
        description="Primary analysis objective",
    )
    complexity: str = Field(
        description="Complexity level",
        default="moderate",
    )
    processing_strategy: str = Field(
        description="Overall processing strategy",
    )

    # --- Agent configuration, flattened to plain string -> string ---------
    agent_configs: Dict[str, str] = Field(
        description="Simple agent configuration summaries",
        default_factory=dict,
    )

    # --- Schema hints expressed with basic list-of-string types -----------
    # `data_fields` is the only required entry in this group.
    data_fields: List[str] = Field(
        description="List of suggested data fields to extract",
    )
    validation_rules: List[str] = Field(
        description="Validation rules",
        default_factory=list,
    )
    output_formats: List[str] = Field(
        description="Required output formats",
        default_factory=list,
    )

    # --- Free-form requirements and notes ----------------------------------
    requirements: List[str] = Field(
        description="Processing requirements",
        default_factory=list,
    )
    notes: str = Field(
        description="Additional notes",
        default="",
    )


class AgentConfiguration(BaseModel):
    """Configuration for a dynamically created agent."""

    # Docstring and descriptions are runtime-visible (JSON schema), so they
    # are left untouched.

    # Required list of instructions (no default declared).
    instructions: List[str] = Field(
        description="Specific instructions for this agent",
    )
    # Annotated Optional, so None is accepted, but the default is the empty
    # string rather than None.
    custom_prompt_template: Optional[str] = Field(
        description="Custom prompt template for this agent",
        default="",
    )
    special_requirements: List[str] = Field(
        description="Special requirements or constraints",
        default_factory=list,
    )


class DataPoint(BaseModel):
    """Individual data point extracted from document."""

    # Docstring and descriptions are runtime-visible (JSON schema), so they
    # are left untouched.

    # Required identity of the data point; the value is always carried as str.
    field_name: str = Field(
        description="Name of the data field",
    )
    value: str = Field(
        description="Value of the field",
    )

    # Optional descriptive metadata. Each is annotated Optional (None is
    # accepted) but defaults to the empty string, not None.
    data_type: Optional[str] = Field(
        description="Type of data (text, number, date, etc.)",
        default="",
    )
    category: Optional[str] = Field(
        description="Category or section this data belongs to",
        default="",
    )
    unit: Optional[str] = Field(
        description="Unit of measurement if applicable",
        default="",
    )
    period: Optional[str] = Field(
        description="Time period if applicable",
        default="",
    )

    # Required. NOTE(review): the 0-1 range is stated only in the description;
    # no numeric bound is declared, so out-of-range floats pass validation.
    confidence_score: float = Field(
        description="Confidence score for the extraction (0-1)",
    )
    source_location: Optional[str] = Field(
        description="Location in document where data was found",
        default="",
    )


class ExtractedData(BaseModel):
    """Structured data extracted from the document."""

    # Docstring and descriptions are runtime-visible (JSON schema), so they
    # are left untouched.

    # Required payload: the individual extracted points.
    data_points: List[DataPoint] = Field(
        description="List of extracted data points",
    )
    extraction_notes: str = Field(
        description="Notes about the extraction process",
        default="",
    )
    # Required; no numeric range is declared for this score.
    confidence_score: float = Field(
        description="Overall confidence score for the extraction",
    )
    # Filled in by calling datetime.now() when the caller supplies no value.
    # NOTE(review): datetime.now() without a tz argument returns a naive
    # local-time value - confirm that consumers expect that.
    extraction_timestamp: datetime = Field(
        description="When extraction was performed",
        default_factory=datetime.now,
    )
    # Annotated Optional (None accepted) but defaults to the empty string.
    document_summary: Optional[str] = Field(
        description="Brief summary of the document content",
        default="",
    )


class DataInsight(BaseModel):
    """Individual insight from data analysis."""

    # Docstring and descriptions are runtime-visible (JSON schema), so they
    # are left untouched.
    # Every field is required. `insight_type` and `importance_level` are
    # plain strings; the values named in their descriptions are not enforced.
    insight_type: str = Field(
        description="Type of insight (trend, comparison, etc.)",
    )
    description: str = Field(
        description="Description of the insight",
    )
    supporting_data: List[str] = Field(
        description="Data points that support this insight",
    )
    importance_level: str = Field(
        description="Importance level (high, medium, low)",
    )


class DataCategory(BaseModel):
    """A category of organized data."""

    # Docstring and descriptions are runtime-visible (JSON schema), so they
    # are left untouched. Both fields are required.
    category_name: str = Field(
        description="Name of the data category",
    )
    # Flat mapping: both keys and values are strings.
    data_points: Dict[str, str] = Field(
        description="Key-value pairs of data in this category",
    )
    
class ArrangedData(BaseModel):
    """Organized and analyzed data."""

    # Docstring and descriptions are runtime-visible (JSON schema), so they
    # are left untouched. All four fields are required.

    # Nested models: categories of key/value data plus derived insights.
    organized_categories: List[DataCategory] = Field(
        description="Data organized into logical categories",
    )
    insights: List[DataInsight] = Field(
        description="Insights generated from the data",
    )

    # Free-text companions to the structured content above.
    summary: str = Field(
        description="Summary of the arranged data",
    )
    arrangement_notes: str = Field(
        description="Notes about the arrangement process",
    )


class CodeGenerationResult(BaseModel):
    """Result of code generation and execution."""

    # Docstring and descriptions are runtime-visible (JSON schema), so they
    # are left untouched.

    # Required: the code, its execution result text, produced files, and the
    # success flag.
    generated_code: str = Field(
        description="The generated Python code",
    )
    execution_result: str = Field(
        description="Result of code execution",
    )
    output_files: List[str] = Field(
        description="List of output files created",
    )
    execution_success: bool = Field(
        description="Whether code execution was successful",
    )

    # Optional: starts as a fresh empty list when not supplied.
    error_messages: List[str] = Field(
        description="Any error messages encountered",
        default_factory=list,
    )


class DocumentAnalysisResult(BaseModel):
    """Complete result of document analysis team workflow."""

    # Docstring and descriptions are runtime-visible (JSON schema), so they
    # are left untouched. Field order is preserved.

    # --- What was analyzed and an overall summary --------------------------
    document_type: str = Field(
        description="Type of document analyzed",
    )
    analysis_objective: str = Field(
        description="Original analysis objective",
    )
    processing_summary: str = Field(
        description="Summary of the entire processing workflow",
    )

    # --- Per-stage summaries, each a required plain string -----------------
    planning_notes: str = Field(
        description="Notes from the planning stage",
    )
    prompts_created: str = Field(
        description="Summary of prompts and schemas created",
    )
    data_extracted: str = Field(
        description="Summary of data extraction results",
    )
    data_arranged: str = Field(
        description="Summary of data arrangement and insights",
    )
    code_generated: str = Field(
        description="Summary of code generation and execution",
    )

    # --- Final outputs -------------------------------------------------------
    key_findings: List[str] = Field(
        description="Key findings from the analysis",
    )
    output_files_created: List[str] = Field(
        description="List of output files created",
    )
    success: bool = Field(
        description="Whether the analysis completed successfully",
    )
    # The only optional field: defaults to a fresh empty list.
    recommendations: List[str] = Field(
        description="Recommendations based on analysis",
        default_factory=list,
    )


class ExtractionField(BaseModel):
    """Individual field specification for data extraction."""

    # Docstring and descriptions are runtime-visible (JSON schema), so they
    # are left untouched.

    # Required description of the field to extract. `field_type` is a free
    # string; the kinds listed in its description are not enforced.
    field_name: str = Field(
        description="Name of the field to extract",
    )
    field_type: str = Field(
        description="Type of data (text, number, date, etc.)",
    )
    description: str = Field(
        description="Description of what this field represents",
    )

    # Defaults to True when the caller does not say otherwise.
    required: bool = Field(
        description="Whether this field is required",
        default=True,
    )

class AgentPrompt(BaseModel):
    """Prompt configuration for a specific agent."""

    # Docstring and descriptions are runtime-visible (JSON schema), so they
    # are left untouched. Every field is required.
    agent_name: str = Field(
        description="Name of the agent",
    )

    # Four required lists of strings describing the agent's prompt.
    specialized_instructions: List[str] = Field(
        description="Specialized instructions for this agent",
    )
    input_requirements: List[str] = Field(
        description="What input this agent needs",
    )
    output_requirements: List[str] = Field(
        description="What output this agent should produce",
    )
    success_criteria: List[str] = Field(
        description="Criteria for successful completion",
    )

class PromptsAndSchemas(BaseModel):
    """Prompts and schemas for all agents in the workflow."""

    # Docstring and descriptions are runtime-visible (JSON schema), so they
    # are left untouched. Every field is required (no defaults declared).

    # --- Data-extraction specifics ------------------------------------------
    extraction_prompt: str = Field(
        description="Optimized prompt for data extraction",
    )
    extraction_fields: List[ExtractionField] = Field(
        description="List of fields to extract from the document",
    )
    arrangement_rules: List[str] = Field(
        description="Rules for organizing extracted data",
    )
    validation_criteria: List[str] = Field(
        description="Criteria for validating extracted data",
    )

    # --- Prompts and guidance covering every agent --------------------------
    agent_prompts: List[AgentPrompt] = Field(
        description="Specialized prompts for each agent",
    )
    workflow_coordination: List[str] = Field(
        description="Instructions for coordinating between agents",
    )
    quality_assurance: List[str] = Field(
        description="Quality assurance guidelines for all agents",
    )