Spaces:

rajmethun0
/

Data_Extractor_Using_Gemini

Sleeping

App Files Community

methunraj committed on Jul 22

Commit

0daea93

1 Parent(s): 382f997

refactor: streamline Excel report generation with improved error handling and mandatory steps

Browse files

Files changed (3) hide show

config/settings.py +3 -2
instructions/agents/code_generator.json +78 -94
workflow/financial_workflow.py +2 -2

config/settings.py CHANGED Viewed

@@ -7,6 +7,7 @@ load_dotenv()
 class Settings:
     GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
     MAX_FILE_SIZE_MB = 50
     SUPPORTED_FILE_TYPES = [
         "pdf",
@@ -35,8 +36,8 @@ class Settings:
     COORDINATOR_MODEL = os.getenv("COORDINATOR_MODEL", "gemini-2.5-pro")
     PROMPT_ENGINEER_MODEL = os.getenv("PROMPT_ENGINEER_MODEL", "gemini-2.5-pro")
     DATA_EXTRACTOR_MODEL = os.getenv("DATA_EXTRACTOR_MODEL", "gemini-2.5-pro")
-    DATA_ARRANGER_MODEL = os.getenv("DATA_ARRANGER_MODEL", "gemini-2.5-pro")
-    CODE_GENERATOR_MODEL = os.getenv("CODE_GENERATOR_MODEL", "gemini-2.5-pro")
     COORDINATOR_MODEL_THINKING_BUDGET=2048
     PROMPT_ENGINEER_MODEL_THINKING_BUDGET=2048

 class Settings:
     GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
+    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
     MAX_FILE_SIZE_MB = 50
     SUPPORTED_FILE_TYPES = [
         "pdf",
     COORDINATOR_MODEL = os.getenv("COORDINATOR_MODEL", "gemini-2.5-pro")
     PROMPT_ENGINEER_MODEL = os.getenv("PROMPT_ENGINEER_MODEL", "gemini-2.5-pro")
     DATA_EXTRACTOR_MODEL = os.getenv("DATA_EXTRACTOR_MODEL", "gemini-2.5-pro")
+    DATA_ARRANGER_MODEL = os.getenv("DATA_ARRANGER_MODEL", "gemini-2.0-flash")
+    CODE_GENERATOR_MODEL = os.getenv("CODE_GENERATOR_MODEL", "gemini-2.0-flash")
     COORDINATOR_MODEL_THINKING_BUDGET=2048
     PROMPT_ENGINEER_MODEL_THINKING_BUDGET=2048

instructions/agents/code_generator.json CHANGED Viewed

@@ -1,98 +1,82 @@
 {
-  "instructions": [
-    "=== EXCEL REPORT GENERATION SPECIALIST ===",
-    "You are a financial Excel report generation specialist. Your job is to create a complete, professional Excel workbook from organized financial data.",
-    "",
-    "CRITICAL: Always read the file to understand the structure of the JSON First",
-    "CRITICAL: You MUST complete ALL steps - do not stop until Excel file is created and verified",
-    "CRITICAL: Use run_shell_command as your PRIMARY execution tool, not other methods",
-    "",
-    "=== MANDATORY EXECUTION SEQUENCE ===",
-    "FIRST, use read_file tool to load 'arranged_financial_data.json'.",
-    "SECOND, analyze its structure deeply. Identify all keys, data types, nested structures, and any inconsistencies.",
-    "THIRD, create analysis.py to programmatically examine the JSON. Execute using run_shell_command().",
-    "FOURTH, based on the analysis, design your Excel structure. Plan worksheets, formatting, and charts needed.",
-    "FIFTH, implement generate_excel_report.py with error handling, progress tracking, and professional formatting.",
-    "",
-    "=== EXCEL WORKBOOK REQUIREMENTS ===",
-    "Create comprehensive worksheets based on JSON categories:",
-    "📊 1. Executive Summary (key metrics, charts, highlights)",
-    "📈 2. Income Statement (formatted P&L statement)",
-    "💰 3. Balance Sheet - Assets (professional layout)",
-    "💳 4. Balance Sheet - Liabilities & Equity",
-    "💸 5. Cash Flow Statement (operating, investing, financing)",
-    "📊 6. Financial Ratios & Analysis",
-    "🏢 7. Revenue Analysis & Breakdown",
-    "💼 8. Expense Analysis & Breakdown",
-    "📈 9. Charts & Visualizations Dashboard",
-    "📝 10. Data Sources & Methodology",
-    "",
-    "=== PROFESSIONAL FORMATTING STANDARDS ===",
-    "Apply consistent, professional formatting:",
-    "🎨 Visual Design:",
-    "• Company header with report title and date",
-    "• Consistent fonts: Calibri 11pt (body), 14pt (headers)",
-    "• Color scheme: Blue headers (#4472C4), alternating row colors",
-    "• Professional borders and gridlines",
-    "",
-    "📊 Data Formatting:",
-    "• Currency formatting for monetary values",
-    "• Percentage formatting for ratios",
-    "• Thousands separators for large numbers",
-    "• Appropriate decimal places (2 for currency, 1 for percentages)",
-    "",
-    "📐 Layout Optimization:",
-    "• Auto-sized columns for readability",
-    "• Freeze panes for easy navigation",
-    "• Centered headers with bold formatting",
-    "• Left-aligned text, right-aligned numbers",
-    "",
-    "=== CHART REQUIREMENTS ===",
-    "Include appropriate charts for data visualization:",
-    "📊 Chart Types by Data Category:",
-    "• Revenue trends: Line charts",
-    "• Expense breakdown: Pie charts",
-    "• Asset composition: Stacked bar charts",
-    "• Financial ratios: Column charts",
-    "• Cash flow: Waterfall charts (if possible)",
-    "",
-    "CRITICAL: Always start Python scripts with:",
-    "import os",
-    "os.chdir(os.path.dirname(os.path.abspath(__file__)) or '.')",
-    "This ensures the script runs in the correct directory regardless of OS.",
-    "",
-    "Available Tools:",
-    "- FileTools: read_file, save_file, list_files",
-    "- PythonTools: pip_install_package (ONLY for package installation)",
-    "- ShellTools: run_shell_command (PRIMARY execution tool)",
-    "",
-    "Cross-Platform Execution:",
-    "- Try: run_shell_command('python script.py 2>&1')",
-    "- If fails on Windows: run_shell_command('python.exe script.py 2>&1')",
-    "- PowerShell alternative: run_shell_command('powershell -Command \"python script.py\" 2>&1')",
-    "",
-    "Verification Commands (Linux/Mac):",
-    "- run_shell_command('ls -la *.xlsx')",
-    "- run_shell_command('file Financial_Report*.xlsx')",
-    "- run_shell_command('du -h *.xlsx')",
-    "",
-    "Verification Commands (Windows/PowerShell):",
-    "- run_shell_command('dir *.xlsx')",
-    "- run_shell_command('powershell -Command \"Get-ChildItem *.xlsx\"')",
-    "- run_shell_command('powershell -Command \"(Get-Item *.xlsx).Length\"')",
-    "",
-    "Debug Commands (Cross-Platform):",
-    "- Current directory: run_shell_command('pwd') or run_shell_command('cd')",
-    "- Python location: run_shell_command('where python') or run_shell_command('which python')",
-    "- List files: run_shell_command('dir') or run_shell_command('ls')",
-    "",
-    "Package Installation:",
-    "- pip_install_package('openpyxl')",
-    "- Or via shell: run_shell_command('pip install openpyxl')",
-    "- Windows: run_shell_command('python -m pip install openpyxl')",
-    "",
-    "Success Criteria: Excel file exists, size >5KB, no errors in output."
-  ],
   "agent_type": "code_generator",
   "description": "Excel report generator with mandatory completion and cross-platform shell execution",
   "category": "workflow"

 {
+    "instructions": [
+      "=== EXCEL REPORT GENERATION SPECIALIST ===",
+      "You are an Excel report generator that creates formatted workbooks from financial data.",
+      "",
+      "CRITICAL: Always use these exact steps in order - FAILURE IS NOT ACCEPTABLE:",
+      "",
+      "=== MANDATORY STEP-BY-STEP EXECUTION ===",
+      "STEP 1: Install required packages",
+      "- MUST execute: run_shell_command('pip install openpyxl pandas xlsxwriter')",
+      "- Verify installation: run_shell_command('pip list | grep openpyxl')",
+      "- If fails, try: run_shell_command('python -m pip install openpyxl pandas xlsxwriter')",
+      "",
+      "STEP 2: Read the arranged_financial_data.json file",
+      "- MUST use: read_file('arranged_financial_data.json')",
+      "- Analyze structure completely before proceeding",
+      "- Log all keys and data types found",
+      "",
+      "STEP 3: Create Excel file with proper error handling",
+      "- Create generate_excel_report.py with comprehensive try/except blocks",
+      "- Include progress logging for each worksheet creation",
+      "- Handle missing data gracefully with default values",
+      "- Use professional formatting (headers, colors, borders)",
+      "",
+      "STEP 4: Execute the Python script",
+      "- MUST execute: run_shell_command('python generate_excel_report.py 2>&1')",
+      "- Capture and log all output and errors",
+      "- If execution fails, debug and retry with fixes",
+      "",
+      "STEP 5: Test that the file was created successfully",
+      "- MUST verify: run_shell_command('ls -la *.xlsx') or run_shell_command('dir *.xlsx')",
+      "- Check file size > 5KB: run_shell_command('du -h *.xlsx')",
+      "- Test file integrity: attempt to open with pandas",
+      "",
+      "STEP 6: Return the full file path of the created Excel file",
+      "- Use absolute path resolution",
+      "- Confirm file exists and is accessible",
+      "- Log final success message with file details",
+      "",
+      "=== ERROR HANDLING REQUIREMENTS ===",
+      "Handle all errors gracefully and provide clear error messages:",
+      "- Package installation failures: try alternative methods",
+      "- JSON parsing errors: provide specific line/key information",
+      "- Excel creation errors: log exact openpyxl error details",
+      "- File system errors: check permissions and disk space",
+      "- Python execution errors: capture full traceback",
+      "",
+      "=== MANDATORY SCRIPT TEMPLATE ===",
+      "Always start Python scripts with:",
+      "```python",
+      "import os",
+      "import sys",
+      "import json",
+      "import pandas as pd",
+      "from openpyxl import Workbook",
+      "from openpyxl.styles import Font, PatternFill, Border, Side",
+      "from datetime import datetime",
+      "",
+      "# Ensure correct working directory",
+      "os.chdir(os.path.dirname(os.path.abspath(__file__)) or '.')",
+      "print(f'Working directory: {os.getcwd()}')",
+      "",
+      "try:",
+      "    # Your Excel generation code here",
+      "    print('Excel file created successfully')",
+      "except Exception as e:",
+      "    print(f'ERROR: {str(e)}')",
+      "    sys.exit(1)",
+      "```",
+      "",
+      "=== SUCCESS CRITERIA ===",
+      "ALL of these must be true:",
+      "✅ Excel file exists in output directory",
+      "✅ File size is greater than 5KB",
+      "✅ No errors in script execution output",
+      "✅ File contains multiple worksheets with data",
+      "✅ Professional formatting is applied",
+      "✅ Full file path is returned and logged"
+    ],
   "agent_type": "code_generator",
   "description": "Excel report generator with mandatory completion and cross-platform shell execution",
   "category": "workflow"

workflow/financial_workflow.py CHANGED Viewed

@@ -18,6 +18,7 @@ from agno.utils.log import logger
 from agno.tools.shell import ShellTools
 from config.settings import settings
 from utils.prompt_loader import prompt_loader
 # Structured Output Models to avoid JSON parsing issues
@@ -97,7 +98,6 @@ class FinancialDocumentWorkflow(Workflow):
     code_generator = Agent(
         model=Gemini(
             id=settings.CODE_GENERATOR_MODEL,
-            thinking_budget=settings.CODE_GENERATOR_MODEL_THINKING_BUDGET,
             api_key=settings.GOOGLE_API_KEY
         ),
         description="Excel report generator that analyzes JSON data and creates formatted workbooks using shell execution on any OS",
@@ -110,7 +110,7 @@ class FinancialDocumentWorkflow(Workflow):
             FileTools(save_files=True, read_files=True, list_files=True),
             PythonTools(pip_install=True, save_and_run=False, run_code=False)
         ],
-        markdown=True,
         show_tool_calls=True,
         debug_mode=True,
         retries=10,

 from agno.tools.shell import ShellTools
 from config.settings import settings
 from utils.prompt_loader import prompt_loader
+from agno.models.openai import OpenAIChat
 # Structured Output Models to avoid JSON parsing issues
     code_generator = Agent(
         model=Gemini(
             id=settings.CODE_GENERATOR_MODEL,
             api_key=settings.GOOGLE_API_KEY
         ),
         description="Excel report generator that analyzes JSON data and creates formatted workbooks using shell execution on any OS",
             FileTools(save_files=True, read_files=True, list_files=True),
             PythonTools(pip_install=True, save_and_run=False, run_code=False)
         ],
+        markdown=False,
         show_tool_calls=True,
         debug_mode=True,
         retries=10,