methunraj
refactor: streamline Excel report generation with improved error handling and mandatory steps
0daea93
raw
history blame
4.88 kB
import os
from pathlib import Path
from dotenv import load_dotenv
# Load variables from a .env file (if one is found) into the process
# environment. This must run before the Settings class body below, because the
# class attributes call os.getenv() at class-definition time.
load_dotenv()
class Settings:
    """Application configuration, resolved once when the class is defined.

    Every value is a class attribute. Values backed by ``os.getenv`` are read
    at class-definition time, so later changes to the environment are not
    picked up. Call :meth:`validate_config` at startup to check the values and
    create the temp directory.
    """

    # API credentials; ``None`` when the variable is not set.
    GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

    # File size limit in megabytes. validate_config() requires it to be
    # positive and warns when it exceeds 100.
    MAX_FILE_SIZE_MB = 50

    # File extensions (lowercase, without the leading dot).
    SUPPORTED_FILE_TYPES = [
        "pdf",
        "txt",
        "png",
        "jpg",
        "jpeg",
        "docx",
        "xlsx",
        "csv",
        "md",
        "json",
        "xml",
        "html",
        "py",
        "js",
        "ts",
        "doc",
        "xls",
        "ppt",
        "pptx",
    ]

    # Use /tmp for temporary files on Hugging Face Spaces (or override with
    # the TEMP_DIR env var).
    TEMP_DIR = Path(os.getenv("TEMP_DIR", "/tmp/data_extractor_temp"))
    DOCKER_IMAGE = os.getenv("DOCKER_IMAGE", "python:3.12-slim")

    # Model name per role; each can be overridden through the environment
    # variable of the same name.
    COORDINATOR_MODEL = os.getenv("COORDINATOR_MODEL", "gemini-2.5-pro")
    PROMPT_ENGINEER_MODEL = os.getenv("PROMPT_ENGINEER_MODEL", "gemini-2.5-pro")
    DATA_EXTRACTOR_MODEL = os.getenv("DATA_EXTRACTOR_MODEL", "gemini-2.5-pro")
    DATA_ARRANGER_MODEL = os.getenv("DATA_ARRANGER_MODEL", "gemini-2.0-flash")
    CODE_GENERATOR_MODEL = os.getenv("CODE_GENERATOR_MODEL", "gemini-2.0-flash")

    # Thinking budget per role.
    # NOTE(review): units are presumably tokens and -1 presumably means "let
    # the model decide" (Gemini convention) - confirm against the consumer.
    COORDINATOR_MODEL_THINKING_BUDGET = 2048
    PROMPT_ENGINEER_MODEL_THINKING_BUDGET = 2048
    DATA_EXTRACTOR_MODEL_THINKING_BUDGET = -1
    DATA_ARRANGER_MODEL_THINKING_BUDGET = 3072
    CODE_GENERATOR_MODEL_THINKING_BUDGET = 3072

    # Every model attribute that validate_config() must check. Kept in one
    # place so a newly added role cannot be silently skipped.
    _MODEL_FIELDS = (
        "COORDINATOR_MODEL",
        "PROMPT_ENGINEER_MODEL",
        "DATA_EXTRACTOR_MODEL",
        "DATA_ARRANGER_MODEL",
        "CODE_GENERATOR_MODEL",
    )

    # Name prefixes accepted without a warning.
    _KNOWN_MODEL_PREFIXES = ("gemini-", "gpt-", "claude-")

    @classmethod
    def validate_config(cls) -> bool:
        """Validate configuration and create necessary directories.

        All problems are collected before reporting, so a single call lists
        every issue instead of stopping at the first one.

        Returns:
            ``True`` when no errors were found. Warnings, if any, are logged.

        Raises:
            ValueError: If at least one error was found. The message lists all
                errors, followed by any warnings.
        """
        import logging

        errors = []
        warnings = []

        # Check required API keys.
        if not cls.GOOGLE_API_KEY:
            errors.append("GOOGLE_API_KEY is required - get it from Google AI Studio")

        # Check for optional but recommended API keys. Read the class
        # attribute (not the environment again) so the check agrees with the
        # value the rest of the application sees.
        if not cls.OPENAI_API_KEY:
            warnings.append("OPENAI_API_KEY not set - OpenAI models will not be available")

        # Create the temp directory, then prove it is writable by round-tripping
        # a probe file. The probe only runs if the directory could be created.
        try:
            cls.TEMP_DIR.mkdir(parents=True, exist_ok=True)
        except OSError as exc:
            errors.append(f"Cannot create temp directory {cls.TEMP_DIR}: {exc}")
        else:
            probe = cls.TEMP_DIR / ".write_test"
            try:
                probe.write_text("test")
                probe.unlink()
            except OSError as exc:
                errors.append(f"Cannot write to temp directory {cls.TEMP_DIR}: {exc}")

        # Validate file size limits.
        if cls.MAX_FILE_SIZE_MB <= 0:
            errors.append("MAX_FILE_SIZE_MB must be positive")
        elif cls.MAX_FILE_SIZE_MB > 100:
            warnings.append(f"MAX_FILE_SIZE_MB ({cls.MAX_FILE_SIZE_MB}) is very large")

        # Validate supported file types.
        if not cls.SUPPORTED_FILE_TYPES:
            errors.append("SUPPORTED_FILE_TYPES cannot be empty")

        # Validate every configured model name, including the coordinator and
        # prompt-engineer roles.
        for field in cls._MODEL_FIELDS:
            model_name = getattr(cls, field)
            if not model_name:
                errors.append(f"{field} cannot be empty")
            elif not model_name.startswith(cls._KNOWN_MODEL_PREFIXES):
                warnings.append(f"{field} '{model_name}' may not be a valid model name")

        # Report: errors are fatal, warnings are only logged.
        if errors:
            error_msg = "Configuration validation failed:\n" + "\n".join(
                f"- {error}" for error in errors
            )
            if warnings:
                error_msg += "\n\nWarnings:\n" + "\n".join(
                    f"- {warning}" for warning in warnings
                )
            raise ValueError(error_msg)

        if warnings:
            logger = logging.getLogger(__name__)
            logger.warning(
                "Configuration warnings:\n%s",
                "\n".join(f"- {warning}" for warning in warnings),
            )

        return True

    @classmethod
    def get_debug_info(cls) -> dict:
        """Get debug information about current configuration.

        API keys are reported only as booleans, never by value.

        Returns:
            A dict describing the interpreter, platform, temp directory, file
            limits, key presence, and the model configured for each role.
        """
        import platform
        import sys

        return {
            "python_version": sys.version,
            "platform": platform.platform(),
            "temp_dir": str(cls.TEMP_DIR),
            "temp_dir_exists": cls.TEMP_DIR.exists(),
            "supported_file_types": len(cls.SUPPORTED_FILE_TYPES),
            "max_file_size_mb": cls.MAX_FILE_SIZE_MB,
            "has_google_api_key": bool(cls.GOOGLE_API_KEY),
            "has_openai_api_key": bool(cls.OPENAI_API_KEY),
            "models": {
                "coordinator": cls.COORDINATOR_MODEL,
                "prompt_engineer": cls.PROMPT_ENGINEER_MODEL,
                "data_extractor": cls.DATA_EXTRACTOR_MODEL,
                "data_arranger": cls.DATA_ARRANGER_MODEL,
                "code_generator": cls.CODE_GENERATOR_MODEL,
            },
        }
# Shared module-level instance. Settings defines no instance state, so this
# object exposes exactly the class-level values.
settings = Settings()