methunraj
refactor: streamline Excel report generation with improved error handling and mandatory steps
0daea93
import os | |
from pathlib import Path | |
from dotenv import load_dotenv | |
load_dotenv() | |
class Settings:
    """Application configuration, evaluated once when the class is defined.

    Values are either fixed constants or read from environment variables
    (with the defaults shown next to each attribute).  ``validate_config``
    checks the values and prepares the temporary directory;
    ``get_debug_info`` summarises them without exposing the API keys.
    """

    # API credentials; ``None`` when the environment variable is unset.
    GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

    # Size limit and the file types treated as supported
    # (extension-style strings, without the leading dot).
    MAX_FILE_SIZE_MB = 50
    SUPPORTED_FILE_TYPES = [
        "pdf",
        "txt",
        "png",
        "jpg",
        "jpeg",
        "docx",
        "xlsx",
        "csv",
        "md",
        "json",
        "xml",
        "html",
        "py",
        "js",
        "ts",
        "doc",
        "xls",
        "ppt",
        "pptx",
    ]

    # Use /tmp for temporary files on Hugging Face Spaces
    # (or override with the TEMP_DIR env var).
    TEMP_DIR = Path(os.getenv("TEMP_DIR", "/tmp/data_extractor_temp"))
    DOCKER_IMAGE = os.getenv("DOCKER_IMAGE", "python:3.12-slim")

    # Model name per role, each overridable through its own env var.
    COORDINATOR_MODEL = os.getenv("COORDINATOR_MODEL", "gemini-2.5-pro")
    PROMPT_ENGINEER_MODEL = os.getenv("PROMPT_ENGINEER_MODEL", "gemini-2.5-pro")
    DATA_EXTRACTOR_MODEL = os.getenv("DATA_EXTRACTOR_MODEL", "gemini-2.5-pro")
    DATA_ARRANGER_MODEL = os.getenv("DATA_ARRANGER_MODEL", "gemini-2.0-flash")
    CODE_GENERATOR_MODEL = os.getenv("CODE_GENERATOR_MODEL", "gemini-2.0-flash")

    # Per-model thinking budgets.  NOTE(review): how -1 is interpreted is
    # decided by whatever consumes these values, which is not visible here.
    COORDINATOR_MODEL_THINKING_BUDGET = 2048
    PROMPT_ENGINEER_MODEL_THINKING_BUDGET = 2048
    DATA_EXTRACTOR_MODEL_THINKING_BUDGET = -1
    DATA_ARRANGER_MODEL_THINKING_BUDGET = 3072
    CODE_GENERATOR_MODEL_THINKING_BUDGET = 3072

    # Every model attribute that validation must cover.
    _MODEL_FIELDS = (
        "COORDINATOR_MODEL",
        "PROMPT_ENGINEER_MODEL",
        "DATA_EXTRACTOR_MODEL",
        "DATA_ARRANGER_MODEL",
        "CODE_GENERATOR_MODEL",
    )

    @classmethod
    def validate_config(cls) -> bool:
        """Validate configuration and create necessary directories.

        Collects every problem before reporting, so one call surfaces all
        issues at once.  Callable on the class or on an instance.

        Returns:
            True when no errors were found.  Warnings, if any, are logged.

        Raises:
            ValueError: if at least one error was found.  The message lists
                all errors, followed by any warnings.
        """
        errors = []
        warnings = []

        # Check required API keys
        if not cls.GOOGLE_API_KEY:
            errors.append("GOOGLE_API_KEY is required - get it from Google AI Studio")

        # Check for optional but recommended API keys.  Read the class
        # attribute (as for GOOGLE_API_KEY) so both keys come from one source.
        if not cls.OPENAI_API_KEY:
            warnings.append("OPENAI_API_KEY not set - OpenAI models will not be available")

        # Validate and create temp directory.  Broad catches are deliberate:
        # any failure here should be reported with the other findings rather
        # than abort validation half-way.
        try:
            cls.TEMP_DIR.mkdir(exist_ok=True, parents=True)
        except Exception as exc:
            errors.append(f"Cannot create temp directory {cls.TEMP_DIR}: {exc}")
        else:
            # Test write permissions with a throw-away probe file.
            probe = cls.TEMP_DIR / ".write_test"
            try:
                probe.write_text("test")
                probe.unlink()
            except Exception as exc:
                errors.append(f"Cannot write to temp directory {cls.TEMP_DIR}: {exc}")

        # Validate file size limits
        if cls.MAX_FILE_SIZE_MB <= 0:
            errors.append("MAX_FILE_SIZE_MB must be positive")
        elif cls.MAX_FILE_SIZE_MB > 100:
            warnings.append(f"MAX_FILE_SIZE_MB ({cls.MAX_FILE_SIZE_MB}) is very large")

        # Validate supported file types
        if not cls.SUPPORTED_FILE_TYPES:
            errors.append("SUPPORTED_FILE_TYPES cannot be empty")

        # Validate model names (all configured roles, not just a subset)
        known_prefixes = ("gemini-", "gpt-", "claude-")
        for field in cls._MODEL_FIELDS:
            model_name = getattr(cls, field)
            if not model_name:
                errors.append(f"{field} cannot be empty")
            elif not model_name.startswith(known_prefixes):
                warnings.append(f"{field} '{model_name}' may not be a valid model name")

        # Return validation results
        if errors:
            error_msg = "Configuration validation failed:\n" + "\n".join(
                f"- {error}" for error in errors
            )
            if warnings:
                error_msg += "\n\nWarnings:\n" + "\n".join(
                    f"- {warning}" for warning in warnings
                )
            raise ValueError(error_msg)

        if warnings:
            import logging

            logger = logging.getLogger(__name__)
            logger.warning(
                "Configuration warnings:\n%s",
                "\n".join(f"- {warning}" for warning in warnings),
            )
        return True

    @classmethod
    def get_debug_info(cls) -> dict:
        """Get debug information about current configuration.

        Returns:
            A dict with the interpreter version, platform string, temp
            directory path and whether it exists, the number of supported
            file types, the size limit, booleans telling whether each API
            key is set (never the key values), and the model name per role.
        """
        import platform
        import sys

        return {
            "python_version": sys.version,
            "platform": platform.platform(),
            "temp_dir": str(cls.TEMP_DIR),
            "temp_dir_exists": cls.TEMP_DIR.exists(),
            "supported_file_types": len(cls.SUPPORTED_FILE_TYPES),
            "max_file_size_mb": cls.MAX_FILE_SIZE_MB,
            "has_google_api_key": bool(cls.GOOGLE_API_KEY),
            "has_openai_api_key": bool(cls.OPENAI_API_KEY),
            "models": {
                "coordinator": cls.COORDINATOR_MODEL,
                "prompt_engineer": cls.PROMPT_ENGINEER_MODEL,
                "data_extractor": cls.DATA_EXTRACTOR_MODEL,
                "data_arranger": cls.DATA_ARRANGER_MODEL,
                "code_generator": cls.CODE_GENERATOR_MODEL,
            },
        }
settings = Settings() | |