File size: 4,879 Bytes
cfeb3a6 90b0a17 0daea93 cfeb3a6 0daea93 cfeb3a6 90b0a17 17e3d1d 90b0a17 cfeb3a6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
import os
from pathlib import Path
from dotenv import load_dotenv
load_dotenv()
class Settings:
    """Application configuration, read from environment variables at import time.

    Every value is a class attribute, so it can be read either from the class
    itself or from an instance. Call :meth:`validate_config` to check the
    values and prepare the temp directory, and :meth:`get_debug_info` for a
    snapshot of the configuration that does not expose the API keys.
    """

    # API credentials; each is ``None`` when its environment variable is unset.
    GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

    # Maximum file size in megabytes (validated to be positive below).
    MAX_FILE_SIZE_MB = 50

    # Accepted file extensions, lower-case and without the leading dot.
    SUPPORTED_FILE_TYPES = [
        "pdf",
        "txt",
        "png",
        "jpg",
        "jpeg",
        "docx",
        "xlsx",
        "csv",
        "md",
        "json",
        "xml",
        "html",
        "py",
        "js",
        "ts",
        "doc",
        "xls",
        "ppt",
        "pptx",
    ]

    # Use /tmp for temporary files on Hugging Face Spaces
    # (or override with the TEMP_DIR env var).
    TEMP_DIR = Path(os.getenv("TEMP_DIR", "/tmp/data_extractor_temp"))

    DOCKER_IMAGE = os.getenv("DOCKER_IMAGE", "python:3.12-slim")

    # Model name per role; each can be overridden through the environment
    # variable of the same name.
    COORDINATOR_MODEL = os.getenv("COORDINATOR_MODEL", "gemini-2.5-pro")
    PROMPT_ENGINEER_MODEL = os.getenv("PROMPT_ENGINEER_MODEL", "gemini-2.5-pro")
    DATA_EXTRACTOR_MODEL = os.getenv("DATA_EXTRACTOR_MODEL", "gemini-2.5-pro")
    DATA_ARRANGER_MODEL = os.getenv("DATA_ARRANGER_MODEL", "gemini-2.0-flash")
    CODE_GENERATOR_MODEL = os.getenv("CODE_GENERATOR_MODEL", "gemini-2.0-flash")

    # Thinking budget per role (hard-coded, not environment-driven).
    # NOTE(review): -1 presumably requests a dynamic/automatic budget from the
    # model API -- confirm against the provider documentation.
    COORDINATOR_MODEL_THINKING_BUDGET = 2048
    PROMPT_ENGINEER_MODEL_THINKING_BUDGET = 2048
    DATA_EXTRACTOR_MODEL_THINKING_BUDGET = -1
    DATA_ARRANGER_MODEL_THINKING_BUDGET = 3072
    CODE_GENERATOR_MODEL_THINKING_BUDGET = 3072

    @classmethod
    def validate_config(cls):
        """Validate configuration and create necessary directories.

        Side effects: creates ``TEMP_DIR`` (including parents) if it is
        missing, and writes then deletes a ``.write_test`` probe file in it.
        Non-fatal findings are logged as a single warning record.

        Returns:
            ``True`` when no fatal problem was found.

        Raises:
            ValueError: if any fatal problem was found. The message lists
                every error and, after them, any warnings collected.
        """
        errors = []
        warnings = []

        # Check required API keys
        if not cls.GOOGLE_API_KEY:
            errors.append("GOOGLE_API_KEY is required - get it from Google AI Studio")

        # Check for optional but recommended API keys. Read the class
        # attribute (not the environment again) so this check always agrees
        # with the value the rest of the application sees.
        if not cls.OPENAI_API_KEY:
            warnings.append("OPENAI_API_KEY not set - OpenAI models will not be available")

        # Validate and create temp directory
        try:
            cls.TEMP_DIR.mkdir(exist_ok=True, parents=True)
        except OSError as e:
            errors.append(f"Cannot create temp directory {cls.TEMP_DIR}: {e}")
        else:
            # The directory exists; prove it is writable with a throwaway file.
            test_file = cls.TEMP_DIR / ".write_test"
            try:
                test_file.write_text("test")
                test_file.unlink()
            except OSError as e:
                errors.append(f"Cannot write to temp directory {cls.TEMP_DIR}: {e}")

        # Validate file size limits
        if cls.MAX_FILE_SIZE_MB <= 0:
            errors.append("MAX_FILE_SIZE_MB must be positive")
        elif cls.MAX_FILE_SIZE_MB > 100:
            warnings.append(f"MAX_FILE_SIZE_MB ({cls.MAX_FILE_SIZE_MB}) is very large")

        # Validate supported file types
        if not cls.SUPPORTED_FILE_TYPES:
            errors.append("SUPPORTED_FILE_TYPES cannot be empty")

        # Validate model names -- all five configured roles, so a blank
        # coordinator or prompt-engineer model is caught here as well.
        model_fields = (
            "COORDINATOR_MODEL",
            "PROMPT_ENGINEER_MODEL",
            "DATA_EXTRACTOR_MODEL",
            "DATA_ARRANGER_MODEL",
            "CODE_GENERATOR_MODEL",
        )
        known_prefixes = ("gemini-", "gpt-", "claude-")
        for field in model_fields:
            model_name = getattr(cls, field)
            if not model_name:
                errors.append(f"{field} cannot be empty")
            elif not model_name.startswith(known_prefixes):
                # Unknown prefix is only a warning: the name may still be valid.
                warnings.append(f"{field} '{model_name}' may not be a valid model name")

        # Return validation results
        if errors:
            error_msg = "Configuration validation failed:\n" + "\n".join(
                f"- {error}" for error in errors
            )
            if warnings:
                error_msg += "\n\nWarnings:\n" + "\n".join(
                    f"- {warning}" for warning in warnings
                )
            raise ValueError(error_msg)

        if warnings:
            import logging

            logger = logging.getLogger(__name__)
            logger.warning(
                "Configuration warnings:\n%s",
                "\n".join(f"- {warning}" for warning in warnings),
            )

        return True

    @classmethod
    def get_debug_info(cls):
        """Get debug information about current configuration.

        API keys are reported only as booleans (set / not set); their values
        are never included.

        Returns:
            A dict with interpreter and platform details, temp-directory
            state, file limits, key presence flags, and a ``models`` mapping
            of role name to configured model name.
        """
        import platform
        import sys

        return {
            "python_version": sys.version,
            "platform": platform.platform(),
            "temp_dir": str(cls.TEMP_DIR),
            "temp_dir_exists": cls.TEMP_DIR.exists(),
            # Count of supported types, not the list itself.
            "supported_file_types": len(cls.SUPPORTED_FILE_TYPES),
            "max_file_size_mb": cls.MAX_FILE_SIZE_MB,
            "has_google_api_key": bool(cls.GOOGLE_API_KEY),
            "has_openai_api_key": bool(cls.OPENAI_API_KEY),
            "models": {
                "coordinator": cls.COORDINATOR_MODEL,
                "prompt_engineer": cls.PROMPT_ENGINEER_MODEL,
                "data_extractor": cls.DATA_EXTRACTOR_MODEL,
                "data_arranger": cls.DATA_ARRANGER_MODEL,
                "code_generator": cls.CODE_GENERATOR_MODEL,
            },
        }
# Module-level instance created at import time. All configuration values are
# class attributes on Settings, so they are reachable through this object too.
settings = Settings()
|