File size: 1,549 Bytes
cfeb3a6
 
 
 
 
 
 
 
17e3d1d
cfeb3a6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17e3d1d
 
cfeb3a6
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import os
from pathlib import Path
from dotenv import load_dotenv

# Must run before the Settings class body below: its attributes call
# os.getenv() at class-definition time, so any values supplied through a
# .env file have to be in the environment by then.
load_dotenv()


class Settings:
    """Application configuration, resolved once when the class body executes.

    Every value is a class attribute. Those backed by ``os.getenv`` are read
    from the process environment at import time; later changes to the
    environment are not picked up.
    """

    # Value of the GOOGLE_API_KEY environment variable (None when unset).
    GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
    # Backward-compatible alias: this was the only name the class originally
    # defined, while validate_config() looked up GOOGLE_API_KEY.
    GOOGLE_AI_API_KEY = GOOGLE_API_KEY

    # Presumably the maximum accepted input size in megabytes; not enforced
    # anywhere in this class.
    MAX_FILE_SIZE_MB = 50
    # Lower-case file extensions, without the leading dot.
    SUPPORTED_FILE_TYPES = [
        "pdf",
        "txt",
        "png",
        "jpg",
        "jpeg",
        "docx",
        "xlsx",
        "csv",
        "md",
        "json",
        "xml",
        "html",
        "py",
        "js",
        "ts",
        "doc",
        "xls",
        "ppt",
        "pptx",
    ]

    # Use /tmp for temporary files on Hugging Face Spaces (or override with TEMP_DIR env var)
    TEMP_DIR = Path(os.getenv("TEMP_DIR", "/tmp/data_extractor_temp"))
    DOCKER_IMAGE = os.getenv("DOCKER_IMAGE", "python:3.12-slim")

    # Model name per pipeline role; each can be overridden through the
    # environment variable of the same name.
    COORDINATOR_MODEL = os.getenv("COORDINATOR_MODEL", "gemini-2.5-pro")
    PROMPT_ENGINEER_MODEL = os.getenv("PROMPT_ENGINEER_MODEL", "gemini-2.5-pro")
    DATA_EXTRACTOR_MODEL = os.getenv("DATA_EXTRACTOR_MODEL", "gemini-2.5-pro")
    DATA_ARRANGER_MODEL = os.getenv("DATA_ARRANGER_MODEL", "gemini-2.5-pro")
    CODE_GENERATOR_MODEL = os.getenv("CODE_GENERATOR_MODEL", "gemini-2.5-pro")

    # Thinking budgets per role (hard-coded, not environment-driven).
    # NOTE(review): -1 presumably requests a dynamic budget from the model
    # API -- confirm against the client library in use.
    COORDINATOR_MODEL_THINKING_BUDGET = 2048
    PROMPT_ENGINEER_MODEL_THINKING_BUDGET = 2048
    DATA_EXTRACTOR_MODEL_THINKING_BUDGET = -1
    DATA_ARRANGER_MODEL_THINKING_BUDGET = 3072
    CODE_GENERATOR_MODEL_THINKING_BUDGET = 3072

    @classmethod
    def validate_config(cls):
        """Check required settings and make sure the temp directory exists.

        Raises:
            ValueError: if GOOGLE_API_KEY is unset or empty.
        """
        if not cls.GOOGLE_API_KEY:
            raise ValueError("GOOGLE_API_KEY required")
        # parents=True so a nested TEMP_DIR override does not fail when its
        # intermediate directories are missing.
        cls.TEMP_DIR.mkdir(parents=True, exist_ok=True)


# Shared module-level instance. All configuration lives in class attributes,
# so this instance exposes the same values as the Settings class itself.
# validate_config() is not invoked here; callers must run it explicitly.
settings = Settings()