"""
Constants for the Historical OCR application.

This module contains all the constants used throughout the application,
making it easier to maintain and update values in one place.
"""

# API limits: upper bounds on file size (megabytes, per the name) and page count.
MAX_FILE_SIZE_MB: int = 50
MAX_PAGES: int = 20

# Caching: time-to-live for an entry and the maximum number of entries.
CACHE_TTL_SECONDS: int = 24 * 60 * 60  # one day (24 h x 60 min x 60 s)
MAX_CACHE_ENTRIES: int = 20

# Image processing
# NOTE(review): presumably a pixel limit on an image side -- confirm with the resize code.
MAX_IMAGE_DIMENSION: int = 2500
# NOTE(review): presumably a 0-100 encoder quality setting -- confirm with the save call.
IMAGE_QUALITY: int = 92

# Document types, in the order they are listed.
# Every entry except the auto-detect option also appears, spelled identically,
# as a key of CUSTOM_PROMPT_TEMPLATES.
DOCUMENT_TYPES = [
    "Auto-detect (standard processing)",
    "Newspaper or Magazine",
    "Letter or Correspondence",
    "Book or Publication",
    "Form or Legal Document",
    "Recipe",
    "Handwritten Document",
    "Map or Illustration",
    "Table or Spreadsheet",
    "Other (specify in instructions)",
]

# Document layouts, in the order they are listed.
# Every entry except "Standard layout" also appears, spelled identically,
# as a key of LAYOUT_PROMPT_ADDITIONS.
DOCUMENT_LAYOUTS = [
    "Standard layout",
    "Multiple columns",
    "Table/grid format",
    "Mixed layout with images",
]

# Preprocessing document types (lowercase identifiers).
PREPROCESSING_DOC_TYPES = [
    "standard",
    "handwritten",
    "typed",
    "printed",
]

# Rotation options: steps of 90 from 0 up to 270 (presumably degrees).
ROTATION_OPTIONS = list(range(0, 360, 90))

# PDF settings
MIN_PDF_DPI: int = 72
MAX_PDF_DPI: int = 300
DEFAULT_PDF_DPI: int = 100  # lies within the MIN_PDF_DPI..MAX_PDF_DPI range
DEFAULT_MAX_PAGES: int = 3  # a separate, lower value than MAX_PAGES (20)

# Performance modes, in the order they are listed.
PERFORMANCE_MODES = [
    "Quality",
    "Speed",
]

# Custom prompt templates: document-type label -> prompt text.
# Keys use the same spellings as the entries of DOCUMENT_TYPES (the auto-detect
# entry has no template). Each value is one string; adjacent literals are
# joined by the parser, so the wrapping below adds no characters.
CUSTOM_PROMPT_TEMPLATES = {
    "Newspaper or Magazine": (
        "This is a newspaper/magazine. "
        "Process columns from top to bottom, "
        "capture headlines, bylines, article text and captions."
    ),
    "Letter or Correspondence": (
        "This is a letter/correspondence. "
        "Capture letterhead, date, greeting, body, closing and signature. "
        "Note any handwritten annotations."
    ),
    "Book or Publication": (
        "This is a book/publication. "
        "Extract titles, headers, footnotes, page numbers and body text. "
        "Preserve paragraph structure and any special formatting."
    ),
    "Form or Legal Document": (
        "This is a form/legal document. "
        "Extract all field labels and values, preserving the structure. "
        "Pay special attention to signature lines, dates, and any official markings."
    ),
    "Recipe": (
        "This is a recipe. "
        "Extract title, ingredients list with measurements, and preparation instructions. "
        "Maintain the distinction between ingredients and preparation steps."
    ),
    "Handwritten Document": (
        "This is a handwritten document. "
        "Carefully transcribe all handwritten text, preserving line breaks. "
        "Note any unclear sections or annotations."
    ),
    "Map or Illustration": (
        "This is a map or illustration. "
        "Transcribe all labels, legends, captions, and annotations. "
        "Note any scale indicators or directional markings."
    ),
    "Table or Spreadsheet": (
        "This is a table/spreadsheet. "
        "Preserve row and column structure, maintaining alignment of data. "
        "Extract headers and all cell values."
    ),
    "Other (specify in instructions)": (
        "Please describe the document type "
        "and any special processing requirements here."
    ),
}

# Layout prompt additions: layout label -> extra prompt text.
# Keys use the same spellings as the entries of DOCUMENT_LAYOUTS
# ("Standard layout" has no addition).
LAYOUT_PROMPT_ADDITIONS = {
    "Multiple columns": (
        "Document has multiple columns. "
        "Read each column from top to bottom, then move to the next column."
    ),
    "Table/grid format": (
        "Document contains table data. "
        "Preserve row and column structure during extraction."
    ),
    "Mixed layout with images": (
        "Document has mixed text layout with images. "
        "Extract text in proper reading order."
    ),
}

# Content themes for subject tag extraction: theme name -> lowercase keywords.
CONTENT_THEMES = {
    "Historical": [
        "century", "ancient", "historical", "history",
        "vintage", "archive", "heritage",
    ],
    "Travel": [
        "travel", "journey", "expedition", "exploration",
        "voyage", "map", "location",
    ],
    "Science": [
        "experiment", "research", "study",
        "analysis", "scientific", "laboratory",
    ],
    "Literature": [
        "book", "novel", "poetry", "author",
        "literary", "chapter", "story",
    ],
    "Art": [
        "painting", "illustration", "drawing", "artist",
        "exhibit", "gallery", "portrait",
    ],
    "Education": [
        "education", "school", "university", "college",
        "learning", "student", "teach",
    ],
    "Politics": [
        "government", "political", "policy",
        "administration", "election", "legislature",
    ],
    "Business": [
        "business", "company", "corporation", "market",
        "industry", "commercial", "trade",
    ],
    "Social": [
        "society", "community", "social",
        "culture", "tradition", "customs",
    ],
    "Technology": [
        "technology", "invention", "device",
        "mechanical", "machine", "technical",
    ],
    "Military": [
        "military", "army", "navy", "war",
        "battle", "soldier", "weapon",
    ],
    "Religion": [
        "religion", "church", "temple",
        "spiritual", "sacred", "ritual",
    ],
    "Medicine": [
        "medical", "medicine", "health", "hospital",
        "treatment", "disease", "doctor",
    ],
    "Legal": [
        "legal", "law", "court", "justice",
        "attorney", "judicial", "statute",
    ],
    "Correspondence": [
        "letter", "mail", "correspondence",
        "message", "communication",
    ],
}

# Period tags based on year ranges: (first_year, last_year) -> tag label.
# NOTE(review): consecutive ranges never share an endpoint (e.g. 1799 then
# 1800), so both bounds look inclusive -- confirm against the lookup code.
PERIOD_TAGS = {
    (0, 1799): "Pre-1800s",
    (1800, 1849): "Early 19th Century",
    (1850, 1899): "Late 19th Century",
    (1900, 1949): "Early 20th Century",
    (1950, 2099): "Modern Era",
}

# Default fallback tags.
# "Historical" is also the name of a theme in CONTENT_THEMES.
DEFAULT_TAGS = [
    "Document",
    "Historical",
    "Text",
]
GENERIC_TAGS = [
    "Archive",
    "Content",
    "Record",
]

# UI constants
# How long, in seconds, the completion message is shown.
PROGRESS_DELAY: float = 0.8