""" OCR utility functions for image processing and OCR operations. This module provides helper functions used across the Historical OCR application. """ import os import base64 import logging from pathlib import Path from typing import Union, Optional # Configure logging logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') logger = logging.getLogger(__name__) # Try to import optional dependencies try: import pytesseract TESSERACT_AVAILABLE = True except ImportError: logger.warning("pytesseract not available - local OCR fallback will not work") TESSERACT_AVAILABLE = False try: from PIL import Image PILLOW_AVAILABLE = True except ImportError: logger.warning("PIL not available - image preprocessing will be limited") PILLOW_AVAILABLE = False def encode_image_for_api(image_path: Union[str, Path]) -> str: """ Encode an image as base64 data URL for API submission with proper MIME type. Args: image_path: Path to the image file Returns: Base64 data URL for the image """ # Convert to Path object if string image_file = Path(image_path) if isinstance(image_path, str) else image_path # Verify image exists if not image_file.is_file(): raise FileNotFoundError(f"Image file not found: {image_file}") # Determine mime type based on file extension mime_type = 'image/jpeg' # Default mime type suffix = image_file.suffix.lower() if suffix == '.png': mime_type = 'image/png' elif suffix == '.gif': mime_type = 'image/gif' elif suffix in ['.jpg', '.jpeg']: mime_type = 'image/jpeg' elif suffix == '.pdf': mime_type = 'application/pdf' # Encode image as base64 encoded = base64.b64encode(image_file.read_bytes()).decode() return f"data:{mime_type};base64,{encoded}" def try_local_ocr_fallback(file_path: Union[str, Path], base64_data_url: Optional[str] = None) -> Optional[str]: """ Try to perform OCR using local Tesseract as a fallback when the API is unavailable. Args: file_path: Path to the image file base64_data_url: Optional base64 data URL if already available Returns: Extracted text or None if extraction failed """ if not TESSERACT_AVAILABLE or not PILLOW_AVAILABLE: logger.warning("Local OCR fallback is not available (missing dependencies)") return None try: logger.info("Using local Tesseract OCR as fallback") # Use PIL to open the image img = Image.open(file_path) # Use Tesseract to extract text text = pytesseract.image_to_string(img) if text: logger.info("Successfully extracted text using local Tesseract OCR") return text else: logger.warning("Tesseract extracted no text") return None except Exception as e: logger.error(f"Error using local OCR fallback: {str(e)}") return None