import streamlit as st import os import io import base64 import logging import re from datetime import datetime from pathlib import Path import json # Define exports __all__ = [ 'ProgressReporter', 'create_sidebar_options', 'create_file_uploader', 'display_document_with_images', 'display_previous_results', 'display_about_tab', 'display_results' # Re-export from utils.ui_utils ] from constants import ( DOCUMENT_TYPES, DOCUMENT_LAYOUTS, CUSTOM_PROMPT_TEMPLATES, LAYOUT_PROMPT_ADDITIONS, DEFAULT_PDF_DPI, MIN_PDF_DPI, MAX_PDF_DPI, DEFAULT_MAX_PAGES, PERFORMANCE_MODES, PREPROCESSING_DOC_TYPES, ROTATION_OPTIONS ) from utils.text_utils import format_ocr_text, clean_raw_text, format_markdown_text # Import from text_utils from utils.content_utils import ( classify_document_content, extract_document_text, extract_image_description ) from utils.ui_utils import display_results from preprocessing import preprocess_image class ProgressReporter: """Class to handle progress reporting in the UI""" def __init__(self, placeholder): self.placeholder = placeholder self.progress_bar = None self.status_text = None def setup(self): """Setup the progress components""" with self.placeholder.container(): self.progress_bar = st.progress(0) self.status_text = st.empty() return self def update(self, percent, status_text): """Update the progress bar and status text""" if self.progress_bar is not None: self.progress_bar.progress(percent / 100) if self.status_text is not None: self.status_text.text(status_text) def complete(self, success=True): """Complete the progress reporting""" if success: if self.progress_bar is not None: self.progress_bar.progress(100) if self.status_text is not None: self.status_text.text("Processing complete!") else: if self.status_text is not None: self.status_text.text("Processing failed.") # Clear the progress components after a delay import time time.sleep(0.8) # Short delay to show completion if self.progress_bar is not None: self.progress_bar.empty() if self.status_text is not None: self.status_text.empty() def create_sidebar_options(): """Create and return sidebar options""" with st.sidebar: st.markdown("## OCR Settings") # Create a container for the sidebar options with st.container(): # Default to using vision model (removed selection from UI) use_vision = True # Document type selection doc_type = st.selectbox("Document Type", DOCUMENT_TYPES, help="Select the type of document you're processing for better results") # Document layout doc_layout = st.selectbox("Document Layout", DOCUMENT_LAYOUTS, help="Select the layout of your document") # Initialize preprocessing variables with default values grayscale = False denoise = False contrast = 0 rotation = 0 use_segmentation = False # Custom prompt custom_prompt = "" # Get the template for the selected document type if not auto-detect if doc_type != DOCUMENT_TYPES[0]: prompt_template = CUSTOM_PROMPT_TEMPLATES.get(doc_type, "") # Add layout information if not standard if doc_layout != DOCUMENT_LAYOUTS[0]: # Not standard layout layout_addition = LAYOUT_PROMPT_ADDITIONS.get(doc_layout, "") if layout_addition: prompt_template += " " + layout_addition # Set the custom prompt custom_prompt = prompt_template # Allow user to edit the prompt (always visible) custom_prompt = st.text_area("Custom Processing Instructions", value=custom_prompt, help="Customize the instructions for processing this document", height=80) # Image preprocessing options (always visible) st.markdown("### Image Preprocessing") # Grayscale conversion grayscale = st.checkbox("Convert to Grayscale", value=True, help="Convert color images to grayscale for better text recognition") # Light denoising option denoise = st.checkbox("Light Denoising", value=True, help="Apply gentle denoising to improve text clarity") # Contrast adjustment contrast = st.slider("Contrast Adjustment", min_value=-20, max_value=20, value=5, step=5, help="Adjust image contrast (limited range)") # Initialize rotation (keeping it set to 0) rotation = 0 use_segmentation = False # Create preprocessing options dictionary # Map UI document types to preprocessing document types doc_type_for_preprocessing = "standard" if "Handwritten" in doc_type: doc_type_for_preprocessing = "handwritten" elif "Newspaper" in doc_type or "Magazine" in doc_type: doc_type_for_preprocessing = "newspaper" elif "Book" in doc_type or "Publication" in doc_type: doc_type_for_preprocessing = "book" # Match the actual preprocessing type preprocessing_options = { "document_type": doc_type_for_preprocessing, "grayscale": grayscale, "denoise": denoise, "contrast": contrast, "rotation": rotation } # PDF-specific options st.markdown("### PDF Options") max_pages = st.number_input("Maximum Pages to Process", min_value=1, max_value=20, value=DEFAULT_MAX_PAGES, help="Limit the number of pages to process (for multi-page PDFs)") # Set default values for removed options pdf_dpi = DEFAULT_PDF_DPI pdf_rotation = 0 # Create options dictionary options = { "use_vision": use_vision, "perf_mode": "Quality", # Default to Quality, removed performance mode option "pdf_dpi": pdf_dpi, "max_pages": max_pages, "pdf_rotation": pdf_rotation, "custom_prompt": custom_prompt, "preprocessing_options": preprocessing_options, "use_segmentation": use_segmentation if 'use_segmentation' in locals() else False } return options def create_file_uploader(): """Create and return a file uploader""" # Add app description st.markdown(f'
Made possible by Mistral AI
", unsafe_allow_html=True) # Add project framing st.markdown(""" This tool assists scholars in historical research by extracting text from challenging documents. While it may not achieve 100% accuracy, it helps navigate: - **Historical newspapers** with complex layouts - **Handwritten documents** from various periods - **Photos of archival materials** Upload a document to begin, or explore the examples. """) # Create file uploader with a more concise label uploaded_file = st.file_uploader( "Select file", type=["pdf", "png", "jpg"], help="Upload a PDF or image file for OCR processing" ) return uploaded_file def display_document_with_images(result): """Display document with images""" # Check for pages_data first if 'pages_data' in result and result['pages_data']: pages_data = result['pages_data'] # If pages_data not available, try to extract from raw_response_data elif 'raw_response_data' in result and isinstance(result['raw_response_data'], dict) and 'pages' in result['raw_response_data']: # Build pages_data from raw_response_data pages_data = [] raw_pages = result['raw_response_data']['pages'] for page_idx, page in enumerate(raw_pages): if not isinstance(page, dict): continue page_data = { 'page_number': page_idx + 1, 'markdown': page.get('markdown', ''), 'images': [] } # Extract images if present if 'images' in page and isinstance(page['images'], list): for img_idx, img in enumerate(page['images']): if isinstance(img, dict) and ('base64' in img or 'image_base64' in img): img_base64 = img.get('image_base64', img.get('base64', '')) if img_base64: page_data['images'].append({ 'id': img.get('id', f"img_{page_idx}_{img_idx}"), 'image_base64': img_base64 }) if page_data['markdown'] or page_data['images']: pages_data.append(page_data) else: st.info("No image data available.") return # Display each page for i, page_data in enumerate(pages_data): st.markdown(f"### Page {i+1}") # Display only the image (removed text column) # Display the image - check multiple possible field names image_displayed = False # Try 'image_data' field first if 'image_data' in page_data: try: # Convert base64 to image image_data = base64.b64decode(page_data['image_data']) st.image(io.BytesIO(image_data), use_container_width=True) image_displayed = True except Exception as e: st.error(f"Error displaying image from image_data: {str(e)}") # Try 'images' array if image_data didn't work if not image_displayed and 'images' in page_data and len(page_data['images']) > 0: for img in page_data['images']: if 'image_base64' in img: try: st.image(img['image_base64'], use_container_width=True) image_displayed = True break except Exception as e: st.error(f"Error displaying image from images array: {str(e)}") # Try alternative image source if still not displayed if not image_displayed and 'raw_response_data' in result: raw_data = result['raw_response_data'] if isinstance(raw_data, dict) and 'pages' in raw_data: for raw_page in raw_data['pages']: if isinstance(raw_page, dict) and 'images' in raw_page: for img in raw_page['images']: if isinstance(img, dict) and 'base64' in img: st.image(img['base64'], use_container_width=True) st.caption("Image from OCR response") image_displayed = True break if image_displayed: break if not image_displayed: st.info("No image available for this page.") # Extract and display alt text if available page_text = "" if 'text' in page_data: page_text = page_data['text'] elif 'markdown' in page_data: page_text = page_data['markdown'] if page_text and page_text.startswith("![") and page_text.endswith(")"): try: alt_text = page_text[2:page_text.index(']')] if alt_text and len(alt_text) > 5: # Only show if alt text is meaningful st.caption(f"Image description: {alt_text}") except: pass def display_previous_results(): """Display previous results tab content in a simplified, structured view""" # Use a simple header without the button column st.header("Previous Results") # Display previous results if available if not st.session_state.previous_results: st.markdown("""Process a document to see your results history.