Spaces:

milwright
/

historical-ocr

Running

App Files Files Community

milwright commited on Mar 28

Commit

3f83f08

verified ·

1 Parent(s): 59aaeae

Update historical-ocr application with enhanced features

Browse files

Files changed (8) hide show

README.md +1 -12
app.py +7 -7
config.py +2 -1
input/baldwin-letters-combined.jpg +3 -0
input/revere.jpg +3 -0
ocr_utils.py +151 -84
requirements.txt +1 -1
ui/custom.css +32 -0

README.md CHANGED Viewed

@@ -1,14 +1,3 @@
----
-title: Historical OCR with Contextual Intelligence
-emoji: 📜
-colorFrom: indigo
-colorTo: purple
-sdk: streamlit
-sdk_version: "1.28.0"
-app_file: app.py
-pinned: false
----
 # Historical OCR with Contextual Intelligence
 An advanced OCR application for historical document analysis using Mistral AI.
@@ -43,4 +32,4 @@ Built with Streamlit and Mistral AI's OCR and large language model capabilities.
 ---
-Created by Zach Muhlbauer, CUNY Graduate Center

 # Historical OCR with Contextual Intelligence
 An advanced OCR application for historical document analysis using Mistral AI.
 ---
+Created by [Add your name/organization]

app.py CHANGED Viewed

@@ -827,7 +827,7 @@ with main_tab2:
                             images = page.get('images', [])
                             for img in images:
                                 if 'image_base64' in img:
-                                    st.image(img['image_base64'], width=600)
                             # Display text content if available
                             text_content = page.get('markdown', '')
@@ -925,7 +925,7 @@ with main_tab1:
         # Process button - flush left with similar padding as file browser
         with left_col:
-            process_button = st.button("Process Document")
             # Image preprocessing preview in upload column, right after the process button
             if any(preprocessing_options.values()) and uploaded_file.type.startswith('image/'):
@@ -934,13 +934,13 @@ with main_tab1:
                     with preview_cols[0]:
                         st.markdown("**Original Image**")
-                        st.image(uploaded_file, width=600)
                     with preview_cols[1]:
                         st.markdown("**Preprocessed Image**")
                         try:
                             processed_bytes = preprocess_image(uploaded_file.getvalue(), preprocessing_options)
-                            st.image(io.BytesIO(processed_bytes), width=600)
                         except Exception as e:
                             st.error(f"Error in preprocessing: {str(e)}")
                             st.info("Try using grayscale preprocessing for PNG images with transparency")
@@ -1636,7 +1636,7 @@ with main_tab1:
                         with columns1[i]:
                             if img_path.suffix.lower() in ['.jpg', '.jpeg', '.png', '.tif']:
                                 try:
-                                    st.image(str(img_path), caption=img_path.name, width=300)
                                 except Exception:
                                     st.info(f"Example: {img_path.name}")
                             else:
@@ -1649,7 +1649,7 @@ with main_tab1:
                         with columns2[i]:
                             if img_path.suffix.lower() in ['.jpg', '.jpeg', '.png', '.tif']:
                                 try:
-                                    st.image(str(img_path), caption=img_path.name, width=300)
                                 except Exception:
                                     st.info(f"Example: {img_path.name}")
                             else:
@@ -1662,7 +1662,7 @@ with main_tab1:
                         with columns[i % len(columns)]:
                             if img_path.suffix.lower() in ['.jpg', '.jpeg', '.png', '.tif']:
                                 try:
-                                    st.image(str(img_path), caption=img_path.name, width=300)
                                 except Exception:
                                     st.info(f"Example: {img_path.name}")
                             else:

                             images = page.get('images', [])
                             for img in images:
                                 if 'image_base64' in img:
+                                    st.image(img['image_base64'], use_container_width=True)
                             # Display text content if available
                             text_content = page.get('markdown', '')
         # Process button - flush left with similar padding as file browser
         with left_col:
+            process_button = st.button("Process Document", use_container_width=True)
             # Image preprocessing preview in upload column, right after the process button
             if any(preprocessing_options.values()) and uploaded_file.type.startswith('image/'):
                     with preview_cols[0]:
                         st.markdown("**Original Image**")
+                        st.image(uploaded_file, use_container_width=True)
                     with preview_cols[1]:
                         st.markdown("**Preprocessed Image**")
                         try:
                             processed_bytes = preprocess_image(uploaded_file.getvalue(), preprocessing_options)
+                            st.image(io.BytesIO(processed_bytes), use_container_width=True)
                         except Exception as e:
                             st.error(f"Error in preprocessing: {str(e)}")
                             st.info("Try using grayscale preprocessing for PNG images with transparency")
                         with columns1[i]:
                             if img_path.suffix.lower() in ['.jpg', '.jpeg', '.png', '.tif']:
                                 try:
+                                    st.image(str(img_path), caption=img_path.name, use_container_width=True)
                                 except Exception:
                                     st.info(f"Example: {img_path.name}")
                             else:
                         with columns2[i]:
                             if img_path.suffix.lower() in ['.jpg', '.jpeg', '.png', '.tif']:
                                 try:
+                                    st.image(str(img_path), caption=img_path.name, use_container_width=True)
                                 except Exception:
                                     st.info(f"Example: {img_path.name}")
                             else:
                         with columns[i % len(columns)]:
                             if img_path.suffix.lower() in ['.jpg', '.jpeg', '.png', '.tif']:
                                 try:
+                                    st.image(str(img_path), caption=img_path.name, use_container_width=True)
                                 except Exception:
                                     st.info(f"Example: {img_path.name}")
                             else:

config.py CHANGED Viewed

@@ -22,7 +22,8 @@ MISTRAL_API_KEY = os.environ.get("HF_MISTRAL_API_KEY",
                   os.environ.get("MISTRAL_API_KEY", "")).strip()
 # Check if we're in test mode (allows operation without valid API key)
-TEST_MODE = False  # Disable test mode for production use
 # Just check if API key exists
 if not MISTRAL_API_KEY and not TEST_MODE:

                   os.environ.get("MISTRAL_API_KEY", "")).strip()
 # Check if we're in test mode (allows operation without valid API key)
+# Enable test mode for diagnosing OCR issues
+TEST_MODE = True
 # Just check if API key exists
 if not MISTRAL_API_KEY and not TEST_MODE:

input/baldwin-letters-combined.jpg ADDED Viewed

Git LFS Details

SHA256: e43d067402153ca1a9c3d0f04c8072b079b6536e30f0b663e6ea27f81cc308d5
Pointer size: 131 Bytes
Size of remote file: 400 kB

input/revere.jpg ADDED Viewed

Git LFS Details

SHA256: a3b69e20a222e60187cb1492a45ffe2233e8bfecdec02a25395ac0a699316826
Pointer size: 130 Bytes
Size of remote file: 22.8 kB

ocr_utils.py CHANGED Viewed

@@ -9,6 +9,7 @@ import io
 import zipfile
 import logging
 import numpy as np
 from datetime import datetime
 from pathlib import Path
 from typing import Dict, List, Optional, Union, Any, Tuple
@@ -554,6 +555,8 @@ def _detect_document_type_impl(img_hash=None) -> bool:
     """
     Optimized implementation of document type detection for faster processing.
     The img_hash parameter is unused but kept for backward compatibility.
     """
     # Fast path: Get the image from thread-local storage
     if not hasattr(_detect_document_type_impl, "_current_img"):
@@ -566,26 +569,6 @@ def _detect_document_type_impl(img_hash=None) -> bool:
     if width * height < 100000:  # Approx 300x300 or smaller
         return False
-    # Quick check: If image has many colors, it's likely not a document
-    # Sample a subset of pixels for color analysis (faster than full histogram)
-    try:
-        # Sample pixels in a grid pattern
-        color_samples = []
-        for x in range(0, width, max(1, width // 10)):
-            for y in range(0, height, max(1, height // 10)):
-                try:
-                    color_samples.append(img.getpixel((x, y)))
-                except:
-                    pass
-        # Count unique colors in the sample
-        if img.mode == 'RGB':
-            unique_colors = len(set(color_samples))
-            if unique_colors > 1000:  # Many unique colors suggest a photo, not a document
-                return False
-    except:
-        pass  # If sampling fails, continue with regular analysis
     # Convert to grayscale for analysis (using faster conversion)
     gray_img = img.convert('L')
@@ -609,7 +592,7 @@ def _detect_document_type_impl(img_hash=None) -> bool:
         # Count edge pixels using threshold (faster than summing individual pixels)
         edge_data = edges.getdata()
-        edge_threshold = 50
         # Use list comprehension for better performance
         edge_count = sum(1 for p in edge_data if p > edge_threshold)
@@ -621,18 +604,17 @@ def _detect_document_type_impl(img_hash=None) -> bool:
         bright_ratio = bright_count / (width * height)
         # Documents typically have more edges (text boundaries) and bright areas (background)
-        return edge_ratio > 0.05 or bright_ratio > 0.4
-    # OpenCV path - optimized for speed
     img_np = np.array(gray_img)
-    # Fast document detection heuristics
     # 1. Fast check: Variance of pixel values
-    # Documents typically have high variance (black text on white background)
-    # Use numpy's fast statistical functions
     std_dev = np.std(img_np)
-    if std_dev > 60:  # High standard deviation suggests document
         return True
     # 2. Quick check using downsampled image for edges
@@ -643,22 +625,38 @@ def _detect_document_type_impl(img_hash=None) -> bool:
     else:
         small_img = img_np
-    # Use faster edge detection
-    edges = cv2.Canny(small_img, 50, 150, L2gradient=False)
     edge_ratio = np.count_nonzero(edges) / edges.size
     # 3. Fast histogram approximation using bins
     # Instead of calculating full histogram, use bins for dark and light regions
-    dark_mask = img_np < 50
-    light_mask = img_np > 200
     dark_ratio = np.count_nonzero(dark_mask) / img_np.size
     light_ratio = np.count_nonzero(light_mask) / img_np.size
     # Combine heuristics for final decision
     # Documents typically have both dark (text) and light (background) regions,
     # and/or well-defined edges
-    return (dark_ratio > 0.05 and light_ratio > 0.3) or edge_ratio > 0.04
 # Removed caching to fix unhashable type error
 def preprocess_document_image(img: Image.Image) -> Image.Image:
@@ -678,7 +676,8 @@ def preprocess_document_image(img: Image.Image) -> Image.Image:
 def _preprocess_document_image_impl() -> Image.Image:
     """
-    Optimized implementation of document preprocessing with adaptive processing based on image size
     """
     # Fast path: Get image from thread-local storage
     if not hasattr(preprocess_document_image, "_current_img"):
@@ -690,94 +689,162 @@ def _preprocess_document_image_impl() -> Image.Image:
     width, height = img.size
     img_size = width * height
     # Ultra-fast path for tiny images - just convert to grayscale with contrast enhancement
     if img_size < 300000:  # ~500x600 or smaller
         gray = img.convert('L')
         enhancer = ImageEnhance.Contrast(gray)
-        return enhancer.enhance(IMAGE_PREPROCESSING["enhance_contrast"])
     # Fast path for small images - minimal processing
     if img_size < 1000000:  # ~1000x1000 or smaller
         gray = img.convert('L')
         enhancer = ImageEnhance.Contrast(gray)
-        enhanced = enhancer.enhance(IMAGE_PREPROCESSING["enhance_contrast"])
         # Light sharpening only if sharpen is enabled
         if IMAGE_PREPROCESSING["sharpen"]:
-            enhanced = enhanced.filter(ImageFilter.SHARPEN)
         return enhanced
     # Standard path for medium images
     # Convert to grayscale (faster processing)
     gray = img.convert('L')
-    # Improve contrast - key for text visibility
     enhancer = ImageEnhance.Contrast(gray)
-    enhanced = enhancer.enhance(IMAGE_PREPROCESSING["enhance_contrast"])
-    # Apply light sharpening for text clarity
     if IMAGE_PREPROCESSING["sharpen"]:
-        enhanced = enhanced.filter(ImageFilter.SHARPEN)
-    # Advanced processing for larger images or when OpenCV is available
-    # The following optimizations improve OCR accuracy significantly for complex documents
-    if img_size > 1500000 and CV2_AVAILABLE and IMAGE_PREPROCESSING["denoise"]:
         try:
             # Convert to numpy array for OpenCV processing
             img_np = np.array(enhanced)
-            # Optimize denoising parameters based on image size
-            if img_size > 4000000:  # Very large images (~2000x2000 or larger)
-                # More aggressive downsampling for very large images
-                scale_factor = 0.5
-                downsample = cv2.resize(img_np, None, fx=scale_factor, fy=scale_factor,
-                                      interpolation=cv2.INTER_AREA)
-                # Lighter denoising for downsampled image
-                h_value = 7  # Strength parameter
-                template_window = 5
-                search_window = 13
-                # Apply denoising on smaller image
-                denoised_np = cv2.fastNlMeansDenoising(downsample, None, h_value, template_window, search_window)
-                # Resize back to original size
-                denoised_np = cv2.resize(denoised_np, (width, height), interpolation=cv2.INTER_LINEAR)
             else:
-                # Direct denoising for medium-large images
-                h_value = 8  # Balanced for speed and quality
-                template_window = 5
-                search_window = 15
-                # Apply denoising
-                denoised_np = cv2.fastNlMeansDenoising(img_np, None, h_value, template_window, search_window)
-            # Convert back to PIL Image
-            enhanced = Image.fromarray(denoised_np)
-            # Apply adaptive thresholding only if it improves text visibility
-            # Create a binarized version of the image
-            if img_size < 8000000:  # Skip for extremely large images to save processing time
-                binary = cv2.adaptiveThreshold(denoised_np, 255,
-                                             cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
-                                             cv2.THRESH_BINARY, 11, 2)
-                # Quick verification that binarization preserves text information
-                # Use simplified check that works well for document images
-                white_pixels_binary = np.count_nonzero(binary > 200)
-                white_pixels_orig = np.count_nonzero(denoised_np > 200)
-                # Check if binary preserves reasonable amount of white pixels (background)
-                if white_pixels_binary > white_pixels_orig * 0.8:
-                    # Binarization looks good, use it
-                    return Image.fromarray(binary)
         except Exception as e:
             # If OpenCV processing fails, continue with PIL-enhanced image
             pass
     elif IMAGE_PREPROCESSING["denoise"]:
         # Fallback PIL denoising for systems without OpenCV
-        # Use lighter median filter
-        enhanced = enhanced.filter(ImageFilter.MedianFilter(3))
     # Return enhanced grayscale image
     return enhanced

 import zipfile
 import logging
 import numpy as np
+import time
 from datetime import datetime
 from pathlib import Path
 from typing import Dict, List, Optional, Union, Any, Tuple
     """
     Optimized implementation of document type detection for faster processing.
     The img_hash parameter is unused but kept for backward compatibility.
+    Enhanced to better detect handwritten documents.
     """
     # Fast path: Get the image from thread-local storage
     if not hasattr(_detect_document_type_impl, "_current_img"):
     if width * height < 100000:  # Approx 300x300 or smaller
         return False
     # Convert to grayscale for analysis (using faster conversion)
     gray_img = img.convert('L')
         # Count edge pixels using threshold (faster than summing individual pixels)
         edge_data = edges.getdata()
+        edge_threshold = 40  # Lowered threshold to better detect handwritten texts
         # Use list comprehension for better performance
         edge_count = sum(1 for p in edge_data if p > edge_threshold)
         bright_ratio = bright_count / (width * height)
         # Documents typically have more edges (text boundaries) and bright areas (background)
+        # Lowered edge threshold to better detect handwritten documents
+        return edge_ratio > 0.035 or bright_ratio > 0.4
+    # OpenCV path - optimized for speed and enhanced for handwritten documents
     img_np = np.array(gray_img)
     # 1. Fast check: Variance of pixel values
+    # Documents typically have high variance (text on background)
+    # Handwritten documents may have less contrast than printed text
     std_dev = np.std(img_np)
+    if std_dev > 45:  # Lowered threshold to better detect handwritten documents
         return True
     # 2. Quick check using downsampled image for edges
     else:
         small_img = img_np
+    # Use adaptive edge detection parameters for handwritten documents
+    # Lowered threshold to better detect fainter handwritten text
+    edges = cv2.Canny(small_img, 30, 130, L2gradient=False)
     edge_ratio = np.count_nonzero(edges) / edges.size
     # 3. Fast histogram approximation using bins
     # Instead of calculating full histogram, use bins for dark and light regions
+    # Adjusted for handwritten documents which may have more gray values
+    dark_mask = img_np < 60  # Increased threshold to capture lighter handwritten text
+    light_mask = img_np > 180  # Lowered threshold to account for aged paper
     dark_ratio = np.count_nonzero(dark_mask) / img_np.size
     light_ratio = np.count_nonzero(light_mask) / img_np.size
+    # Special analysis for handwritten documents
+    # Check for line-like structures typical in handwritten text
+    if CV2_AVAILABLE and edge_ratio > 0.02:  # Lower threshold to capture handwritten documents
+        # Try to find line segments that could indicate text lines
+        lines = cv2.HoughLinesP(edges, 1, np.pi/180,
+                               threshold=50,  # Lower threshold for detection
+                               minLineLength=30,  # Shorter lines for handwriting
+                               maxLineGap=20)   # Larger gap for discontinuous handwriting
+        # If we find enough line segments, it's likely a document with text
+        if lines is not None and len(lines) > 10:
+            return True
     # Combine heuristics for final decision
     # Documents typically have both dark (text) and light (background) regions,
     # and/or well-defined edges
+    # Lower thresholds for handwritten documents
+    return (dark_ratio > 0.03 and light_ratio > 0.25) or edge_ratio > 0.03
 # Removed caching to fix unhashable type error
 def preprocess_document_image(img: Image.Image) -> Image.Image:
 def _preprocess_document_image_impl() -> Image.Image:
     """
+    Optimized implementation of document preprocessing with adaptive processing based on image size.
+    Enhanced for better handwritten document processing.
     """
     # Fast path: Get image from thread-local storage
     if not hasattr(preprocess_document_image, "_current_img"):
     width, height = img.size
     img_size = width * height
+    # Check if the image might be a handwritten document - use special processing
+    is_handwritten = False
+    try:
+        # Simple check for handwritten document characteristics
+        # Handwritten documents often have more varied strokes and less stark contrast
+        if CV2_AVAILABLE:
+            # Convert to grayscale and calculate local variance
+            gray_np = np.array(img.convert('L'))
+            # Higher variance in edge strengths can indicate handwriting
+            edges = cv2.Canny(gray_np, 30, 100)
+            if np.count_nonzero(edges) / edges.size > 0.02:  # Low edge threshold for handwriting
+                # Additional check with gradient magnitudes
+                sobelx = cv2.Sobel(gray_np, cv2.CV_64F, 1, 0, ksize=3)
+                sobely = cv2.Sobel(gray_np, cv2.CV_64F, 0, 1, ksize=3)
+                magnitude = np.sqrt(sobelx**2 + sobely**2)
+                # Handwriting typically has more variation in gradient magnitudes
+                if np.std(magnitude) > 20:
+                    is_handwritten = True
+    except:
+        # If detection fails, assume it's not handwritten
+        pass
     # Ultra-fast path for tiny images - just convert to grayscale with contrast enhancement
     if img_size < 300000:  # ~500x600 or smaller
         gray = img.convert('L')
+        # Lower contrast enhancement for handwritten documents
+        contrast_level = 1.4 if is_handwritten else IMAGE_PREPROCESSING["enhance_contrast"]
         enhancer = ImageEnhance.Contrast(gray)
+        return enhancer.enhance(contrast_level)
     # Fast path for small images - minimal processing
     if img_size < 1000000:  # ~1000x1000 or smaller
         gray = img.convert('L')
+        # Use gentler contrast enhancement for handwritten documents
+        contrast_level = 1.4 if is_handwritten else IMAGE_PREPROCESSING["enhance_contrast"]
         enhancer = ImageEnhance.Contrast(gray)
+        enhanced = enhancer.enhance(contrast_level)
         # Light sharpening only if sharpen is enabled
+        # Use milder sharpening for handwritten documents to preserve stroke detail
         if IMAGE_PREPROCESSING["sharpen"]:
+            if is_handwritten:
+                # Use edge enhancement which is gentler than SHARPEN for handwriting
+                enhanced = enhanced.filter(ImageFilter.EDGE_ENHANCE)
+            else:
+                enhanced = enhanced.filter(ImageFilter.SHARPEN)
         return enhanced
     # Standard path for medium images
     # Convert to grayscale (faster processing)
     gray = img.convert('L')
+    # Adaptive contrast enhancement based on document type
+    contrast_level = 1.4 if is_handwritten else IMAGE_PREPROCESSING["enhance_contrast"]
     enhancer = ImageEnhance.Contrast(gray)
+    enhanced = enhancer.enhance(contrast_level)
+    # Apply light sharpening for text clarity - adapt based on document type
     if IMAGE_PREPROCESSING["sharpen"]:
+        if is_handwritten:
+            # Use edge enhancement which is gentler than SHARPEN for handwriting
+            enhanced = enhanced.filter(ImageFilter.EDGE_ENHANCE)
+        else:
+            enhanced = enhanced.filter(ImageFilter.SHARPEN)
+    # Advanced processing with OpenCV if available
+    if CV2_AVAILABLE and IMAGE_PREPROCESSING["denoise"]:
         try:
             # Convert to numpy array for OpenCV processing
             img_np = np.array(enhanced)
+            if is_handwritten:
+                # Special treatment for handwritten documents
+                # Use guided filter which preserves edges better than NLMeans
+                # Guided filter works well for handwriting by preserving stroke details
+                if img_size > 3000000:  # Large images - downsample first
+                    scale_factor = 0.5
+                    small_img = cv2.resize(img_np, None, fx=scale_factor, fy=scale_factor,
+                                          interpolation=cv2.INTER_AREA)
+                    # Apply bilateral filter which preserves edges while smoothing
+                    filtered = cv2.bilateralFilter(small_img, 9, 75, 75)
+                    # Resize back
+                    filtered = cv2.resize(filtered, (width, height), interpolation=cv2.INTER_LINEAR)
+                else:
+                    # Use bilateral filter directly for smaller images
+                    filtered = cv2.bilateralFilter(img_np, 7, 50, 50)
+                # Convert back to PIL Image
+                enhanced = Image.fromarray(filtered)
+                # For handwritten docs, avoid binary thresholding which can destroy subtle strokes
+                return enhanced
             else:
+                # Standard document processing - optimized for printed text
+                # Optimize denoising parameters based on image size
+                if img_size > 4000000:  # Very large images
+                    # More aggressive downsampling for very large images
+                    scale_factor = 0.5
+                    downsample = cv2.resize(img_np, None, fx=scale_factor, fy=scale_factor,
+                                          interpolation=cv2.INTER_AREA)
+                    # Lighter denoising for downsampled image
+                    h_value = 7  # Strength parameter
+                    template_window = 5
+                    search_window = 13
+                    # Apply denoising on smaller image
+                    denoised_np = cv2.fastNlMeansDenoising(downsample, None, h_value, template_window, search_window)
+                    # Resize back to original size
+                    denoised_np = cv2.resize(denoised_np, (width, height), interpolation=cv2.INTER_LINEAR)
+                else:
+                    # Direct denoising for medium-large images
+                    h_value = 8  # Balanced for speed and quality
+                    template_window = 5
+                    search_window = 15
+                    # Apply denoising
+                    denoised_np = cv2.fastNlMeansDenoising(img_np, None, h_value, template_window, search_window)
+                # Convert back to PIL Image
+                enhanced = Image.fromarray(denoised_np)
+                # Apply adaptive thresholding only if it improves text visibility
+                # Create a binarized version of the image
+                if img_size < 8000000:  # Skip for extremely large images to save processing time
+                    binary = cv2.adaptiveThreshold(denoised_np, 255,
+                                                 cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+                                                 cv2.THRESH_BINARY, 11, 2)
+                    # Quick verification that binarization preserves text information
+                    # Use simplified check that works well for document images
+                    white_pixels_binary = np.count_nonzero(binary > 200)
+                    white_pixels_orig = np.count_nonzero(denoised_np > 200)
+                    # Check if binary preserves reasonable amount of white pixels (background)
+                    if white_pixels_binary > white_pixels_orig * 0.8:
+                        # Binarization looks good, use it
+                        return Image.fromarray(binary)
+                return enhanced
         except Exception as e:
             # If OpenCV processing fails, continue with PIL-enhanced image
             pass
     elif IMAGE_PREPROCESSING["denoise"]:
         # Fallback PIL denoising for systems without OpenCV
+        if is_handwritten:
+            # Lighter filtering for handwritten text to preserve details
+            # Use a smaller median filter for handwritten documents
+            enhanced = enhanced.filter(ImageFilter.MedianFilter(1))
+        else:
+            # Standard filtering for printed documents
+            enhanced = enhanced.filter(ImageFilter.MedianFilter(3))
     # Return enhanced grayscale image
     return enhanced

requirements.txt CHANGED Viewed

@@ -1,6 +1,6 @@
 # Generated requirements for Hugging Face Spaces deployment
-streamlit>=1.28.0
 mistralai>=0.0.3
 Pillow>=9.0.0
 opencv-python-headless>=4.5.0

 # Generated requirements for Hugging Face Spaces deployment
+streamlit>=1.20.0
 mistralai>=0.0.3
 Pillow>=9.0.0
 opencv-python-headless>=4.5.0

ui/custom.css CHANGED Viewed

@@ -64,4 +64,36 @@
     font-size: 1.3rem;
     font-weight: bold;
     margin-bottom: 15px;
 }

     font-size: 1.3rem;
     font-weight: bold;
     margin-bottom: 15px;
+}
+/* Fix for image preprocessing preview */
+.stExpander {
+    overflow: hidden !important;
+}
+.stExpander img {
+    max-width: 100% !important;
+    height: auto !important;
+    object-fit: contain !important;
+}
+/* Additional image fixes for all containers */
+.document-content img,
+.markdown-text-container img,
+.page-text-content img,
+.image-container img {
+    max-width: 100% !important;
+    height: auto !important;
+    object-fit: contain !important;
+}
+/* Responsive design rules */
+@media (max-width: 768px) {
+    .stExpander img,
+    .document-content img,
+    .markdown-text-container img,
+    .page-text-content img,
+    .image-container img {
+        max-width: 95% !important;
+    }
 }