Update historical-ocr application with enhanced features
Files changed:
- README.md (+1, -12)
- app.py (+7, -7)
- config.py (+2, -1)
- input/baldwin-letters-combined.jpg (+3, -0)
- input/revere.jpg (+3, -0)
- ocr_utils.py (+151, -84)
- requirements.txt (+1, -1)
- ui/custom.css (+32, -0)
README.md
CHANGED
@@ -1,14 +1,3 @@
----
-title: Historical OCR with Contextual Intelligence
-emoji: 📜
-colorFrom: indigo
-colorTo: purple
-sdk: streamlit
-sdk_version: "1.28.0"
-app_file: app.py
-pinned: false
----
-
 # Historical OCR with Contextual Intelligence
 
 An advanced OCR application for historical document analysis using Mistral AI.
@@ -43,4 +32,4 @@ Built with Streamlit and Mistral AI's OCR and large language model capabilities.
 
 ---
 
-Created by
+Created by [Add your name/organization]
app.py
CHANGED
@@ -827,7 +827,7 @@ with main_tab2:
     images = page.get('images', [])
     for img in images:
         if 'image_base64' in img:
-            st.image(img['image_base64'],
+            st.image(img['image_base64'], use_container_width=True)
 
     # Display text content if available
     text_content = page.get('markdown', '')
@@ -925,7 +925,7 @@ with main_tab1:
 
     # Process button - flush left with similar padding as file browser
     with left_col:
-        process_button = st.button("Process Document")
+        process_button = st.button("Process Document", use_container_width=True)
 
     # Image preprocessing preview in upload column, right after the process button
     if any(preprocessing_options.values()) and uploaded_file.type.startswith('image/'):
@@ -934,13 +934,13 @@ with main_tab1:
 
         with preview_cols[0]:
             st.markdown("**Original Image**")
-            st.image(uploaded_file,
+            st.image(uploaded_file, use_container_width=True)
 
         with preview_cols[1]:
            st.markdown("**Preprocessed Image**")
            try:
                processed_bytes = preprocess_image(uploaded_file.getvalue(), preprocessing_options)
-               st.image(io.BytesIO(processed_bytes),
+               st.image(io.BytesIO(processed_bytes), use_container_width=True)
            except Exception as e:
                st.error(f"Error in preprocessing: {str(e)}")
                st.info("Try using grayscale preprocessing for PNG images with transparency")
@@ -1636,7 +1636,7 @@ with main_tab1:
     with columns1[i]:
         if img_path.suffix.lower() in ['.jpg', '.jpeg', '.png', '.tif']:
             try:
-                st.image(str(img_path), caption=img_path.name,
+                st.image(str(img_path), caption=img_path.name, use_container_width=True)
             except Exception:
                 st.info(f"Example: {img_path.name}")
         else:
@@ -1649,7 +1649,7 @@ with main_tab1:
     with columns2[i]:
         if img_path.suffix.lower() in ['.jpg', '.jpeg', '.png', '.tif']:
             try:
-                st.image(str(img_path), caption=img_path.name,
+                st.image(str(img_path), caption=img_path.name, use_container_width=True)
             except Exception:
                 st.info(f"Example: {img_path.name}")
         else:
@@ -1662,7 +1662,7 @@ with main_tab1:
     with columns[i % len(columns)]:
         if img_path.suffix.lower() in ['.jpg', '.jpeg', '.png', '.tif']:
             try:
-                st.image(str(img_path), caption=img_path.name,
+                st.image(str(img_path), caption=img_path.name, use_container_width=True)
             except Exception:
                 st.info(f"Example: {img_path.name}")
         else:
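All seven app.py edits apply one pattern: sizing Streamlit widgets and images to their parent container instead of a fixed or column width. A minimal standalone sketch of the pattern, assuming a Streamlit release where `st.image` and `st.button` accept `use_container_width` (as the updated code does):

```python
import io
import streamlit as st

left_col, right_col = st.columns(2)

with left_col:
    # The button stretches to the column width instead of its label width
    process_button = st.button("Process Document", use_container_width=True)

uploaded_file = st.file_uploader("Upload a document image", type=["jpg", "jpeg", "png"])
if uploaded_file is not None:
    with right_col:
        # The preview scales with the column, so it stays inside its container
        st.image(io.BytesIO(uploaded_file.getvalue()), use_container_width=True)
```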
config.py
CHANGED
@@ -22,7 +22,8 @@ MISTRAL_API_KEY = os.environ.get("HF_MISTRAL_API_KEY",
                                  os.environ.get("MISTRAL_API_KEY", "")).strip()
 
 # Check if we're in test mode (allows operation without valid API key)
-
+# Enable test mode for diagnosing OCR issues
+TEST_MODE = True
 
 # Just check if API key exists
 if not MISTRAL_API_KEY and not TEST_MODE:
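Hardcoding `TEST_MODE = True` lets the Space start without a valid Mistral key while OCR issues are diagnosed. For comparison, a hypothetical sketch that derives the same toggle from an environment variable (the `OCR_TEST_MODE` name and the final error are illustrative, not part of this repo):

```python
import os

# Hypothetical alternative: opt into test mode via the environment
# instead of hardcoding the toggle in config.py.
TEST_MODE = os.environ.get("OCR_TEST_MODE", "").lower() in ("1", "true", "yes")

MISTRAL_API_KEY = os.environ.get("HF_MISTRAL_API_KEY",
                                 os.environ.get("MISTRAL_API_KEY", "")).strip()

# Mirrors the existing check: require a key unless test mode is enabled
if not MISTRAL_API_KEY and not TEST_MODE:
    raise RuntimeError("Set MISTRAL_API_KEY or enable test mode")
```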
input/baldwin-letters-combined.jpg
ADDED
Binary image (stored with Git LFS)

input/revere.jpg
ADDED
Binary image (stored with Git LFS)
ocr_utils.py
CHANGED
@@ -9,6 +9,7 @@ import io
 import zipfile
 import logging
 import numpy as np
+import time
 from datetime import datetime
 from pathlib import Path
 from typing import Dict, List, Optional, Union, Any, Tuple
@@ -554,6 +555,8 @@ def _detect_document_type_impl(img_hash=None) -> bool:
     """
     Optimized implementation of document type detection for faster processing.
     The img_hash parameter is unused but kept for backward compatibility.
+
+    Enhanced to better detect handwritten documents.
     """
     # Fast path: Get the image from thread-local storage
     if not hasattr(_detect_document_type_impl, "_current_img"):
@@ -566,26 +569,6 @@ def _detect_document_type_impl(img_hash=None) -> bool:
     if width * height < 100000:  # Approx 300x300 or smaller
         return False
 
-    # Quick check: If image has many colors, it's likely not a document
-    # Sample a subset of pixels for color analysis (faster than full histogram)
-    try:
-        # Sample pixels in a grid pattern
-        color_samples = []
-        for x in range(0, width, max(1, width // 10)):
-            for y in range(0, height, max(1, height // 10)):
-                try:
-                    color_samples.append(img.getpixel((x, y)))
-                except:
-                    pass
-
-        # Count unique colors in the sample
-        if img.mode == 'RGB':
-            unique_colors = len(set(color_samples))
-            if unique_colors > 1000:  # Many unique colors suggest a photo, not a document
-                return False
-    except:
-        pass  # If sampling fails, continue with regular analysis
-
     # Convert to grayscale for analysis (using faster conversion)
     gray_img = img.convert('L')
 
@@ -609,7 +592,7 @@ def _detect_document_type_impl(img_hash=None) -> bool:
 
     # Count edge pixels using threshold (faster than summing individual pixels)
     edge_data = edges.getdata()
-    edge_threshold =
+    edge_threshold = 40  # Lowered threshold to better detect handwritten texts
 
     # Use list comprehension for better performance
     edge_count = sum(1 for p in edge_data if p > edge_threshold)
@@ -621,18 +604,17 @@ def _detect_document_type_impl(img_hash=None) -> bool:
     bright_ratio = bright_count / (width * height)
 
     # Documents typically have more edges (text boundaries) and bright areas (background)
-
+    # Lowered edge threshold to better detect handwritten documents
+    return edge_ratio > 0.035 or bright_ratio > 0.4
 
-    # OpenCV path - optimized for speed
+    # OpenCV path - optimized for speed and enhanced for handwritten documents
     img_np = np.array(gray_img)
 
-    # Fast document detection heuristics
-
     # 1. Fast check: Variance of pixel values
-    # Documents typically have high variance (
-    #
+    # Documents typically have high variance (text on background)
+    # Handwritten documents may have less contrast than printed text
     std_dev = np.std(img_np)
-    if std_dev >
+    if std_dev > 45:  # Lowered threshold to better detect handwritten documents
         return True
 
     # 2. Quick check using downsampled image for edges
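Taken together, the PIL fallback in this hunk reduces to two ratios and a disjunction: enough edge pixels, or enough bright background, counts as a document. A self-contained sketch of that rule using the thresholds from the diff; the `> 200` brightness cutoff and the function name are assumptions, since `bright_count` is computed upstream of this hunk:

```python
import numpy as np
from PIL import Image, ImageFilter

def looks_like_document(path: str) -> bool:
    """Sketch of the PIL fallback: edge ratio or bright-background ratio."""
    gray = Image.open(path).convert('L')
    width, height = gray.size

    # Edge ratio: pixels above the lowered edge threshold (40) after FIND_EDGES
    edge_data = np.asarray(gray.filter(ImageFilter.FIND_EDGES))
    edge_ratio = np.count_nonzero(edge_data > 40) / (width * height)

    # Bright ratio: documents usually have a light paper background
    # (the > 200 cutoff is an assumption; the diff computes bright_count elsewhere)
    bright_ratio = np.count_nonzero(np.asarray(gray) > 200) / (width * height)

    # Same decision rule as the diff
    return edge_ratio > 0.035 or bright_ratio > 0.4
```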
@@ -643,22 +625,38 @@ def _detect_document_type_impl(img_hash=None) -> bool:
     else:
         small_img = img_np
 
-    # Use
-
+    # Use adaptive edge detection parameters for handwritten documents
+    # Lowered threshold to better detect fainter handwritten text
+    edges = cv2.Canny(small_img, 30, 130, L2gradient=False)
     edge_ratio = np.count_nonzero(edges) / edges.size
 
     # 3. Fast histogram approximation using bins
     # Instead of calculating full histogram, use bins for dark and light regions
-
-
+    # Adjusted for handwritten documents which may have more gray values
+    dark_mask = img_np < 60  # Increased threshold to capture lighter handwritten text
+    light_mask = img_np > 180  # Lowered threshold to account for aged paper
 
     dark_ratio = np.count_nonzero(dark_mask) / img_np.size
     light_ratio = np.count_nonzero(light_mask) / img_np.size
 
+    # Special analysis for handwritten documents
+    # Check for line-like structures typical in handwritten text
+    if CV2_AVAILABLE and edge_ratio > 0.02:  # Lower threshold to capture handwritten documents
+        # Try to find line segments that could indicate text lines
+        lines = cv2.HoughLinesP(edges, 1, np.pi/180,
+                                threshold=50,      # Lower threshold for detection
+                                minLineLength=30,  # Shorter lines for handwriting
+                                maxLineGap=20)     # Larger gap for discontinuous handwriting
+
+        # If we find enough line segments, it's likely a document with text
+        if lines is not None and len(lines) > 10:
+            return True
+
     # Combine heuristics for final decision
     # Documents typically have both dark (text) and light (background) regions,
     # and/or well-defined edges
-
+    # Lower thresholds for handwritten documents
+    return (dark_ratio > 0.03 and light_ratio > 0.25) or edge_ratio > 0.03
 
 # Removed caching to fix unhashable type error
 def preprocess_document_image(img: Image.Image) -> Image.Image:
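The HoughLinesP pass is the most distinctive addition in this hunk: handwriting can fail the contrast and brightness tests yet still form short, broken, roughly linear strokes. A self-contained sketch of that check, reusing the parameters from the diff (the function name is illustrative):

```python
import cv2
import numpy as np

def has_textlike_lines(gray: np.ndarray) -> bool:
    """Probabilistic Hough transform as a cheap text-line detector."""
    # Low Canny thresholds keep faint handwritten strokes
    edges = cv2.Canny(gray, 30, 130, L2gradient=False)
    if np.count_nonzero(edges) / edges.size <= 0.02:
        return False

    # Short minimum length and a generous gap tolerate broken handwriting
    lines = cv2.HoughLinesP(edges, 1, np.pi / 180,
                            threshold=50, minLineLength=30, maxLineGap=20)

    # Ten or more segments is taken as evidence of text lines
    return lines is not None and len(lines) > 10

# Example usage against one of the new test images:
# gray = cv2.imread("input/revere.jpg", cv2.IMREAD_GRAYSCALE)
# print(has_textlike_lines(gray))
```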
@@ -678,7 +676,8 @@ def preprocess_document_image(img: Image.Image) -> Image.Image:
 
 def _preprocess_document_image_impl() -> Image.Image:
     """
-    Optimized implementation of document preprocessing with adaptive processing based on image size
+    Optimized implementation of document preprocessing with adaptive processing based on image size.
+    Enhanced for better handwritten document processing.
     """
     # Fast path: Get image from thread-local storage
     if not hasattr(preprocess_document_image, "_current_img"):
@@ -690,94 +689,162 @@ def _preprocess_document_image_impl() -> Image.Image:
     width, height = img.size
     img_size = width * height
 
+    # Check if the image might be a handwritten document - use special processing
+    is_handwritten = False
+    try:
+        # Simple check for handwritten document characteristics
+        # Handwritten documents often have more varied strokes and less stark contrast
+        if CV2_AVAILABLE:
+            # Convert to grayscale and calculate local variance
+            gray_np = np.array(img.convert('L'))
+            # Higher variance in edge strengths can indicate handwriting
+            edges = cv2.Canny(gray_np, 30, 100)
+            if np.count_nonzero(edges) / edges.size > 0.02:  # Low edge threshold for handwriting
+                # Additional check with gradient magnitudes
+                sobelx = cv2.Sobel(gray_np, cv2.CV_64F, 1, 0, ksize=3)
+                sobely = cv2.Sobel(gray_np, cv2.CV_64F, 0, 1, ksize=3)
+                magnitude = np.sqrt(sobelx**2 + sobely**2)
+                # Handwriting typically has more variation in gradient magnitudes
+                if np.std(magnitude) > 20:
+                    is_handwritten = True
+    except:
+        # If detection fails, assume it's not handwritten
+        pass
+
     # Ultra-fast path for tiny images - just convert to grayscale with contrast enhancement
     if img_size < 300000:  # ~500x600 or smaller
         gray = img.convert('L')
+        # Lower contrast enhancement for handwritten documents
+        contrast_level = 1.4 if is_handwritten else IMAGE_PREPROCESSING["enhance_contrast"]
         enhancer = ImageEnhance.Contrast(gray)
-        return enhancer.enhance(
+        return enhancer.enhance(contrast_level)
 
     # Fast path for small images - minimal processing
     if img_size < 1000000:  # ~1000x1000 or smaller
         gray = img.convert('L')
+        # Use gentler contrast enhancement for handwritten documents
+        contrast_level = 1.4 if is_handwritten else IMAGE_PREPROCESSING["enhance_contrast"]
         enhancer = ImageEnhance.Contrast(gray)
-        enhanced = enhancer.enhance(
+        enhanced = enhancer.enhance(contrast_level)
+
         # Light sharpening only if sharpen is enabled
+        # Use milder sharpening for handwritten documents to preserve stroke detail
         if IMAGE_PREPROCESSING["sharpen"]:
-
+            if is_handwritten:
+                # Use edge enhancement which is gentler than SHARPEN for handwriting
+                enhanced = enhanced.filter(ImageFilter.EDGE_ENHANCE)
+            else:
+                enhanced = enhanced.filter(ImageFilter.SHARPEN)
         return enhanced
 
     # Standard path for medium images
     # Convert to grayscale (faster processing)
     gray = img.convert('L')
 
-    #
+    # Adaptive contrast enhancement based on document type
+    contrast_level = 1.4 if is_handwritten else IMAGE_PREPROCESSING["enhance_contrast"]
     enhancer = ImageEnhance.Contrast(gray)
-    enhanced = enhancer.enhance(
+    enhanced = enhancer.enhance(contrast_level)
 
-    # Apply light sharpening for text clarity
+    # Apply light sharpening for text clarity - adapt based on document type
     if IMAGE_PREPROCESSING["sharpen"]:
-
+        if is_handwritten:
+            # Use edge enhancement which is gentler than SHARPEN for handwriting
+            enhanced = enhanced.filter(ImageFilter.EDGE_ENHANCE)
+        else:
+            enhanced = enhanced.filter(ImageFilter.SHARPEN)
 
-    # Advanced processing
-
-    if img_size > 1500000 and CV2_AVAILABLE and IMAGE_PREPROCESSING["denoise"]:
+    # Advanced processing with OpenCV if available
+    if CV2_AVAILABLE and IMAGE_PREPROCESSING["denoise"]:
         try:
             # Convert to numpy array for OpenCV processing
             img_np = np.array(enhanced)
 
-
-
-            #
-
-
-
+            if is_handwritten:
+                # Special treatment for handwritten documents
+                # Use guided filter which preserves edges better than NLMeans
+                # Guided filter works well for handwriting by preserving stroke details
+                if img_size > 3000000:  # Large images - downsample first
+                    scale_factor = 0.5
+                    small_img = cv2.resize(img_np, None, fx=scale_factor, fy=scale_factor,
+                                           interpolation=cv2.INTER_AREA)
+                    # Apply bilateral filter which preserves edges while smoothing
+                    filtered = cv2.bilateralFilter(small_img, 9, 75, 75)
+                    # Resize back
+                    filtered = cv2.resize(filtered, (width, height), interpolation=cv2.INTER_LINEAR)
+                else:
+                    # Use bilateral filter directly for smaller images
+                    filtered = cv2.bilateralFilter(img_np, 7, 50, 50)
 
-            #
-
-            template_window = 5
-            search_window = 13
+                # Convert back to PIL Image
+                enhanced = Image.fromarray(filtered)
 
-            #
-
+                # For handwritten docs, avoid binary thresholding which can destroy subtle strokes
+                return enhanced
 
-            # Resize back to original size
-            denoised_np = cv2.resize(denoised_np, (width, height), interpolation=cv2.INTER_LINEAR)
             else:
-            #
-
-
-
+                # Standard document processing - optimized for printed text
+                # Optimize denoising parameters based on image size
+                if img_size > 4000000:  # Very large images
+                    # More aggressive downsampling for very large images
+                    scale_factor = 0.5
+                    downsample = cv2.resize(img_np, None, fx=scale_factor, fy=scale_factor,
+                                            interpolation=cv2.INTER_AREA)
+
+                    # Lighter denoising for downsampled image
+                    h_value = 7  # Strength parameter
+                    template_window = 5
+                    search_window = 13
+
+                    # Apply denoising on smaller image
+                    denoised_np = cv2.fastNlMeansDenoising(downsample, None, h_value, template_window, search_window)
+
+                    # Resize back to original size
+                    denoised_np = cv2.resize(denoised_np, (width, height), interpolation=cv2.INTER_LINEAR)
+                else:
+                    # Direct denoising for medium-large images
+                    h_value = 8  # Balanced for speed and quality
+                    template_window = 5
+                    search_window = 15
+
+                    # Apply denoising
+                    denoised_np = cv2.fastNlMeansDenoising(img_np, None, h_value, template_window, search_window)
 
-            #
-
-            # Convert back to PIL Image
-            enhanced = Image.fromarray(denoised_np)
-
-            # Apply adaptive thresholding only if it improves text visibility
-            # Create a binarized version of the image
-            if img_size < 8000000:  # Skip for extremely large images to save processing time
-                binary = cv2.adaptiveThreshold(denoised_np, 255,
-                                               cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
-                                               cv2.THRESH_BINARY, 11, 2)
-
-                #
-                #
-
-
-                # Check if binary preserves reasonable amount of white pixels (background)
-                if white_pixels_binary > white_pixels_orig * 0.8:
-                    # Binarization looks good, use it
-                    return Image.fromarray(binary)
+                # Convert back to PIL Image
+                enhanced = Image.fromarray(denoised_np)
+
+                # Apply adaptive thresholding only if it improves text visibility
+                # Create a binarized version of the image
+                if img_size < 8000000:  # Skip for extremely large images to save processing time
+                    binary = cv2.adaptiveThreshold(denoised_np, 255,
+                                                   cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+                                                   cv2.THRESH_BINARY, 11, 2)
+
+                    # Quick verification that binarization preserves text information
+                    # Use simplified check that works well for document images
+                    white_pixels_binary = np.count_nonzero(binary > 200)
+                    white_pixels_orig = np.count_nonzero(denoised_np > 200)
+
+                    # Check if binary preserves reasonable amount of white pixels (background)
+                    if white_pixels_binary > white_pixels_orig * 0.8:
+                        # Binarization looks good, use it
+                        return Image.fromarray(binary)
+
+                return enhanced
         except Exception as e:
             # If OpenCV processing fails, continue with PIL-enhanced image
             pass
 
     elif IMAGE_PREPROCESSING["denoise"]:
         # Fallback PIL denoising for systems without OpenCV
-
-
+        if is_handwritten:
+            # Lighter filtering for handwritten text to preserve details
+            # Use a smaller median filter for handwritten documents
+            enhanced = enhanced.filter(ImageFilter.MedianFilter(1))
+        else:
+            # Standard filtering for printed documents
+            enhanced = enhanced.filter(ImageFilter.MedianFilter(3))
 
     # Return enhanced grayscale image
     return enhanced
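The core of the preprocessing rewrite is a fork on `is_handwritten`: a Sobel-gradient variance test picks the branch, then handwriting gets an edge-preserving bilateral filter (despite the comment's mention of a guided filter, the code applies `cv2.bilateralFilter`) while printed text gets non-local-means denoising. A condensed sketch of that flow, with thresholds taken from the diff and an illustrative function name:

```python
import cv2
import numpy as np
from PIL import Image

def preprocess_sketch(path: str) -> Image.Image:
    """Condensed sketch: detect handwriting, then denoise accordingly."""
    gray = np.asarray(Image.open(path).convert('L'))

    # Handwriting test from the diff: enough edges plus high gradient variance
    edges = cv2.Canny(gray, 30, 100)
    sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
    sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
    magnitude = np.sqrt(sobelx**2 + sobely**2)
    is_handwritten = (np.count_nonzero(edges) / edges.size > 0.02
                      and np.std(magnitude) > 20)

    if is_handwritten:
        # Bilateral filtering smooths paper texture while keeping stroke edges
        out = cv2.bilateralFilter(gray, 7, 50, 50)
    else:
        # Non-local means suits the flatter regions of printed documents
        out = cv2.fastNlMeansDenoising(gray, None, 8, 5, 15)
    return Image.fromarray(out)
```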
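For printed documents the branch ends with Gaussian adaptive thresholding, accepted only when the binary image retains at least 80% of the original's white (background) pixels, a cheap guard against binarization swallowing light text or aged paper. The same check in isolation, as a sketch with an illustrative function name:

```python
import cv2
import numpy as np

def binarize_if_safe(denoised: np.ndarray) -> np.ndarray:
    """Adaptive threshold, kept only if enough white background survives."""
    binary = cv2.adaptiveThreshold(denoised, 255,
                                   cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 11, 2)

    # White-pixel census before and after, same > 200 cutoff as the diff
    white_binary = np.count_nonzero(binary > 200)
    white_orig = np.count_nonzero(denoised > 200)

    # Keep the binarized image only when the background is preserved
    return binary if white_binary > white_orig * 0.8 else denoised
```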
requirements.txt
CHANGED
@@ -1,6 +1,6 @@
 # Generated requirements for Hugging Face Spaces deployment
 
-streamlit>=1.
+streamlit>=1.20.0
 mistralai>=0.0.3
 Pillow>=9.0.0
 opencv-python-headless>=4.5.0
ui/custom.css
CHANGED
@@ -64,4 +64,36 @@
   font-size: 1.3rem;
   font-weight: bold;
   margin-bottom: 15px;
+}
+
+/* Fix for image preprocessing preview */
+.stExpander {
+  overflow: hidden !important;
+}
+
+.stExpander img {
+  max-width: 100% !important;
+  height: auto !important;
+  object-fit: contain !important;
+}
+
+/* Additional image fixes for all containers */
+.document-content img,
+.markdown-text-container img,
+.page-text-content img,
+.image-container img {
+  max-width: 100% !important;
+  height: auto !important;
+  object-fit: contain !important;
+}
+
+/* Responsive design rules */
+@media (max-width: 768px) {
+  .stExpander img,
+  .document-content img,
+  .markdown-text-container img,
+  .page-text-content img,
+  .image-container img {
+    max-width: 95% !important;
+  }
 }