# File: historical-ocr / test_segmentation_fix.py
# Last commit: "Consolidate segmentation improvements and code cleanup" (42dc069, milwright)
# Size: 4.21 kB
"""
Test script to verify the segmentation and OCR improvements.
This script will process an image using the updated segmentation algorithm
and show how text recognition is prioritized over images.
"""
import os
import json
import tempfile
from pathlib import Path
from PIL import Image
# Import the key components we modified
from image_segmentation import segment_image_for_ocr
from ocr_processing import process_file, process_result
from utils.image_utils import clean_ocr_result
import logging
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Request INFO-level output through the standard basicConfig setup.
logging.basicConfig(level=logging.INFO)
def run_test(image_path):
    """Run the segmentation step on one image and save its visualizations.

    Calls ``segment_image_for_ocr`` with ``vision_enabled=True`` and
    ``preserve_content=True``, reports how many text regions were detected,
    and writes whichever visualization images the result contains to
    ``output/segmentation_test``. Individual region crops, when present,
    are written to the ``text_regions`` subdirectory.

    Progress and errors are printed to stdout. A failure during
    segmentation or saving is also logged with its traceback, and the
    closing "Test complete" summary is skipped in that case.

    Args:
        image_path: Path to the image file to segment.

    Returns:
        None.
    """
    print(f"Testing image segmentation and OCR prioritization on: {image_path}")
    print("-" * 80)

    # Make sure the image exists before doing any work.
    if not os.path.exists(image_path):
        print(f"Error: Image not found at {image_path}")
        return

    # (result key, output filename, label used in the "Saved ..." message)
    visualizations = (
        ('text_regions', "text_regions_improved.jpg", "text regions visualization"),
        ('image_regions', "image_regions_improved.jpg", "image regions visualization"),
        ('combined_result', "combined_result_improved.jpg", "combined result"),
    )

    try:
        print("Step 1: Running image segmentation...")
        segmentation_results = segment_image_for_ocr(
            image_path,
            vision_enabled=True,
            preserve_content=True
        )

        # `or []` also covers a key that is present but set to None.
        text_regions_count = len(segmentation_results.get('text_regions_coordinates') or [])
        print(f"Detected {text_regions_count} text regions in the image")

        # Save output images for inspection.
        output_dir = Path("output/segmentation_test")
        output_dir.mkdir(parents=True, exist_ok=True)

        for key, filename, label in visualizations:
            # .get() so that a result lacking an optional visualization is
            # skipped instead of aborting the whole run with a KeyError.
            visualization = segmentation_results.get(key)
            if visualization is None:
                continue
            output_path = output_dir / filename
            visualization.save(output_path)
            print(f"Saved {label} to: {output_path}")

        # Extract individual text regions if available.
        region_images = segmentation_results.get('region_images') or []
        if region_images:
            region_dir = output_dir / "text_regions"
            region_dir.mkdir(parents=True, exist_ok=True)
            for idx, region_info in enumerate(region_images, start=1):
                region_info['pil_image'].save(region_dir / f"region_{idx}.jpg")
            print(f"Saved {len(region_images)} individual text regions to {region_dir}")
    except Exception as e:
        # Script-level boundary: report the failure, keep the traceback in the
        # log, and stop so the success summary below is not printed.
        print(f"Error during segmentation: {str(e)}")
        logger.exception("Segmentation test failed for %s", image_path)
        return

    print("-" * 80)
    print("Test complete. Check the output directory for results.")
    print("The text regions should now properly include all text content in the document.")
    print("Image regions should be minimal and not contain text.")
if __name__ == "__main__":
    # Preferred sample: an image with mixed text and image content.
    # Change this path to test a different file.
    test_image = "input/baldwin-letter.jpg"
    # Candidates tried, in order, when the preferred sample is missing.
    fallback_images = (
        "input/harpers.pdf",
        "input/magician-or-bottle-cungerer.jpg",
        "input/magellan-travels.jpg",
    )

    if not os.path.exists(test_image):
        print(f"Test image not found at {test_image}, looking for alternatives...")
        # Pick the first fallback that exists on disk, if any.
        replacement = next(
            (candidate for candidate in fallback_images if os.path.exists(candidate)),
            None,
        )
        if replacement is not None:
            test_image = replacement
            print(f"Using alternative test image: {test_image}")

    # Re-check: the fallback search may have found nothing.
    if not os.path.exists(test_image):
        print("No suitable test images found. Please place an image in the input directory.")
    else:
        run_test(test_image)