Spaces:

milwright
/

historical-ocr

Running

App Files Files Community

historical-ocr / test_segmentation_fix.py

milwright

Consolidate segmentation improvements and code cleanup

42dc069 3 months ago

raw

history blame

4.21 kB

	"""
	Test script to verify the segmentation and OCR improvements.
	This script runs the updated segmentation algorithm on a sample image and
	saves the resulting text-region, image-region and combined visualizations
	so you can check that text regions are prioritized over image regions.
	"""

	import os
	import json
	import tempfile
	from pathlib import Path
	from PIL import Image

	# Import the key components we modified
	from image_segmentation import segment_image_for_ocr
	from ocr_processing import process_file, process_result
	from utils.image_utils import clean_ocr_result
	import logging

	# Configure logging: set up the root logger at INFO level, then create a
	# logger named after this module.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	def run_test(image_path):
	    """Run the segmentation check on one image and save the results for inspection.

	    Calls ``segment_image_for_ocr`` with ``vision_enabled=True`` and
	    ``preserve_content=True``, reports how many text regions were detected,
	    and saves whichever visualizations the result provides under
	    ``output/segmentation_test``. Individual region crops, when present, go
	    to its ``text_regions`` subdirectory.

	    Args:
	        image_path: Path of the image to segment.

	    Returns:
	        None. Progress and errors are reported on stdout. A failure during
	        segmentation or saving is caught here and logged with its traceback
	        instead of being raised.
	    """
	    print(f"Testing image segmentation and OCR prioritization on: {image_path}")
	    print("-" * 80)

	    # Make sure the image exists
	    if not os.path.exists(image_path):
	        print(f"Error: Image not found at {image_path}")
	        return

	    # (result key, output file name, label used in the success message)
	    visualizations = (
	        ("text_regions", "text_regions_improved.jpg", "text regions visualization"),
	        ("image_regions", "image_regions_improved.jpg", "image regions visualization"),
	        ("combined_result", "combined_result_improved.jpg", "combined result"),
	    )

	    try:
	        print("Step 1: Running image segmentation...")
	        segmentation_results = segment_image_for_ocr(
	            image_path,
	            vision_enabled=True,
	            preserve_content=True,
	        )

	        # `or []` also covers a key that is present but set to None.
	        text_regions_count = len(segmentation_results.get('text_regions_coordinates') or [])
	        print(f"Detected {text_regions_count} text regions in the image")

	        # Save output images for inspection
	        output_dir = Path("output/segmentation_test")
	        output_dir.mkdir(parents=True, exist_ok=True)

	        for key, file_name, label in visualizations:
	            # .get() treats a missing key like a None value, so one absent
	            # visualization no longer aborts the remaining saves.
	            visualization = segmentation_results.get(key)
	            if visualization is None:
	                continue
	            output_path = output_dir / file_name
	            visualization.save(output_path)
	            print(f"Saved {label} to: {output_path}")

	        # Extract individual text regions if available
	        region_images = segmentation_results.get('region_images')
	        if region_images:
	            region_dir = output_dir / "text_regions"
	            region_dir.mkdir(exist_ok=True)

	            # Output files are numbered from 1.
	            for idx, region_info in enumerate(region_images, start=1):
	                region_info['pil_image'].save(region_dir / f"region_{idx}.jpg")

	            print(f"Saved {len(region_images)} individual text regions to {region_dir}")
	    except Exception as e:
	        # Top-level boundary of a diagnostic script: report the error, keep
	        # the traceback, and do not claim the run succeeded.
	        print(f"Error during segmentation: {e}")
	        logging.getLogger(__name__).exception("Segmentation test failed for %s", image_path)
	        print("-" * 80)
	        print("Test failed. See the error above.")
	        return

	    print("-" * 80)
	    print("Test complete. Check the output directory for results.")
	    print("The text regions should now properly include all text content in the document.")
	    print("Image regions should be minimal and not contain text.")

	if __name__ == "__main__":
	    # Preferred sample: an image with mixed text and image content.
	    # Point this at any other image path to test a different document.
	    test_image = "input/baldwin-letter.jpg"
	    fallback_images = (
	        "input/harpers.pdf",
	        "input/magician-or-bottle-cungerer.jpg",
	        "input/magellan-travels.jpg",
	    )

	    if not os.path.exists(test_image):
	        print(f"Test image not found at {test_image}, looking for alternatives...")

	        # Take the first fallback candidate that exists on disk, if any.
	        replacement = next(filter(os.path.exists, fallback_images), None)
	        if replacement is not None:
	            test_image = replacement
	            print(f"Using alternative test image: {test_image}")

	    # Re-check: the preferred image and every fallback may all be missing.
	    if not os.path.exists(test_image):
	        print("No suitable test images found. Please place an image in the input directory.")
	    else:
	        run_test(test_image)