milwright committed on
Commit
51f51ba
·
verified ·
1 Parent(s): 8b46e90

Delete test_segmentation_fix.py

Browse files
Files changed (1) hide show
  1. test_segmentation_fix.py +0 -100
test_segmentation_fix.py DELETED
@@ -1,100 +0,0 @@
1
- """
2
- Test script to verify the segmentation and OCR improvements.
3
- This script will process an image using the updated segmentation algorithm
4
- and show how text recognition is prioritized over images.
5
- """
6
-
7
- import os
8
- import json
9
- import tempfile
10
- from pathlib import Path
11
- from PIL import Image
12
-
13
- # Import the key components we modified
14
- from image_segmentation import segment_image_for_ocr
15
- from ocr_processing import process_file, process_result
16
- from utils.image_utils import clean_ocr_result
17
- import logging
18
-
19
- # Configure logging
20
- logging.basicConfig(level=logging.INFO)
21
- logger = logging.getLogger(__name__)
22
-
23
def run_test(image_path):
    """Run the segmentation step on one image and save its outputs for inspection.

    Calls ``segment_image_for_ocr`` with ``vision_enabled=True`` and
    ``preserve_content=True``, prints how many text regions were reported,
    and writes any returned visualizations (plus the individual region
    images, when present) under ``output/segmentation_test``.

    Args:
        image_path: Path of the image to segment. If nothing exists at this
            path, an error is printed and the function returns immediately.

    Returns:
        None. Progress and errors are reported with ``print``; any exception
        raised while segmenting or saving is caught and printed rather than
        propagated.
    """
    print(f"Testing image segmentation and OCR prioritization on: {image_path}")
    print("-" * 80)

    # Make sure the image exists
    if not os.path.exists(image_path):
        print(f"Error: Image not found at {image_path}")
        return

    # (result key, output file name, label used in the "Saved ..." message)
    visualizations = (
        ("text_regions", "text_regions_improved.jpg", "text regions visualization"),
        ("image_regions", "image_regions_improved.jpg", "image regions visualization"),
        ("combined_result", "combined_result_improved.jpg", "combined result"),
    )

    # 1. First run image segmentation directly
    try:
        print("Step 1: Running image segmentation...")
        segmentation_results = segment_image_for_ocr(
            image_path,
            vision_enabled=True,
            preserve_content=True,
        )

        # Print segmentation info
        text_regions_count = len(segmentation_results.get("text_regions_coordinates", []))
        print(f"Detected {text_regions_count} text regions in the image")

        # Save output images for inspection
        output_dir = Path("output/segmentation_test")
        output_dir.mkdir(parents=True, exist_ok=True)

        for key, file_name, label in visualizations:
            # .get() so that a result lacking one visualization key does not
            # raise KeyError and skip the remaining saves.
            image = segmentation_results.get(key)
            if image is None:
                continue
            output_path = output_dir / file_name
            image.save(output_path)
            print(f"Saved {label} to: {output_path}")

        # Extract individual text regions if available
        region_images = segmentation_results.get("region_images")
        if region_images:
            region_dir = output_dir / "text_regions"
            region_dir.mkdir(exist_ok=True)

            for idx, region_info in enumerate(region_images, start=1):
                region_info["pil_image"].save(region_dir / f"region_{idx}.jpg")

            print(f"Saved {len(region_images)} individual text regions to {region_dir}")
    except Exception as e:
        # Deliberately broad: this is a manual diagnostic script, so any
        # failure is reported to the console instead of aborting the run.
        print(f"Error during segmentation: {e}")

    print("-" * 80)
    print("Test complete. Check the output directory for results.")
    print("The text regions should now properly include all text content in the document.")
    print("Image regions should be minimal and not contain text.")
82
-
83
if __name__ == "__main__":
    # Preferred sample: an image that has mixed text and image content.
    # Change this to any image path you want to test.
    test_image = "input/baldwin-letter.jpg"

    # Alternatives tried, in order, when the preferred sample is absent.
    fallback_images = (
        "input/harpers.pdf",
        "input/magician-or-bottle-cungerer.jpg",
        "input/magellan-travels.jpg",
    )

    if not os.path.exists(test_image):
        print(f"Test image not found at {test_image}, looking for alternatives...")

        # Pick the first fallback that is actually present on disk, if any.
        replacement = next(
            (candidate for candidate in fallback_images if os.path.exists(candidate)),
            None,
        )
        if replacement is not None:
            test_image = replacement
            print(f"Using alternative test image: {test_image}")

    if not os.path.exists(test_image):
        print("No suitable test images found. Please place an image in the input directory.")
    else:
        run_test(test_image)