milwright committed on
Commit
2575b9f
·
verified ·
1 Parent(s): d767442

Delete test_adaptive_segmentation.py

Browse files
Files changed (1) hide show
  1. test_adaptive_segmentation.py +0 -98
test_adaptive_segmentation.py DELETED
@@ -1,98 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Test script for adaptive content-aware segmentation.
4
- Processes sample documents to validate the improved segmentation approach.
5
- """
6
-
7
- import os
8
- import sys
9
- import logging
10
- from pathlib import Path
11
- import cv2
12
- import numpy as np
13
- from PIL import Image
14
- import json
15
-
16
- # Configure logging
17
- logging.basicConfig(level=logging.INFO,
18
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
19
- logger = logging.getLogger(__name__)
20
-
21
- # Import segmentation module
22
- from image_segmentation import segment_image_for_ocr, process_segmented_image
23
-
24
- # Test documents
25
- TEST_DOCUMENTS = [
26
- "input/baldwin-15th-north.jpg", # Document with varied text density and uppercase sections
27
- "input/americae-retectio.jpg", # Historical document
28
- "input/handwritten-letter.jpg", # Handwritten document
29
- ]
30
-
31
def _write_markdown_report(report_path, results):
    """Write the per-document segmentation summary as a Markdown file.

    Args:
        report_path: Destination path of the Markdown report.
        results: Mapping of document name to a dict holding the keys
            ``regions_count``, ``output_files`` and ``regions``.
    """
    # Explicit encoding so the report does not depend on the platform default
    # (document and file names may contain non-ASCII characters).
    with open(report_path, "w", encoding="utf-8") as f:
        f.write("# Adaptive Segmentation Test Results\n\n")
        f.write("This report summarizes the results of testing the adaptive content-aware segmentation approach.\n\n")

        for document_name, result in results.items():
            f.write(f"## {document_name}\n\n")
            f.write(f"- Regions detected: {result['regions_count']}\n")
            f.write("- Output files:\n")
            for file_type, file_path in result.get('output_files', {}).items():
                f.write(f" - {file_type}: {file_path}\n")
            f.write("\n")

            # Add region analysis
            regions = result.get('regions')
            if regions:
                f.write("### Region Analysis\n\n")
                f.write("| Region | X | Y | Width | Height |\n")
                f.write("|--------|---|---|-------|--------|\n")
                # Each region is unpacked as a 4-item (x, y, width, height) sequence.
                for number, region in enumerate(regions, start=1):
                    x, y, w, h = region
                    f.write(f"| {number} | {x} | {y} | {w} | {h} |\n")
                f.write("\n")


def test_adaptive_segmentation():
    """
    Run the adaptive segmentation on the test documents and write summary reports.

    Every path in ``TEST_DOCUMENTS`` that exists on disk is passed to
    ``process_segmented_image``. For documents where text regions are
    reported, the region count, output files and region coordinates are
    collected and written to ``output/adaptive_test`` as a JSON file and a
    Markdown report. Missing documents and documents without text regions
    are logged and skipped.
    """
    # Create output directory
    output_dir = Path("output") / "adaptive_test"
    output_dir.mkdir(parents=True, exist_ok=True)

    results = {}

    # Process each test document
    for document_path in TEST_DOCUMENTS:
        document_file = Path(document_path)
        if not document_file.exists():
            logger.warning(f"Test document not found: {document_path}")
            continue

        logger.info(f"Processing test document: {document_file.name}")

        # NOTE(review): the result is used as a dict with the keys
        # 'text_regions_coordinates' and 'output_files' -- confirm against
        # image_segmentation.process_segmented_image.
        segmentation_results = process_segmented_image(document_file, output_dir)

        regions = segmentation_results.get('text_regions_coordinates')
        if not regions:
            # Previously such documents vanished from the report without a trace.
            logger.warning(f"No text regions found in: {document_file.name}")
            continue

        output_files = segmentation_results.get('output_files', {})

        # Print analysis
        logger.info(f"Document: {document_file.name}")
        logger.info(f"Found {len(regions)} text regions")
        logger.info(f"Output files: {output_files}")

        # Store results
        results[document_file.name] = {
            "regions_count": len(regions),
            "output_files": output_files,
            "regions": regions,
        }

    # Save the machine-readable results
    with open(output_dir / "adaptive_segmentation_results.json", "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)

    # Create the human-readable summary report
    report_path = output_dir / "adaptive_segmentation_report.md"
    _write_markdown_report(report_path, results)

    logger.info(f"Test completed. Results saved to {output_dir}")
    logger.info(f"Summary report: {report_path}")
97
- if __name__ == "__main__":
98
- test_adaptive_segmentation()