Spaces:
Running
Running
Delete test_adaptive_segmentation.py
Browse files
test_adaptive_segmentation.py
DELETED
@@ -1,98 +0,0 @@
|
|
1 |
-
#!/usr/bin/env python3
|
2 |
-
"""
|
3 |
-
Test script for adaptive content-aware segmentation.
|
4 |
-
Processes sample documents to validate the improved segmentation approach.
|
5 |
-
"""
|
6 |
-
|
7 |
-
import os
|
8 |
-
import sys
|
9 |
-
import logging
|
10 |
-
from pathlib import Path
|
11 |
-
import cv2
|
12 |
-
import numpy as np
|
13 |
-
from PIL import Image
|
14 |
-
import json
|
15 |
-
|
16 |
-
# Configure logging: timestamped, INFO-level records for everything this
# script emits. basicConfig only takes effect if the root logger has no
# handlers yet, so it is harmless when the host application configured
# logging first.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)
|
20 |
-
|
21 |
-
# Import segmentation module
|
22 |
-
from image_segmentation import segment_image_for_ocr, process_segmented_image
|
23 |
-
|
24 |
-
# Test documents. Paths are relative, so they resolve against the current
# working directory when the script runs.
TEST_DOCUMENTS = [
    "input/baldwin-15th-north.jpg",  # Document with varied text density and uppercase sections
    "input/americae-retectio.jpg",  # Historical document
    "input/handwritten-letter.jpg",  # Handwritten document
]
|
30 |
-
|
31 |
-
def _json_default(value):
    """Convert values the ``json`` module cannot encode natively.

    Only called by ``json.dump`` for objects it would otherwise reject, so
    already-serializable data is written exactly as before.
    """
    if isinstance(value, Path):
        return str(value)
    # NOTE(review): the segmentation module's return types are not visible
    # here; NumPy scalars/arrays (which expose ``tolist``) are a plausible
    # source of non-JSON values for region coordinates -- confirm.
    to_list = getattr(value, "tolist", None)
    if callable(to_list):
        return to_list()
    raise TypeError(
        f"Object of type {type(value).__name__} is not JSON serializable"
    )


def _write_markdown_report(report_path, results):
    """Write the per-document Markdown summary of *results* to *report_path*."""
    lines = [
        "# Adaptive Segmentation Test Results\n\n",
        "This report summarizes the results of testing the adaptive content-aware segmentation approach.\n\n",
    ]

    for document_name, result in results.items():
        lines.append(f"## {document_name}\n\n")
        lines.append(f"- Regions detected: {result['regions_count']}\n")
        lines.append("- Output files:\n")
        for file_type, file_path in result.get('output_files', {}).items():
            # Two-space indent so Markdown renders these as children of the
            # "Output files" bullet rather than as sibling items.
            lines.append(f"  - {file_type}: {file_path}\n")
        lines.append("\n")

        # Add region analysis
        regions = result.get('regions')
        if regions:
            lines.append("### Region Analysis\n\n")
            lines.append("| Region | X | Y | Width | Height |\n")
            lines.append("|--------|---|---|-------|--------|\n")
            for index, (x, y, w, h) in enumerate(regions, start=1):
                lines.append(f"| {index} | {x} | {y} | {w} | {h} |\n")
            lines.append("\n")

    # Explicit encoding: document names may contain non-ASCII characters and
    # the platform default encoding is not guaranteed to handle them.
    with open(report_path, "w", encoding="utf-8") as f:
        f.writelines(lines)


def test_adaptive_segmentation():
    """
    Run the adaptive segmentation on the test documents and write reports.

    Each path in ``TEST_DOCUMENTS`` that exists on disk is passed to
    ``process_segmented_image`` together with the output directory
    ``output/adaptive_test``. For every document that yields text regions,
    the region count, output files and region coordinates are collected and
    then written to ``adaptive_segmentation_results.json`` and
    ``adaptive_segmentation_report.md`` in that directory.

    Missing documents and documents without detected regions are logged and
    left out of both reports.
    """
    # Create output directory
    output_dir = Path("output") / "adaptive_test"
    output_dir.mkdir(parents=True, exist_ok=True)

    results = {}

    # Process each test document
    for document_path in TEST_DOCUMENTS:
        document_file = Path(document_path)
        if not document_file.exists():
            logger.warning("Test document not found: %s", document_path)
            continue

        logger.info("Processing test document: %s", document_file.name)

        # Process the document
        segmentation_results = process_segmented_image(document_file, output_dir)

        regions = segmentation_results.get('text_regions_coordinates')
        if not regions:
            # Previously skipped silently; surface it so an empty report
            # entry is explainable from the log.
            logger.warning("No text regions detected in %s", document_file.name)
            continue

        output_files = segmentation_results.get('output_files', {})

        # Print analysis
        logger.info("Document: %s", document_file.name)
        logger.info("Found %s text regions", len(regions))
        logger.info("Output files: %s", output_files)

        # Store results
        results[document_file.name] = {
            "regions_count": len(regions),
            "output_files": output_files,
            "regions": regions,
        }

    # Save machine-readable summary
    results_path = output_dir / "adaptive_segmentation_results.json"
    with open(results_path, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2, default=_json_default)

    # Create a human-readable summary report
    report_path = output_dir / "adaptive_segmentation_report.md"
    _write_markdown_report(report_path, results)

    logger.info("Test completed. Results saved to %s", output_dir)
    logger.info("Summary report: %s", report_path)


if __name__ == "__main__":
    test_adaptive_segmentation()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|