Spaces:
Sleeping
Sleeping
File size: 3,384 Bytes
93c4f75 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
import os
import sys
import time
from PIL import Image
import numpy as np
# Add the parent directory to the path so we can import our modules
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils.image_preprocessing import preprocess_image
from models.document_ai import extract_text_and_layout
def test_menu_extraction(image_path):
    """
    Test the OCR extraction on a single menu image.

    The image is opened, passed through ``preprocess_image`` and then through
    ``extract_text_and_layout``. Any exception raised along the way (image
    cannot be opened, preprocessing fails, extraction fails) is captured in
    the returned dictionary instead of being propagated, so a single bad
    image cannot abort a batch run.

    Args:
        image_path: Path to the menu image

    Returns:
        Dictionary with test results:
            image_path: the path that was tested
            success: True if the whole pipeline ran without raising
            processing_time: elapsed seconds for load + preprocess + extract
            extracted_text: entries of result['words'] joined by single
                spaces; '' when the key is absent or the run failed
            text_length: number of characters in extracted_text
            word_count: number of whitespace-separated tokens in
                extracted_text
            error: str() of the caught exception; present only when
                success is False
    """
    # perf_counter is monotonic, so the measured duration cannot go negative
    # if the system clock is adjusted mid-run.
    start_time = time.perf_counter()

    extracted_text = ''
    error = None
    try:
        # The context manager releases the underlying file handle even when
        # preprocessing or extraction raises.
        with Image.open(image_path) as image:
            preprocessed_img = preprocess_image(image)
            # Extraction stays inside the `with` in case the preprocessed
            # image still relies on the open source file.
            result = extract_text_and_layout(preprocessed_img)
        extracted_text = ' '.join(result['words']) if 'words' in result else ''
        success = True
    except Exception as e:
        # Deliberate best-effort boundary: report the failure in the result.
        extracted_text = ''
        success = False
        error = str(e)

    end_time = time.perf_counter()

    # Compile results
    test_results = {
        'image_path': image_path,
        'success': success,
        'processing_time': end_time - start_time,
        'extracted_text': extracted_text,
        'text_length': len(extracted_text),
        # ''.split() is [], so an empty extraction counts as 0 words.
        'word_count': len(extracted_text.split()),
    }
    if not success:
        test_results['error'] = error
    return test_results
def run_batch_test(image_dir):
    """
    Run tests on all images in a directory.

    Only regular files whose names end in .png, .jpg or .jpeg
    (case-insensitive) are tested. Entries are processed in sorted filename
    order so that the progress output and the returned list are reproducible
    between runs. One progress line is printed per tested image.

    Args:
        image_dir: Directory containing menu images

    Returns:
        List of test results, one dictionary per tested image as returned by
        test_menu_extraction, in processing order. Empty when the directory
        contains no matching files.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    results = []

    # os.listdir yields entries in arbitrary order; sort for determinism.
    for filename in sorted(os.listdir(image_dir)):
        if not filename.lower().endswith(image_extensions):
            continue

        image_path = os.path.join(image_dir, filename)
        # A sub-directory may carry an image-like name; it is not an image.
        if not os.path.isfile(image_path):
            continue

        result = test_menu_extraction(image_path)
        results.append(result)

        # Print progress
        status = "SUCCESS" if result['success'] else "FAILED"
        print(f"{filename}: {status} - {result['word_count']} words extracted")

    return results
if __name__ == "__main__":
    # Sample menus are expected under <project root>/assets/sample_menus,
    # where the project root is the parent of this script's directory.
    script_path = os.path.abspath(__file__)
    project_root = os.path.dirname(os.path.dirname(script_path))
    sample_dir = os.path.join(project_root, "assets", "sample_menus")

    if not os.path.exists(sample_dir):
        print(f"Sample directory not found: {sample_dir}")
        print("Creating directory and downloading sample images...")
        os.makedirs(sample_dir, exist_ok=True)
        # No download is implemented here; a README placeholder is written
        # so the user knows to add sample images manually.
        readme_path = os.path.join(sample_dir, "README.txt")
        with open(readme_path, "w") as f:
            f.write("Add sample menu images to this directory for testing.")

    results = run_batch_test(sample_dir)

    # Print summary
    total = len(results)
    success_count = len([r for r in results if r['success']])
    print(f"\nSummary: {success_count}/{total} tests passed")

    # Averages are only defined when at least one image was tested.
    if results:
        word_total = sum(r['word_count'] for r in results)
        time_total = sum(r['processing_time'] for r in results)
        avg_words = word_total / total
        avg_time = time_total / total
        print(f"Average words extracted: {avg_words:.1f}")
        print(f"Average processing time: {avg_time:.2f} seconds")
|