Spaces:
Sleeping
Sleeping
import os | |
import sys | |
import time | |
from PIL import Image | |
import numpy as np | |
# Add the parent directory to the path so we can import our modules | |
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
from utils.image_preprocessing import preprocess_image | |
from models.document_ai import extract_text_and_layout | |
def test_menu_extraction(image_path):
    """
    Test the OCR extraction on a single menu image.

    The image is opened, passed through ``preprocess_image`` and then through
    ``extract_text_and_layout``. Any exception raised while loading,
    preprocessing or extracting is caught and reported in the returned
    dictionary rather than propagated, so a single unreadable file cannot
    abort a batch run.

    Args:
        image_path: Path to the menu image

    Returns:
        Dictionary with test results. Keys: ``image_path``, ``success``,
        ``processing_time`` (seconds, includes loading and preprocessing),
        ``extracted_text``, ``text_length`` and ``word_count``. When the
        run failed, an additional ``error`` key holds the exception message.
    """
    start_time = time.time()
    extracted_text = ''
    error = None

    try:
        # The context manager releases the image's file handle. Extraction
        # happens inside it in case the preprocessed image still reads lazily
        # from the opened file (not verifiable from this module).
        with Image.open(image_path) as image:
            preprocessed_img = preprocess_image(image)
            result = extract_text_and_layout(preprocessed_img)

        # A result without a 'words' entry counts as success with empty text.
        if 'words' in result:
            extracted_text = ' '.join(result['words'])
        success = True
    except Exception as e:
        # Broad on purpose: this is a test harness that records failures
        # instead of crashing on them.
        extracted_text = ''
        success = False
        error = str(e)

    end_time = time.time()

    # Compile results
    test_results = {
        'image_path': image_path,
        'success': success,
        'processing_time': end_time - start_time,
        'extracted_text': extracted_text,
        'text_length': len(extracted_text),
        # ''.split() is [], so no special case is needed for empty text.
        'word_count': len(extracted_text.split()),
    }
    if not success:
        test_results['error'] = error
    return test_results
def run_batch_test(image_dir):
    """
    Run tests on all images in a directory.

    Only regular files whose names end in ``.png``, ``.jpg`` or ``.jpeg``
    (case-insensitive) are processed. Files are visited in sorted name order
    so that the printed progress and the returned list are reproducible
    between runs; ``os.listdir`` alone gives no ordering guarantee.

    Args:
        image_dir: Directory containing menu images

    Returns:
        List of test results, one dictionary per processed image, in the
        order the images were processed.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    results = []

    for filename in sorted(os.listdir(image_dir)):
        if not filename.lower().endswith(image_extensions):
            continue

        image_path = os.path.join(image_dir, filename)
        # Skip directories (or other non-files) that merely look like images.
        if not os.path.isfile(image_path):
            continue

        result = test_menu_extraction(image_path)
        results.append(result)

        # Print progress
        status = "SUCCESS" if result['success'] else "FAILED"
        print(f"{filename}: {status} - {result['word_count']} words extracted")

    return results
if __name__ == "__main__":
    # Sample menus are expected under <parent of this file's directory>/assets/sample_menus.
    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    sample_dir = os.path.join(project_root, "assets", "sample_menus")

    if not os.path.exists(sample_dir):
        print(f"Sample directory not found: {sample_dir}")
        print("Creating directory and downloading sample images...")
        os.makedirs(sample_dir, exist_ok=True)
        # No download is implemented here; only a placeholder note is written
        # so that sample images can be added manually.
        readme_path = os.path.join(sample_dir, "README.txt")
        with open(readme_path, "w") as readme:
            readme.write("Add sample menu images to this directory for testing.")

    results = run_batch_test(sample_dir)

    # Print summary
    total = len(results)
    passed = len([entry for entry in results if entry['success']])
    print(f"\nSummary: {passed}/{total} tests passed")

    # Averages are only meaningful (and only computable) with at least one result.
    if results:
        word_total = sum(entry['word_count'] for entry in results)
        time_total = sum(entry['processing_time'] for entry in results)
        avg_words = word_total / total
        avg_time = time_total / total
        print(f"Average words extracted: {avg_words:.1f}")
        print(f"Average processing time: {avg_time:.2f} seconds")