Spaces:
Sleeping
Sleeping
| import os | |
| import sys | |
| import time | |
| from PIL import Image | |
| import numpy as np | |
| # Add the parent directory to the path so we can import our modules | |
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
| from utils.image_preprocessing import preprocess_image | |
| from models.document_ai import extract_text_and_layout | |
def test_menu_extraction(image_path):
    """
    Test the OCR extraction on a single menu image.

    The image is opened, preprocessed and handed to the extraction model.
    Any exception raised along the way (unreadable image, preprocessing
    error, extraction failure) is captured and reported in the result
    instead of propagating, so a single bad image cannot abort a batch run.

    Args:
        image_path: Path to the menu image

    Returns:
        Dictionary with test results. Keys: 'image_path', 'success',
        'processing_time' (seconds), 'extracted_text', 'text_length',
        'word_count', plus 'error' (the exception message) only when
        'success' is False.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()

    extracted_text = ''
    error = None
    try:
        # The context manager guarantees the underlying file is closed.
        # Extraction runs inside the `with` in case preprocessing hands back
        # an image that still reads lazily from the open file.
        with Image.open(image_path) as image:
            preprocessed_img = preprocess_image(image)
            result = extract_text_and_layout(preprocessed_img)
        extracted_text = ' '.join(result['words']) if 'words' in result else ''
    except Exception as e:
        # Deliberately broad: this is a test harness boundary whose job is to
        # record failures of any kind rather than crash.
        extracted_text = ''
        error = str(e)

    processing_time = time.perf_counter() - start_time
    # `is None` (not truthiness) so an exception with an empty message
    # still counts as a failure.
    success = error is None

    # Compile results
    test_results = {
        'image_path': image_path,
        'success': success,
        'processing_time': processing_time,
        'extracted_text': extracted_text,
        'text_length': len(extracted_text),
        # str.split() on an empty string yields [], so this is 0 when no
        # text was extracted.
        'word_count': len(extracted_text.split()),
    }
    if not success:
        test_results['error'] = error
    return test_results
def run_batch_test(image_dir):
    """
    Run tests on all images in a directory.

    Only regular files whose name ends in .png, .jpg or .jpeg
    (case-insensitive) are tested. Files are processed in sorted name order
    so that the printed progress and the returned list are reproducible
    between runs. One progress line is printed per image.

    Args:
        image_dir: Directory containing menu images

    Returns:
        List of test results, one dictionary per tested image, as returned
        by test_menu_extraction
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    results = []
    # os.listdir returns entries in arbitrary order; sort for determinism.
    for filename in sorted(os.listdir(image_dir)):
        if not filename.lower().endswith(image_extensions):
            continue
        image_path = os.path.join(image_dir, filename)
        # Skip directories (or other non-files) that merely look like images.
        if not os.path.isfile(image_path):
            continue
        result = test_menu_extraction(image_path)
        results.append(result)
        # Print progress
        status = "SUCCESS" if result['success'] else "FAILED"
        print(f"{filename}: {status} - {result['word_count']} words extracted")
    return results
if __name__ == "__main__":
    # Sample menus are expected in <parent of this file's directory>/assets/sample_menus
    this_file = os.path.abspath(__file__)
    project_root = os.path.dirname(os.path.dirname(this_file))
    sample_dir = os.path.join(project_root, "assets", "sample_menus")

    if not os.path.exists(sample_dir):
        print(f"Sample directory not found: {sample_dir}")
        print("Creating directory and downloading sample images...")
        os.makedirs(sample_dir, exist_ok=True)
        # No download is implemented yet; leave a note asking for sample
        # images to be added by hand.
        readme_path = os.path.join(sample_dir, "README.txt")
        with open(readme_path, "w") as readme:
            readme.write("Add sample menu images to this directory for testing.")

    results = run_batch_test(sample_dir)

    # Summary: pass count first, then averages (skipped when nothing ran,
    # which also avoids dividing by zero).
    total = len(results)
    passed = sum(1 for outcome in results if outcome['success'])
    print(f"\nSummary: {passed}/{total} tests passed")
    if results:
        word_total = sum(outcome['word_count'] for outcome in results)
        time_total = sum(outcome['processing_time'] for outcome in results)
        mean_words = word_total / total
        mean_seconds = time_total / total
        print(f"Average words extracted: {mean_words:.1f}")
        print(f"Average processing time: {mean_seconds:.2f} seconds")