# File size: 3,384 Bytes
# 93c4f75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import os
import sys
import time
from PIL import Image
import numpy as np

# Add the parent directory to the path so we can import our modules
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from utils.image_preprocessing import preprocess_image
from models.document_ai import extract_text_and_layout

def test_menu_extraction(image_path):
    """
    Test the OCR extraction on a single menu image.

    The image is opened, preprocessed and handed to the text extractor. Any
    exception raised along the way (unreadable image file, preprocessing
    failure, extraction failure) is captured in the returned dictionary
    instead of propagating, so one bad file cannot abort a batch run.

    Args:
        image_path: Path to the menu image

    Returns:
        Dictionary with test results. Keys: 'image_path', 'success',
        'processing_time' (seconds, wall clock), 'extracted_text',
        'text_length', 'word_count', and, only when the run failed,
        'error' (the exception message).
    """
    start_time = time.time()

    extracted_text = ''
    error = None

    # Load, preprocess and extract inside one guarded region.
    try:
        # Pillow opens images lazily and keeps the file handle open; the
        # context manager releases it once the pipeline is done. The whole
        # pipeline stays inside the `with` so the pixel data is still
        # readable while preprocessing/extraction run.
        with Image.open(image_path) as image:
            preprocessed_img = preprocess_image(image)
            result = extract_text_and_layout(preprocessed_img)

        # A result without a 'words' entry counts as "nothing extracted".
        if 'words' in result:
            extracted_text = ' '.join(result['words'])
        success = True
    except Exception as e:
        # Deliberate best-effort: this is a test harness, so every failure
        # is recorded and reported rather than raised.
        extracted_text = ''
        success = False
        error = str(e)

    end_time = time.time()

    # Compile results
    test_results = {
        'image_path': image_path,
        'success': success,
        'processing_time': end_time - start_time,
        'extracted_text': extracted_text,
        'text_length': len(extracted_text),
        'word_count': len(extracted_text.split()) if extracted_text else 0
    }

    if not success:
        test_results['error'] = error

    return test_results

def run_batch_test(image_dir):
    """
    Run tests on all images in a directory.

    Only regular files whose names end in .png, .jpg or .jpeg
    (case-insensitive) are tested. Entries are processed in sorted filename
    order so the printed progress and the returned list are reproducible.

    Args:
        image_dir: Directory containing menu images

    Returns:
        List of test results, one dictionary per tested image as returned
        by test_menu_extraction
    """
    results = []

    # os.listdir yields entries in arbitrary order; sort for determinism.
    for filename in sorted(os.listdir(image_dir)):
        if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue

        image_path = os.path.join(image_dir, filename)
        # Skip directories (or other non-files) that merely look like images.
        if not os.path.isfile(image_path):
            continue

        result = test_menu_extraction(image_path)
        results.append(result)

        # Print progress
        status = "SUCCESS" if result['success'] else "FAILED"
        print(f"{filename}: {status} - {result['word_count']} words extracted")

    return results

if __name__ == "__main__":
    # Sample menus are expected under <project root>/assets/sample_menus,
    # where the project root is the parent of this file's directory.
    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    sample_dir = os.path.join(project_root, "assets", "sample_menus")

    if not os.path.exists(sample_dir):
        print(f"Sample directory not found: {sample_dir}")
        print("Creating directory and downloading sample images...")
        os.makedirs(sample_dir, exist_ok=True)
        # NOTE: nothing is actually downloaded here yet; a placeholder note
        # is written so the images can be added by hand.
        readme_path = os.path.join(sample_dir, "README.txt")
        with open(readme_path, "w") as readme:
            readme.write("Add sample menu images to this directory for testing.")

    results = run_batch_test(sample_dir)
    total = len(results)

    # Print summary
    passed = len([r for r in results if r['success']])
    print(f"\nSummary: {passed}/{total} tests passed")

    # Averages are only meaningful when at least one image was tested.
    if total:
        word_total = sum(r['word_count'] for r in results)
        time_total = sum(r['processing_time'] for r in results)
        avg_words = word_total / total
        avg_time = time_total / total
        print(f"Average words extracted: {avg_words:.1f}")
        print(f"Average processing time: {avg_time:.2f} seconds")