# File size: 4,187 Bytes
# 795183d
"""
Final verification test - Models are downloaded, let's test them!
"""
from transformers import pipeline
from PIL import Image
import time
def test_downloaded_models(image_path: str = "test_singtel_bill.png") -> bool:
    """Smoke-test the TrOCR image-to-text pipeline on a sample bill image.

    Loads the ``microsoft/trocr-base-handwritten`` pipeline, runs it on
    ``image_path``, prints the extracted text together with the load and
    processing times, and then prints a few simple checks on that text.

    Args:
        image_path: Image file to run through the model. Defaults to the
            sample file name this script has always used.

    Returns:
        True when the model loaded and the image was processed; False when
        the image could not be opened or any other step raised.
    """
    # NOTE(review): the "β"/"π" prefixes in the messages look like
    # mis-decoded emoji. They are kept exactly as found because the original
    # characters cannot be recovered with certainty from this file.
    print("π TESTING DOWNLOADED MODELS")
    print("=" * 40)

    try:
        # Test 1: load the TrOCR pipeline.
        print("π₯ Loading TrOCR model...")
        # perf_counter is monotonic, so the measured interval cannot be
        # skewed by a system clock adjustment.
        started = time.perf_counter()
        pipe = pipeline("image-to-text", model="microsoft/trocr-base-handwritten")
        load_time = time.perf_counter() - started
        print(f"β\nModel loaded in {load_time:.1f} seconds")

        # Test 2: process the test image.
        print("\nπ Processing test image...")
        try:
            # Open once. Image.open raises (it never returns a falsy value),
            # so a failed load has to be handled here.
            img = Image.open(image_path)
        except OSError as exc:
            # Covers a missing file as well as an unreadable/unknown format.
            print("β Could not load test image")
            print(f"β Error: {exc}")
            return False

        # The context manager closes the underlying file once the model has
        # consumed the pixels.
        with img:
            print("β\nTest image loaded successfully")

            # Process with AI
            started = time.perf_counter()
            result = pipe(img)
            process_time = time.perf_counter() - started

        extracted_text = result[0]['generated_text']
        print(f"β\nProcessing completed in {process_time:.1f} seconds")
        print("\nπ EXTRACTED TEXT:")
        print(f"'{extracted_text}'")

        # Test 3: simple parsing.
        _report_bill_parsing(extracted_text)

        print("\nπ SUCCESS! Your TrOCR model is working!")
        return True
    except Exception as e:
        # Top-level boundary of this smoke test: report the failure and let
        # the caller decide what to do with the False result.
        print(f"β Error: {e}")
        return False


def _report_bill_parsing(extracted_text: str) -> None:
    """Print which bill markers (provider name, numbers, amounts) were found."""
    import re

    print("\nπ§ TESTING BILL PARSING:")

    # Check for Singtel
    if "singtel" in extracted_text.lower():
        print("β\n'Singtel' detected in text")
    else:
        print("β 'Singtel' not clearly detected")

    # Check for numbers (amounts, account numbers, etc.)
    numbers = re.findall(r'\d+', extracted_text)
    if numbers:
        print(f"β\nNumbers detected: {numbers}")
    else:
        print("β No numbers detected")

    # Check for currency patterns
    currency = re.findall(r'\$[\d.]+', extracted_text)
    if currency:
        print(f"β\nCurrency amounts: {currency}")
    else:
        print("β οΈ No currency patterns detected")
def test_real_bill_processing():
    """Print step-by-step instructions for running the model on a real bill."""
    # Example code shown to the user, one entry per printed line.
    code_sample = (
        "```python",
        "from transformers import pipeline",
        "from PIL import Image",
        "",
        "# Load the model (fast now - already downloaded!)",
        "pipe = pipeline('image-to-text', model='microsoft/trocr-base-handwritten')",
        "",
        "# Process your bill",
        "image = Image.open('my_bill.jpg')",
        "result = pipe(image)",
        "text = result[0]['generated_text']",
        "",
        "print(f'Extracted: {text}')",
        "```",
    )

    guide = (
        "\nπ― READY FOR REAL BILLS!",
        "=" * 30,
        "To process your Singtel bill:",
        "",
        "1. πΈ Take a clear photo of your bill",
        "2. πΎ Save it as 'my_bill.jpg' in this folder",
        "3. π Run this code:",
        "",
        *code_sample,
        "",
        "4. π§ Use singtel_scanner.py for advanced parsing!",
    )

    # One print call per entry, so blank entries come out as empty lines.
    for row in guide:
        print(row)
if __name__ == "__main__":
    # Script entry point: run the model smoke test, show the usage guide on
    # success, then wait for the user before exiting.
    # NOTE(review): the "β" prefixes look like mis-decoded emoji; the text,
    # including the line break after the first one, is kept as found.
    divider = "=" * 50

    print("FINAL MODEL VERIFICATION")
    print("Models are downloaded - testing now!")
    print(divider)

    models_ok = test_downloaded_models()
    if not models_ok:
        print("\nβ Something went wrong. Check the error above.")
    else:
        test_real_bill_processing()
        print("\nβ\nCOMPLETE! Your Singtel Bill Scanner is ready to use!")
        print("β‘ All future runs will be instant (models cached)")

    print("\n" + divider)
    input("Press Enter to finish...")
|