"""
Auto-running version of the quick test script.

This will automatically proceed with the model download and testing.
"""

import re
import sys

from PIL import Image, ImageDraw, ImageFont
from transformers import pipeline


def _draw_test_bill(lines, path):
    """Draw ``(y, text)`` pairs on a white 400x150 image, save it, return it.

    Args:
        lines: iterable of ``(y_offset, text)`` pairs; each text is drawn in
            black at x=20 and the given y offset.
        path: file name the rendered image is saved under.

    Returns:
        The in-memory PIL image that was drawn and saved.
    """
    img = Image.new('RGB', (400, 150), color='white')
    draw = ImageDraw.Draw(img)

    try:
        font = ImageFont.load_default()
    except Exception:
        # Best effort, as before: fall back to passing font=None to
        # draw.text().  Narrowed from a bare ``except:`` so Ctrl-C and
        # SystemExit are no longer swallowed here.
        font = None

    for y, text in lines:
        draw.text((20, y), text, fill='black', font=font)

    img.save(path)
    return img


def _find_amount(text):
    """Return the first ``$``-prefixed number in *text* as a string, or None.

    Accepts digits with an optional single decimal part (``$123`` or
    ``$123.45``) and tolerates whitespace after the dollar sign.  The
    previous pattern ``[0-9.]+`` also accepted a lone ``.`` or ``1.2.3``.
    """
    match = re.search(r'\$\s*(\d+(?:\.\d+)?)', text)
    return match.group(1) if match else None


def test_trocr_with_progress():
    """Run an end-to-end smoke test of the TrOCR image-to-text pipeline.

    Steps, each announced on stdout:
      1. Draw a small synthetic bill and save it as
         ``test_singtel_bill.png`` in the current working directory.
      2. Load the ``microsoft/trocr-base-handwritten`` pipeline.
      3. Run the pipeline on the in-memory image and print its output next
         to the text that was drawn.
      4. Extract a dollar amount and all digit runs from the output.

    Status lines use plain ASCII markers.  The glyphs that used to prefix
    them were mis-decoded emoji bytes; one of them contained a line break
    that split the string literal in two.

    Returns:
        bool: True when every step completed without raising, False when
        any step raised (the error and troubleshooting tips are printed
        instead of propagating).
    """
    # Single source of truth for both the drawing and the printed summary.
    bill_lines = (
        (20, "SINGTEL BILL"),
        (50, "Account: 123-456-789"),
        (70, "Total Amount: $123.45"),
        (90, "Due Date: 31/07/2025"),
        (110, "Thank you for choosing Singtel"),
    )
    image_path = "test_singtel_bill.png"

    print("Starting TrOCR Model Test...")
    print("=" * 50)

    try:
        print("Step 1: Creating test bill image...")
        img = _draw_test_bill(bill_lines, image_path)
        print(f"[OK] Test image created: {image_path}")

        print("\nStep 2: Loading TrOCR model...")
        print("Note: First time will download ~1.3GB (please wait...)")
        print("This may take 5-10 minutes depending on your internet speed")
        print("-" * 50)

        # NOTE(review): this is the *handwritten* checkpoint and the test
        # image holds several lines of printed text -- confirm against the
        # model card that this is the intended checkpoint and input shape.
        pipe = pipeline("image-to-text", model="microsoft/trocr-base-handwritten")
        print("[OK] Model loaded successfully!")

        print("\nStep 3: Processing test image...")
        result = pipe(img)
        extracted_text = result[0]['generated_text']

        print("\nRESULTS:")
        print("=" * 30)
        print("Original text in image:")
        for _, text in bill_lines:
            print(f" {text}")
        print()
        print(f"AI Extracted text: '{extracted_text}'")
        print()

        print("Step 4: Testing bill processing functions...")
        amount = _find_amount(extracted_text)
        numbers = re.findall(r'\d+', extracted_text)

        # Only prefix "$" when an amount was actually found; the old code
        # printed "$Not detected".
        shown_amount = f"${amount}" if amount is not None else "Not detected"
        print(f" Detected amount: {shown_amount}")
        print(f" Detected numbers: {numbers}")

        print("\n[OK] SUCCESS! The TrOCR model is working correctly!")
        print("You can now process real Singtel bill images!")
        return True

    except Exception as e:
        # Deliberate catch-all: this is the script's top-level boundary and
        # the caller only needs a pass/fail answer plus a readable message.
        print(f"\n[ERROR] Error during testing: {e}")
        print("\nTroubleshooting tips:")
        print("1. Check your internet connection")
        print("2. Make sure you have enough disk space (~2GB)")
        print("3. Try running again - downloads can sometimes fail")
        return False


def show_next_steps():
    """Print follow-up instructions for after a successful model test.

    Writes a numbered checklist to stdout: the model is cached, how to run
    the full scanner, a copy-paste code sample for using the pipeline
    directly, and where the cached model files are kept.

    The list items use plain ASCII.  The glyphs that used to prefix them
    were mis-decoded emoji bytes, one of which split a string literal
    across two lines.

    Returns:
        None
    """
    # The sample now imports Image as well; it calls Image.open(), so the
    # earlier version raised NameError when pasted as-is.
    snippet = (
        'from PIL import Image',
        'from transformers import pipeline',
        '',
        'pipe = pipeline("image-to-text", model="microsoft/trocr-base-handwritten")',
        'result = pipe(Image.open("your_bill.jpg"))',
        "text = result[0]['generated_text']",
    )

    print("\nNEXT STEPS:")
    print("=" * 40)
    print("1. Model is now cached and ready for use")
    print("2. Take a photo of your Singtel bill")
    print("3. Run the full scanner:")
    print(" python singtel_scanner.py")
    print()
    print("4. Or use the model directly in your code:")
    # Blank line before and after the sample, as the original
    # triple-quoted block produced.
    print("\n" + "\n".join(snippet) + "\n")
    print("\n5. Models are cached in: %USERPROFILE%\\.cache\\huggingface\\")
    print("6. Future runs will be instant (no re-download)")


def main():
    """Run the automatic TrOCR test and report the outcome on stdout.

    Calls ``test_trocr_with_progress()``; on success also prints the
    next-steps checklist via ``show_next_steps()``.  Finishes by waiting
    for Enter so a console window opened just for this script stays
    readable.

    Returns:
        int: 0 when the test succeeded, 1 otherwise, for use as the
        process exit status.
    """
    print("SINGTEL BILL SCANNER - AUTO TEST")
    print("This will automatically download and test the AI model")
    print("=" * 60)

    success = test_trocr_with_progress()

    if success:
        show_next_steps()
        print("\nCONGRATULATIONS! Your Singtel Bill Scanner is ready!")
    else:
        print("\nSetup needs attention. Check the error messages above.")

    print("\n" + "=" * 60)

    try:
        input("Press Enter to exit...")
    except EOFError:
        # stdin is closed (piped / scheduled run): nothing to wait for, and
        # a traceback here would hide the result printed above.
        pass

    return 0 if success else 1


if __name__ == "__main__":
    # Propagate the result so wrappers and CI can detect a failed setup.
    sys.exit(main())