|
"""
|
|
Simple test script for Singtel Bill Scanner models.

This script prints basic usage examples without downloading any model; the
optional TrOCR test, run only on request, downloads the model on first use.
|
|
"""
|
|
|
|
from transformers import pipeline
|
|
from PIL import Image
|
|
import requests
|
|
import io
|
|
|
|
def test_trocr_simple() -> bool:
    """
    Run a smoke test of the TrOCR image-to-text pipeline on a synthetic bill.

    Draws two lines of text on a blank 300x100 white image, saves it as
    ``test_bill.png`` in the current working directory, loads the
    ``microsoft/trocr-base-handwritten`` pipeline and prints the text it
    extracts from the in-memory image.

    Returns:
        bool: True if every step completed, False if any step raised. The
        error is printed instead of being propagated to the caller.
    """
    print("Testing TrOCR model...")

    try:
        from PIL import Image, ImageDraw, ImageFont

        # Build the synthetic bill image entirely in memory.
        img = Image.new('RGB', (300, 100), color='white')
        draw = ImageDraw.Draw(img)

        try:
            font = ImageFont.load_default()
        except Exception:
            # Best effort: a missing default font must not abort the test.
            # ``except Exception`` (not a bare ``except``) so Ctrl-C and
            # SystemExit still propagate. font=None leaves the font choice
            # to ImageDraw.text.
            font = None

        draw.text((10, 30), "Total: $123.45", fill='black', font=font)
        draw.text((10, 50), "Due: 2025-07-31", fill='black', font=font)

        # Keep a copy on disk so the input can be inspected after the run.
        img.save("test_bill.png")
        print("Created test bill image: test_bill.png")

        # NOTE(review): the checkpoint name says "handwritten" while the test
        # image is rendered text - confirm this is the intended model.
        print("Loading TrOCR pipeline...")
        pipe = pipeline("image-to-text", model="microsoft/trocr-base-handwritten")

        print("Processing test image...")
        result = pipe(img)

        print(f"Extracted text: {result[0]['generated_text']}")

        return True

    except Exception as e:
        # Top-level boundary of the smoke test: report the failure and let
        # the caller decide what to do with the False result.
        print(f"Error in TrOCR test: {e}")
        return False
|
|
|
|
def quick_usage_demo() -> None:
    """
    Print TrOCR usage examples to stdout without loading any model.

    Two code samples are printed as plain text: a minimal pipeline call on a
    single image, and a ``process_bill`` function that wraps the pipeline and
    returns a dict of parsed fields. Nothing in the samples is executed here.

    The second sample refers to ``extract_amount``, ``extract_date`` and
    ``extract_account``; those helpers are not defined by this function.
    """
    # Local import: only needed to strip the source-level indentation from
    # the samples so they print starting at column 0.
    import textwrap

    basic_usage = textwrap.dedent("""
        from transformers import pipeline
        from PIL import Image

        # Initialize pipeline
        pipe = pipeline("image-to-text", model="microsoft/trocr-base-handwritten")

        # Process image
        image = Image.open("your_bill.jpg")
        result = pipe(image)
        text = result[0]['generated_text']
    """)

    # The relative indentation inside the sample is kept by dedent, so the
    # printed function body is valid, copy-pasteable Python.
    bill_function = textwrap.dedent("""
        def process_bill(image_path):
            pipe = pipeline("image-to-text", model="microsoft/trocr-base-handwritten")
            image = Image.open(image_path)
            result = pipe(image)

            # Extract bill information
            text = result[0]['generated_text']

            # Parse specific information
            total_amount = extract_amount(text)
            due_date = extract_date(text)
            account_number = extract_account(text)

            return {
                'total': total_amount,
                'due_date': due_date,
                'account': account_number,
                'raw_text': text
            }
    """)

    print("\n=== Quick Usage Demo ===")

    print("\n1. Basic TrOCR Usage:")
    print(basic_usage)

    print("\n2. Bill Processing Function:")
    print(bill_function)
|
|
|
|
def main() -> None:
    """
    Interactive entry point for the quick test script.

    Prints the usage examples, asks whether to run the real TrOCR test, and
    runs it only when the answer is ``y`` or ``yes`` (case-insensitive,
    surrounding whitespace ignored). Any other answer skips the test.
    """
    print("=== Singtel Bill Scanner - Quick Test ===")

    quick_usage_demo()

    print("\n" + "="*50)
    print("Would you like to run the actual TrOCR test?")
    print("Note: This will download the model (~1.3GB) on first run")
    print("="*50)

    try:
        user_input = input("Run test? (y/n): ").lower().strip()
    except EOFError:
        # stdin is closed or not interactive (e.g. piped/CI run): there is
        # nobody to answer, so treat it as declining the test.
        user_input = ''

    if user_input in ('y', 'yes'):
        success = test_trocr_simple()
        if success:
            print("\n✅ Test completed successfully!")
            print("You can now use the models with your bill images.")
        else:
            print("\n❌ Test failed. Check the error messages above.")
    else:
        print("\nTest skipped. Refer to the usage examples above.")
        print("Run this script again with 'y' when you're ready to test.")

    print("\n📚 For detailed documentation, see README.md")
    print("📁 Your models will be cached in ~/.cache/huggingface/ for future use")


if __name__ == "__main__":
    main()
|
|
|