|
"""
|
|
Minimal Singtel Bill Scanner - No Heavy Downloads
|
|
This version shows you how to use the models without downloading them immediately
|
|
"""
|
|
|
|
def show_usage_examples():
    """Print a usage guide for the Singtel bill scanner to stdout.

    The guide has five sections: package setup, a minimal text-extraction
    snippet, a complete ``process_singtel_bill`` example, troubleshooting
    hints, and a step-by-step checklist, followed by a list of companion
    scripts. Nothing is imported from the ML stack and no model is
    downloaded; the code samples are only printed, never executed.

    Returns:
        None
    """
    # Local import keeps this function self-contained.
    import textwrap

    banner = "=" * 60

    print(banner)
    print("SINGTEL BILL SCANNER - USAGE GUIDE")
    print(banner)

    print("\n1. BASIC SETUP:")
    print("-" * 20)
    print("# Install required packages:")
    print("pip install torch transformers Pillow requests")
    print()

    # NOTE(review): the samples below load the *handwritten* TrOCR checkpoint;
    # for printed bills "microsoft/trocr-base-printed" may be a better fit.
    # Left unchanged here - confirm before editing the guide text.

    print("2. SIMPLE TEXT EXTRACTION:")
    print("-" * 30)
    # Raw literals: the samples contain regex backslashes (\s, \d, ...) that
    # would otherwise be invalid escape sequences in a normal string literal.
    print(textwrap.dedent(r"""
        from transformers import pipeline
        from PIL import Image

        # Initialize TrOCR model (downloads ~1.3GB first time)
        pipe = pipeline("image-to-text", model="microsoft/trocr-base-handwritten")

        # Process your bill image
        image = Image.open("your_singtel_bill.jpg")
        result = pipe(image)
        extracted_text = result[0]['generated_text']

        print(f"Extracted text: {extracted_text}")
    """))

    print("3. BILL PROCESSING FUNCTION:")
    print("-" * 35)
    # The sample's indentation is significant: it must be valid Python when
    # the reader copies it out of the terminal.
    print(textwrap.dedent(r"""
        def process_singtel_bill(image_path):
            # Load the model
            pipe = pipeline("image-to-text", model="microsoft/trocr-base-handwritten")

            # Extract text
            image = Image.open(image_path)
            result = pipe(image)
            text = result[0]['generated_text']

            # Parse bill information
            import re

            # Extract total amount
            amount_match = re.search(r'Total[:]\s*\$?([0-9,]+\.?[0-9]*)', text, re.IGNORECASE)
            total_amount = float(amount_match.group(1).replace(',', '')) if amount_match else None

            # Extract due date
            date_match = re.search(r'Due[:]\s*(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})', text, re.IGNORECASE)
            due_date = date_match.group(1) if date_match else None

            # Extract account number
            account_match = re.search(r'Account[:]\s*([0-9A-Z-]+)', text, re.IGNORECASE)
            account_number = account_match.group(1) if account_match else None

            return {
                'raw_text': text,
                'total_amount': total_amount,
                'due_date': due_date,
                'account_number': account_number
            }

        # Usage
        result = process_singtel_bill("your_bill.jpg")
        print(f"Total: ${result['total_amount']}")
        print(f"Due: {result['due_date']}")
        print(f"Account: {result['account_number']}")
    """))

    print("4. TROUBLESHOOTING:")
    print("-" * 20)
    print("If you get import errors:")
    print("- Make sure you're in the virtual environment")
    print("- Run: pip install --upgrade torch transformers Pillow")
    print("- Check Python version (needs 3.8+)")
    print()

    print("If models download slowly:")
    print("- Models are ~1.3GB and download once")
    print("- Cached in ~/.cache/huggingface/")
    print("- Use good internet connection")
    print()

    print("5. STEP-BY-STEP PROCESS:")
    print("-" * 28)
    steps = [
        "1. Take clear photo of your Singtel bill",
        "2. Save as JPG/PNG file",
        "3. Run the processing script",
        "4. Model downloads automatically (first time only)",
        "5. Text is extracted and parsed",
        "6. Get structured bill data as output",
    ]
    for step in steps:
        print(f" {step}")

    print("\n" + banner)
    print("Ready to start? Run one of these files:")
    print(" - quick_test.py (interactive test)")
    print(" - singtel_scanner.py (full scanner)")
    print(" - test_model.py (comprehensive examples)")
    print(banner)
|
|
|
|
def check_environment():
    """Check that the packages the scanner depends on can be imported.

    Prints the running Python version and one status line per required
    package. If any package fails to import, prints a ``pip install``
    command listing the distributions to install.

    Returns:
        bool: True when every required package imported successfully;
        False when at least one is missing or the check itself raised
        an unexpected error.
    """
    # Local imports keep this function self-contained.
    import importlib
    import sys

    # Import name -> distribution name to pass to pip. They differ for
    # Pillow, which is imported as ``PIL`` but installed as ``Pillow``.
    required = {
        'torch': 'torch',
        'transformers': 'transformers',
        'PIL': 'Pillow',
        'requests': 'requests',
    }

    print("Checking environment setup...")

    # Deliberately broad: this is a best-effort diagnostic, so any unexpected
    # failure (including a package that raises something other than
    # ImportError while loading) is reported rather than crashing the script.
    try:
        print(f"✅ Python version: {sys.version}")

        missing_packages = []
        for module_name, pip_name in required.items():
            try:
                importlib.import_module(module_name)
            except ImportError:
                print(f"❌ {module_name} is NOT installed")
                missing_packages.append(pip_name)
            else:
                print(f"✅ {module_name} is installed")

        if missing_packages:
            print("\nTo install missing packages, run:")
            print(f"pip install {' '.join(missing_packages)}")
            return False

        print("\n🎉 All packages are installed! You're ready to go!")
        return True

    except Exception as e:
        print(f"Error checking environment: {e}")
        return False
|
|
|
|
if __name__ == "__main__":
    # Script entry point: verify dependencies, then offer the usage guide.
    print("SINGTEL BILL SCANNER - MINIMAL VERSION")
    print("This version helps you get started without heavy downloads\n")

    env_ok = check_environment()

    if env_ok:
        print("\n" + "🎉" * 20)
        print("ENVIRONMENT IS READY!")
        print("🎉" * 20)

        choice = input("\nWhat would you like to do?\n1. See usage examples\n2. Exit\nEnter choice (1-2): ")

        # Tolerate stray whitespace around the answer; anything other than
        # "1" (including "2") falls through to exit.
        if choice.strip() == "1":
            show_usage_examples()

    else:
        print("\n" + "⚠️" * 20)
        print("SETUP REQUIRED")
        print("⚠️" * 20)
        print("\nRun this to install packages:")
        print("pip install torch transformers Pillow requests huggingface-hub")
        print("\nThen run this script again.")

    # Keep the console window open until the user has read the output.
    input("\nPress Enter to exit...")
|
|
|