File size: 5,489 Bytes
795183d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 |
"""
Minimal Singtel Bill Scanner - No Heavy Downloads
This version shows you how to use the models without downloading them immediately
"""
def show_usage_examples():
    """Print a usage guide for the Singtel bill scanner to standard output.

    The guide covers package installation, a minimal text-extraction
    snippet, a bill-parsing helper, troubleshooting tips and the overall
    workflow. The code samples are only printed (never executed), so calling
    this function imports nothing and downloads nothing.

    Returns:
        None.
    """
    print("=" * 60)
    print("SINGTEL BILL SCANNER - USAGE GUIDE")
    print("=" * 60)

    print("\n1. BASIC SETUP:")
    print("-" * 20)
    print("# Install required packages:")
    print("pip install torch transformers Pillow requests")
    print()

    print("2. SIMPLE TEXT EXTRACTION:")
    print("-" * 30)
    print("""
from transformers import pipeline
from PIL import Image
# Initialize TrOCR model (downloads ~1.3GB first time)
pipe = pipeline("image-to-text", model="microsoft/trocr-base-handwritten")
# Process your bill image
image = Image.open("your_singtel_bill.jpg")
result = pipe(image)
extracted_text = result[0]['generated_text']
print(f"Extracted text: {extracted_text}")
""")

    print("3. BILL PROCESSING FUNCTION:")
    print("-" * 35)
    # Raw literal: the sample contains regex escapes (\s, \$, \., \d) that must
    # be printed verbatim; in a normal literal they are invalid escape
    # sequences. The sample's function body is indented so that the printed
    # text is valid Python when copied into a script.
    print(r"""
def process_singtel_bill(image_path):
    # Load the model
    pipe = pipeline("image-to-text", model="microsoft/trocr-base-handwritten")
    # Extract text
    image = Image.open(image_path)
    result = pipe(image)
    text = result[0]['generated_text']
    # Parse bill information
    import re
    # Extract total amount
    amount_match = re.search(r'Total[:]\s*\$?([0-9,]+\.?[0-9]*)', text, re.IGNORECASE)
    total_amount = float(amount_match.group(1).replace(',', '')) if amount_match else None
    # Extract due date
    date_match = re.search(r'Due[:]\s*(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})', text, re.IGNORECASE)
    due_date = date_match.group(1) if date_match else None
    # Extract account number
    account_match = re.search(r'Account[:]\s*([0-9A-Z-]+)', text, re.IGNORECASE)
    account_number = account_match.group(1) if account_match else None
    return {
        'raw_text': text,
        'total_amount': total_amount,
        'due_date': due_date,
        'account_number': account_number
    }
# Usage
result = process_singtel_bill("your_bill.jpg")
print(f"Total: ${result['total_amount']}")
print(f"Due: {result['due_date']}")
print(f"Account: {result['account_number']}")
""")

    print("4. TROUBLESHOOTING:")
    print("-" * 20)
    print("If you get import errors:")
    print("- Make sure you're in the virtual environment")
    print("- Run: pip install --upgrade torch transformers Pillow")
    print("- Check Python version (needs 3.8+)")
    print()
    print("If models download slowly:")
    print("- Models are ~1.3GB and download once")
    print("- Cached in ~/.cache/huggingface/")
    print("- Use good internet connection")
    print()

    print("5. STEP-BY-STEP PROCESS:")
    print("-" * 28)
    steps = [
        "1. Take clear photo of your Singtel bill",
        "2. Save as JPG/PNG file",
        "3. Run the processing script",
        "4. Model downloads automatically (first time only)",
        "5. Text is extracted and parsed",
        "6. Get structured bill data as output",
    ]
    for step in steps:
        print(f" {step}")

    print("\n" + "=" * 60)
    print("Ready to start? Run one of these files:")
    print(" - quick_test.py (interactive test)")
    print(" - singtel_scanner.py (full scanner)")
    print(" - test_model.py (comprehensive examples)")
    print("=" * 60)
def check_environment(packages=None):
    """Report whether the packages the scanner depends on can be imported.

    Prints the running Python version, then tries to import each required
    module and prints one status line per module. When at least one import
    fails, the ``pip install`` command that installs the missing packages is
    printed as well.

    Args:
        packages: Optional mapping of importable module name to the name of
            the pip distribution that provides it. When omitted, the
            scanner's own requirements are checked (torch, transformers,
            Pillow and requests).

    Returns:
        True if every module imported successfully; False if any module is
        missing or an unexpected error occurred during the check.
    """
    import importlib
    import sys

    if packages is None:
        # Import name -> pip name. The two differ for Pillow, which is
        # imported as ``PIL``, so the import name cannot be reused in the
        # suggested install command.
        packages = {
            "torch": "torch",
            "transformers": "transformers",
            "PIL": "Pillow",
            "requests": "requests",
        }

    print("Checking environment setup...")
    try:
        print(f"✅ Python version: {sys.version}")

        # Try importing required packages
        missing_packages = []
        for module_name, pip_name in packages.items():
            try:
                importlib.import_module(module_name)
            except ImportError:
                print(f"❌ {module_name} is NOT installed")
                missing_packages.append(pip_name)
            else:
                print(f"✅ {module_name} is installed")

        if missing_packages:
            print("\nTo install missing packages, run:")
            print(f"pip install {' '.join(missing_packages)}")
            return False

        print("\n🎉 All packages are installed! You're ready to go!")
        return True
    except Exception as e:
        # Deliberate best-effort boundary: anything other than ImportError
        # raised while checking is reported and treated as "not ready"
        # instead of crashing the script.
        print(f"Error checking environment: {e}")
        return False
def main():
    """Run the interactive entry point of the minimal scanner script.

    Checks the environment with ``check_environment()``. When it reports
    success, offers a two-item menu and prints the usage guide if the user
    picks option 1; otherwise prints the pip command needed to set up the
    environment. In both cases waits for Enter before returning.
    """
    print("SINGTEL BILL SCANNER - MINIMAL VERSION")
    print("This version helps you get started without heavy downloads\n")

    # Check environment first
    if check_environment():
        ready_banner = "🎉" * 20
        print("\n" + ready_banner)
        print("ENVIRONMENT IS READY!")
        print(ready_banner)

        choice = input(
            "\nWhat would you like to do?\n"
            "1. See usage examples\n"
            "2. Exit\n"
            "Enter choice (1-2): "
        )
        # Tolerate stray whitespace around the answer (e.g. "1 ").
        if choice.strip() == "1":
            show_usage_examples()
    else:
        warning_banner = "⚠️" * 20
        print("\n" + warning_banner)
        print("SETUP REQUIRED")
        print(warning_banner)
        print("\nRun this to install packages:")
        print("pip install torch transformers Pillow requests huggingface-hub")
        print("\nThen run this script again.")

    input("\nPress Enter to exit...")


if __name__ == "__main__":
    main()
|