"""
Singtel Bill Scanner - Production Ready Example

This script shows how to extract key information from Singtel bills.
"""

import json
import os
import re
from datetime import datetime

from PIL import Image
from transformers import pipeline


class SingtelBillScanner:
    """Extract key fields from a Singtel bill image using OCR plus regexes.

    The scanner runs a Hugging Face ``image-to-text`` pipeline over the
    image and then applies regular expressions to the recognised text to
    pull out the total amount, due date, account number, billing period
    and per-service charges.
    """

    # A money value: must start with a digit so that stray punctuation
    # (e.g. the comma in "Mobile, Broadband") is never captured as an amount.
    _AMOUNT = r'([0-9][0-9,]*(?:\.[0-9]+)?)'
    # Optional currency prefix ("$" or "S$"), optionally followed by spaces.
    _CURRENCY = r'(?:S?\$)?\s*'
    # Numeric date such as 15/03/2024 or 5-1-24.
    _DATE = r'(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})'
    # An account identifier must contain at least one digit; this stops
    # label words such as "No" or "Number" from being returned as the value
    # (the search is case-insensitive, so [A-Z] also matches lowercase).
    _ACCOUNT_VALUE = r'([0-9A-Z-]*[0-9][0-9A-Z-]*)'

    # Labelled patterns come first; bare currency patterns are fallbacks.
    # ``\b`` prevents "Total" from matching inside "Subtotal".
    _AMOUNT_PATTERNS = (
        r'\bTotal[:\s]*' + _CURRENCY + _AMOUNT,
        r'\bAmount Due[:\s]*' + _CURRENCY + _AMOUNT,
        r'S\$\s*' + _AMOUNT,
        r'\$\s*' + _AMOUNT,
    )

    # Most specific label first, any date in the text as a last resort.
    _DUE_DATE_PATTERNS = (
        r'\bDue Date[:\s]*' + _DATE,
        r'\bDue[:\s]*' + _DATE,
        _DATE,
    )

    # Day-first formats are tried, with 4-digit years before 2-digit years.
    _DATE_FORMATS = ('%d/%m/%Y', '%d-%m-%Y', '%d/%m/%y', '%d-%m-%y')

    # Specific labels ("Account No", "Account Number") are tried before the
    # bare "Account" label.
    _ACCOUNT_PATTERNS = (
        r'\bAccount\s+(?:No|Number)\b\.?[:\s]*' + _ACCOUNT_VALUE,
        r'\bA/C(?:\s+No\b\.?)?[:\s]*' + _ACCOUNT_VALUE,
        r'\bAccount[:\s]*' + _ACCOUNT_VALUE,
    )

    _PERIOD_PATTERN = (
        r'Bill Period[:\s]*(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})'
        r'\s*to\s*(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})'
    )

    # Service labels searched for by extract_services, in output order.
    _SERVICE_NAMES = ('Mobile', 'Broadband', 'Data', 'Voice', 'SMS')

    def __init__(self, model_name="microsoft/trocr-base-handwritten"):
        """Initialize the bill scanner with a TrOCR model.

        Args:
            model_name: Hugging Face model id passed to the
                ``image-to-text`` pipeline. The default is unchanged from
                the original hard-coded value.
                NOTE(review): bills are normally printed, so a printed-text
                checkpoint may recognise them better - confirm before
                changing the default.
        """
        print("Initializing Singtel Bill Scanner...")
        self.ocr_pipeline = pipeline("image-to-text", model=model_name)
        print("Scanner ready!")

    def extract_text(self, image_path):
        """Run OCR on the image at ``image_path``.

        Returns:
            The recognised text of the first pipeline result, or ``None``
            if the image could not be opened or OCR failed (the error is
            printed, not raised).
        """
        try:
            # The context manager closes the underlying file handle once
            # the pipeline has consumed the image.
            with Image.open(image_path) as image:
                result = self.ocr_pipeline(image)
            return result[0]['generated_text']
        except Exception as e:
            # Best-effort by design: callers treat None as "no text".
            print(f"Error extracting text: {e}")
            return None

    def parse_bill_amount(self, text):
        """Extract the bill amount from ``text``.

        Returns:
            The amount as a ``float`` (thousands separators removed), or
            ``None`` if ``text`` is empty or no amount is found.
        """
        if not text:
            return None

        for pattern in self._AMOUNT_PATTERNS:
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                return float(match.group(1).replace(',', ''))
        return None

    def parse_due_date(self, text):
        """Extract the due date from ``text``.

        Returns:
            The date as an ISO ``YYYY-MM-DD`` string, or ``None`` if
            ``text`` is empty or no parseable date is found. Dates are
            interpreted day-first.
        """
        if not text:
            return None

        for pattern in self._DUE_DATE_PATTERNS:
            match = re.search(pattern, text, re.IGNORECASE)
            if not match:
                continue

            date_str = match.group(1)
            for fmt in self._DATE_FORMATS:
                try:
                    return datetime.strptime(date_str, fmt).strftime('%Y-%m-%d')
                except ValueError:
                    # Wrong separator or year width for this format.
                    continue
        return None

    def parse_account_number(self, text):
        """Extract the account number from ``text``.

        Returns:
            The account identifier (digits, letters and hyphens, containing
            at least one digit), or ``None`` if none is found.
        """
        if not text:
            return None

        for pattern in self._ACCOUNT_PATTERNS:
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                return match.group(1).strip()
        return None

    def parse_bill_period(self, text):
        """Extract the billing period from ``text``.

        Returns:
            A dict ``{'start': ..., 'end': ...}`` holding the two dates
            exactly as they appear in the text, or ``None`` if no
            "Bill Period <date> to <date>" phrase is found.
        """
        if not text:
            return None

        match = re.search(self._PERIOD_PATTERN, text, re.IGNORECASE)
        if match:
            return {
                'start': match.group(1),
                'end': match.group(2),
            }
        return None

    def extract_services(self, text):
        """Extract per-service charges from ``text``.

        Returns:
            A list of ``{'service': name, 'amount': float}`` dicts, one for
            each known service label that is followed by an amount. Only
            the first occurrence of each label is reported. The list is
            empty when nothing is found.
        """
        services = []
        if not text:
            return services

        for service_name in self._SERVICE_NAMES:
            # Word boundaries stop "Voice" from matching inside "Invoice".
            pattern = (
                r'\b' + service_name + r'\b[:\s]*'
                + self._CURRENCY + self._AMOUNT
            )
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                services.append({
                    'service': service_name,
                    'amount': float(match.group(1).replace(',', '')),
                })
        return services

    def process_bill(self, image_path):
        """Run the complete pipeline: OCR the image, then parse every field.

        Returns:
            A dict with the raw OCR text, the parsed fields and a
            ``processed_at`` timestamp, or ``None`` if no text could be
            extracted from the image.
        """
        print(f"Processing bill: {image_path}")

        raw_text = self.extract_text(image_path)
        if not raw_text:
            return None

        return {
            'raw_text': raw_text,
            'total_amount': self.parse_bill_amount(raw_text),
            'due_date': self.parse_due_date(raw_text),
            'account_number': self.parse_account_number(raw_text),
            'bill_period': self.parse_bill_period(raw_text),
            'services': self.extract_services(raw_text),
            'processed_at': datetime.now().isoformat(),
        }

    def save_results(self, bill_data, output_file):
        """Save processed bill data to ``output_file`` as indented JSON."""
        # Explicit encoding keeps the output independent of the platform
        # default locale.
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(bill_data, f, indent=2)
        print(f"Results saved to: {output_file}")


def main():
    """Interactive command-line entry point for the bill scanner.

    Asks whether the user has a bill image. If so, prompts for its path,
    runs the scanner, prints a short summary and writes the full result to
    a timestamped ``bill_result_*.json`` file in the current directory.
    Otherwise prints usage instructions.
    """
    answer = input("Do you have a bill image to process? (y/n): ")
    if answer.strip().lower() != 'y':
        print("\nTo use this scanner:")
        print("1. Take a clear photo of your Singtel bill")
        print("2. Save it as a JPG or PNG file")
        print("3. Run this script and provide the file path")
        print("4. The scanner will extract key information automatically")
        return

    # Tolerate surrounding whitespace and the quotes that shells and file
    # managers often add around pasted paths.
    image_path = input("Enter the path to your bill image: ").strip().strip('"\'')

    # Check up front: SingtelBillScanner.extract_text reports open errors by
    # returning None, so a missing file would otherwise never surface as
    # FileNotFoundError here.
    if not os.path.isfile(image_path):
        print(f"Image file not found: {image_path}")
        return

    try:
        # Created only once an image is known to exist, so the OCR model is
        # not loaded for nothing.
        scanner = SingtelBillScanner()
        result = scanner.process_bill(image_path)

        if result:
            # The result keys always exist; a missing field is stored as
            # None, so None has to be mapped to the placeholder explicitly.
            total = result.get('total_amount')
            due_date = result.get('due_date')
            account = result.get('account_number')

            print("\n=== Bill Processing Results ===")
            if total is None:
                print("Total Amount: Not found")
            else:
                print(f"Total Amount: ${total:.2f}")
            print(f"Due Date: {due_date if due_date is not None else 'Not found'}")
            print(f"Account Number: {account if account is not None else 'Not found'}")
            print(f"Services: {len(result.get('services') or [])} found")

            output_file = f"bill_result_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
            scanner.save_results(result, output_file)
        else:
            print("Failed to process bill image")
    except FileNotFoundError:
        # Kept for the case where the file disappears after the check above.
        print(f"Image file not found: {image_path}")
    except Exception as e:
        # Top-level boundary of the script: report and exit cleanly.
        print(f"Error processing bill: {e}")


# Run the interactive scanner only when executed as a script, not on import.
if __name__ == "__main__":
    main()