File size: 7,058 Bytes
795183d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
"""

Singtel Bill Scanner - Production Ready Example

This script shows how to extract key information from Singtel bills

"""

import re
from datetime import datetime
from transformers import pipeline
from PIL import Image
import json

class SingtelBillScanner:
    """Run OCR on a bill image and parse billing fields out of the text.

    The OCR step uses a Hugging Face ``image-to-text`` pipeline; every
    ``parse_*`` / ``extract_services`` method is a pure regex pass over a
    plain string and can be used without loading the model.
    """

    # All patterns are compiled once, case-insensitively.  Labels are anchored
    # with ``\b`` so that e.g. "Subtotal" is not read as "Total" and "Invoice"
    # is not read as "Voice".  Amounts must start with a digit, which means the
    # captured text (after removing thousands separators) is always a valid
    # float literal.  An optional "$" or "S$" is accepted after a label.
    _AMOUNT_PATTERNS = (
        re.compile(r'\bTotal[:\s]*(?:S?\$)?\s*([0-9][0-9,]*(?:\.[0-9]+)?)', re.IGNORECASE),
        re.compile(r'\bAmount Due[:\s]*(?:S?\$)?\s*([0-9][0-9,]*(?:\.[0-9]+)?)', re.IGNORECASE),
        re.compile(r'S\$\s*([0-9][0-9,]*(?:\.[0-9]+)?)', re.IGNORECASE),
        re.compile(r'\$\s*([0-9][0-9,]*(?:\.[0-9]+)?)', re.IGNORECASE),
    )

    # Most specific label first; the bare-date pattern is the last resort.
    _DUE_DATE_PATTERNS = (
        re.compile(r'\bDue Date[:\s]*(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})', re.IGNORECASE),
        re.compile(r'\bDue[:\s]*(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})', re.IGNORECASE),
        re.compile(r'(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})'),
    )

    # Day-first formats, tried in order after separators are normalised to "/".
    _DATE_FORMATS = ('%d/%m/%Y', '%d/%m/%y')

    # Labelled forms ("Account No", "Account Number", "A/C No") come before the
    # bare "Account" form; otherwise the label word itself ("No", "Number")
    # would be captured as the value.  The value must contain a digit.
    _ACCOUNT_PATTERNS = (
        re.compile(r'\bAccount\s*(?:No\.?|Number|#)[:\s]*([0-9A-Z-]*[0-9][0-9A-Z-]*)', re.IGNORECASE),
        re.compile(r'\bA/C(?:\s*No\.?)?[:\s]*([0-9A-Z-]*[0-9][0-9A-Z-]*)', re.IGNORECASE),
        re.compile(r'\bAccount[:\s]*([0-9A-Z-]*[0-9][0-9A-Z-]*)', re.IGNORECASE),
    )

    _PERIOD_PATTERN = re.compile(
        r'\bBill(?:ing)? Period[:\s]*(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})'
        r'\s*(?:to|-|–)\s*(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})',
        re.IGNORECASE,
    )

    # (service name reported to the caller, pattern locating its charge)
    _SERVICE_PATTERNS = (
        ('Mobile', re.compile(r'\bMobile[:\s]*(?:S?\$)?\s*([0-9][0-9,]*(?:\.[0-9]+)?)', re.IGNORECASE)),
        ('Broadband', re.compile(r'\bBroadband[:\s]*(?:S?\$)?\s*([0-9][0-9,]*(?:\.[0-9]+)?)', re.IGNORECASE)),
        ('Data', re.compile(r'\bData[:\s]*(?:S?\$)?\s*([0-9][0-9,]*(?:\.[0-9]+)?)', re.IGNORECASE)),
        ('Voice', re.compile(r'\bVoice[:\s]*(?:S?\$)?\s*([0-9][0-9,]*(?:\.[0-9]+)?)', re.IGNORECASE)),
        ('SMS', re.compile(r'\bSMS[:\s]*(?:S?\$)?\s*([0-9][0-9,]*(?:\.[0-9]+)?)', re.IGNORECASE)),
    )

    def __init__(self):
        """Initialize the bill scanner with TrOCR model"""
        print("Initializing Singtel Bill Scanner...")
        # NOTE(review): this is the *handwritten* TrOCR checkpoint; bills are
        # presumably printed text, so a printed-text checkpoint may recognise
        # them better -- confirm before changing the model name.
        self.ocr_pipeline = pipeline("image-to-text", model="microsoft/trocr-base-handwritten")
        print("Scanner ready!")

    def extract_text(self, image_path):
        """Run OCR on the image at ``image_path``.

        Returns the recognised text of the first pipeline result, or ``None``
        if the image cannot be opened or OCR fails (the error is printed).
        """
        try:
            # Context manager so the underlying file handle is always closed.
            with Image.open(image_path) as image:
                result = self.ocr_pipeline(image)
            return result[0]['generated_text']
        except Exception as e:
            # Best-effort boundary: callers rely on None rather than a raise.
            print(f"Error extracting text: {e}")
            return None

    def parse_bill_amount(self, text):
        """Return the bill amount found in ``text`` as a float, or ``None``.

        Patterns are tried in priority order: "Total", "Amount Due", then the
        first "S$" amount, then the first "$" amount.  Thousands separators
        are removed before conversion.
        """
        for pattern in self._AMOUNT_PATTERNS:
            match = pattern.search(text)
            if match:
                return float(match.group(1).replace(',', ''))
        return None

    def parse_due_date(self, text):
        """Return the due date found in ``text`` as ``YYYY-MM-DD``, or ``None``.

        Dates are read day-first (``dd/mm/yyyy`` or ``dd-mm-yy`` style).  A
        candidate that is not a valid calendar date is skipped and the next
        pattern is tried.
        """
        for pattern in self._DUE_DATE_PATTERNS:
            match = pattern.search(text)
            if not match:
                continue
            # Normalise separators so mixed forms like "12/03-2024" also parse.
            date_str = match.group(1).replace('-', '/')
            for fmt in self._DATE_FORMATS:
                try:
                    return datetime.strptime(date_str, fmt).strftime('%Y-%m-%d')
                except ValueError:
                    continue
        return None

    def parse_account_number(self, text):
        """Return the account number found in ``text``, or ``None``.

        The value may contain digits, letters and hyphens but must include at
        least one digit, so label words such as "No" or "Holder" are never
        returned as the account number.
        """
        for pattern in self._ACCOUNT_PATTERNS:
            match = pattern.search(text)
            if match:
                return match.group(1)
        return None

    def parse_bill_period(self, text):
        """Return ``{'start': ..., 'end': ...}`` for the billing period, or ``None``.

        Both dates are returned exactly as they appear in ``text`` (they are
        not normalised).  "Bill Period" and "Billing Period" labels are
        accepted, with "to" or a dash between the two dates.
        """
        match = self._PERIOD_PATTERN.search(text)
        if match:
            return {
                'start': match.group(1),
                'end': match.group(2)
            }
        return None

    def extract_services(self, text):
        """Return a list of ``{'service': name, 'amount': float}`` charges.

        Each known service keyword contributes at most one entry (its first
        occurrence followed by an amount).  Keywords with no amount after them
        are ignored; an empty list is returned when nothing matches.
        """
        services = []
        for service_name, pattern in self._SERVICE_PATTERNS:
            match = pattern.search(text)
            if match:
                services.append({
                    'service': service_name,
                    'amount': float(match.group(1).replace(',', ''))
                })
        return services

    def process_bill(self, image_path):
        """OCR the image and parse every supported field.

        Returns a dict with the raw text, the parsed fields (each ``None`` or
        empty when not found) and a ``processed_at`` ISO timestamp, or
        ``None`` when no text could be extracted.
        """
        print(f"Processing bill: {image_path}")

        # Extract text
        raw_text = self.extract_text(image_path)
        if not raw_text:
            return None

        # Parse bill information
        bill_data = {
            'raw_text': raw_text,
            'total_amount': self.parse_bill_amount(raw_text),
            'due_date': self.parse_due_date(raw_text),
            'account_number': self.parse_account_number(raw_text),
            'bill_period': self.parse_bill_period(raw_text),
            'services': self.extract_services(raw_text),
            'processed_at': datetime.now().isoformat()
        }

        return bill_data

    def save_results(self, bill_data, output_file):
        """Write ``bill_data`` to ``output_file`` as indented JSON."""
        # Explicit encoding so the output does not depend on the platform default.
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(bill_data, f, indent=2)
        print(f"Results saved to: {output_file}")

# Example usage
def main():
    """Interactive example: ask for a bill image, process it, print and save the result.

    The OCR model is only loaded once the user has supplied a path to an
    existing file, so answering "n" or mistyping the path costs nothing.
    Results are written to a timestamped ``bill_result_*.json`` file in the
    current directory.
    """
    # Local import: only this entry point needs it.
    from pathlib import Path

    answer = input("Do you have a bill image to process? (y/n): ").strip().lower()
    if answer not in ('y', 'yes'):
        print("\nTo use this scanner:")
        print("1. Take a clear photo of your Singtel bill")
        print("2. Save it as a JPG or PNG file")
        print("3. Run this script and provide the file path")
        print("4. The scanner will extract key information automatically")
        return

    # Drop surrounding whitespace and quotes (common when a path is pasted).
    image_path = input("Enter the path to your bill image: ").strip().strip('"\'')

    # Check up front: process_bill() reports any failure as None, so a missing
    # file would otherwise be indistinguishable from an OCR failure.
    if not Path(image_path).is_file():
        print(f"Image file not found: {image_path}")
        return

    # Initialize scanner (loads the OCR model)
    scanner = SingtelBillScanner()

    try:
        result = scanner.process_bill(image_path)

        if not result:
            print("Failed to process bill image")
            return

        # The result keys always exist; a field that was not found is None,
        # so test for None explicitly instead of relying on a .get() default.
        amount = result.get('total_amount')
        due_date = result.get('due_date')
        account = result.get('account_number')

        print("\n=== Bill Processing Results ===")
        print(f"Total Amount: {'Not found' if amount is None else f'${amount:.2f}'}")
        print(f"Due Date: {'Not found' if due_date is None else due_date}")
        print(f"Account Number: {'Not found' if account is None else account}")
        print(f"Services: {len(result.get('services') or [])} found")

        # Save results
        output_file = f"bill_result_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        scanner.save_results(result, output_file)

    except FileNotFoundError:
        print(f"Image file not found: {image_path}")
    except Exception as e:
        # Top-level boundary of the example script: report and exit cleanly.
        print(f"Error processing bill: {e}")

# Run the interactive example only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()