File size: 7,091 Bytes
5ff1fa8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
"""
Streamlit web app for Singtel Bill Scanner
This creates a user-friendly interface for the bill scanner
"""

import streamlit as st
from PIL import Image
import io
import base64

# Only import heavy libraries when needed
@st.cache_resource
def load_ocr_model():
    """Build and return the TrOCR image-to-text pipeline.

    The function is wrapped in ``st.cache_resource`` so the pipeline is
    cached for performance instead of being rebuilt on every call.
    """
    # Deferred import: transformers is only pulled in when a model is requested.
    from transformers import pipeline as build_pipeline

    ocr_pipeline = build_pipeline(
        task="image-to-text",
        model="microsoft/trocr-base-handwritten",
    )
    return ocr_pipeline

# Candidate regexes for each field. They are tried in order, case-insensitively,
# and the first one that yields a usable value wins.
_AMOUNT_PATTERNS = (
    # The lookbehinds stop "Subtotal" / "Sub-total" / "Sub total" from being
    # mistaken for the bill total.
    r'(?<!sub)(?<!sub[\s-])Total[:\s]*\$?([0-9,]+\.?[0-9]*)',
    r'Amount Due[:\s]*\$?([0-9,]+\.?[0-9]*)',
    r'\$([0-9,]+\.?[0-9]*)',
)

_DATE_PATTERNS = (
    r'Due[:\s]*(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})',
    r'(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})',
)

# Because matching is case-insensitive, [0-9A-Z-] also accepts lowercase
# letters. Two guards keep ordinary words out of the capture: an optional
# "Number" / "No." / "#" label is skipped first, and the lookahead requires
# the captured token to contain at least one digit.
_ACCOUNT_VALUE = (
    r'(?:\s*(?:(?:Number|No)\b\.?|#))?[:\s]*'
    r'(?=[0-9A-Z-]*\d)([0-9A-Z-]+)'
)
_ACCOUNT_PATTERNS = (
    r'Account' + _ACCOUNT_VALUE,
    r'A/C' + _ACCOUNT_VALUE,
)


def _parse_amount(text):
    """Convert a captured amount such as ``"1,234.56"`` to a float."""
    return float(text.replace(',', ''))


def _first_capture(patterns, text, convert=None):
    """Return group 1 of the first pattern that matches *text*, else None.

    When *convert* is given it is applied to the captured string; a pattern
    whose capture makes *convert* raise ``ValueError`` is skipped and the next
    pattern is tried.
    """
    import re

    for pattern in patterns:
        match = re.search(pattern, text, re.IGNORECASE)
        if match is None:
            continue
        captured = match.group(1)
        if convert is None:
            return captured
        try:
            return convert(captured)
        except ValueError:
            continue
    return None


def process_bill_image(image, pipe):
    """Run OCR on a bill image and pull out the key billing fields.

    Args:
        image: The image object handed unchanged to ``pipe``.
        pipe: Callable invoked as ``pipe(image)``. It must return a sequence
            whose first item is a mapping with a ``'generated_text'`` entry.

    Returns:
        A dict with the keys ``'raw_text'`` (the OCR output), ``'total_amount'``
        (float or None), ``'due_date'`` (str or None), ``'account_number'``
        (str or None) and ``'services'`` (always an empty list; nothing
        populates it here). If anything raises, the error is shown with
        ``st.error`` and None is returned instead.
    """
    try:
        result = pipe(image)
        extracted_text = result[0]['generated_text']

        return {
            'raw_text': extracted_text,
            'total_amount': _first_capture(
                _AMOUNT_PATTERNS, extracted_text, _parse_amount
            ),
            'due_date': _first_capture(_DATE_PATTERNS, extracted_text),
            'account_number': _first_capture(_ACCOUNT_PATTERNS, extracted_text),
            'services': [],
        }

    except Exception as e:
        # UI boundary: report the failure to the user rather than crash the app.
        st.error(f"Error processing image: {e}")
        return None

def _render_sidebar():
    """Render the usage instructions and technical notes in the sidebar."""
    with st.sidebar:
        st.markdown("### 📋 Instructions")
        st.markdown("""
        1. **Take a clear photo** of your Singtel bill
        2. **Upload the image** using the file uploader
        3. **Wait for processing** (may take a few seconds)
        4. **Review extracted information**
        
        **Tips for better results:**
        - Use good lighting
        - Keep the image straight
        - Ensure text is clearly visible
        - Avoid shadows and glare
        """)

        st.markdown("### 🔧 Technical Details")
        st.markdown("""
        - **Model**: Microsoft TrOCR
        - **Accuracy**: High for clear images
        - **Processing**: ~3-5 seconds
        - **Privacy**: Images not stored
        """)


def _render_upload_panel():
    """Render the uploader and, on request, run OCR on the uploaded image.

    A successful result is stored in ``st.session_state['processing_result']``
    so it can still be displayed after Streamlit re-runs the script.
    """
    st.markdown("### 📤 Upload Bill Image")

    uploaded_file = st.file_uploader(
        "Choose a bill image...",
        type=['png', 'jpg', 'jpeg'],
        help="Upload a clear image of your Singtel bill"
    )
    if uploaded_file is None:
        return

    # A file with an accepted extension can still be corrupt or not an image.
    # Pillow signals that with an OSError (UnidentifiedImageError is a
    # subclass), so report it instead of letting the page crash.
    try:
        image = Image.open(uploaded_file)
    except OSError as exc:
        st.error(f"Could not read the uploaded image: {exc}")
        return

    # NOTE(review): newer Streamlit releases appear to deprecate
    # use_column_width in favour of use_container_width - confirm against the
    # Streamlit version this app is pinned to before changing it.
    st.image(image, caption="Uploaded Bill", use_column_width=True)

    if st.button("🔍 Extract Information", type="primary"):
        with st.spinner("Processing image with AI..."):
            pipe = load_ocr_model()
            result = process_bill_image(image, pipe)

            # process_bill_image returns None after reporting its own error.
            if result is not None:
                st.session_state['processing_result'] = result
                st.success("✅ Processing completed!")


def _render_results_panel():
    """Render the stored parse result, or a hint when there is none yet."""
    st.markdown("### 📊 Extracted Information")

    if 'processing_result' not in st.session_state:
        st.info("👆 Upload an image and click 'Extract Information' to see results here")
        return

    result = st.session_state['processing_result']

    st.markdown("#### 💰 Bill Summary")

    col_a, col_b = st.columns(2)

    with col_a:
        # Compare against None explicitly: a detected amount of 0.00 is falsy
        # but is still a real value and must be shown.
        if result['total_amount'] is not None:
            st.metric("Total Amount", f"${result['total_amount']:.2f}")
        else:
            st.metric("Total Amount", "Not detected")

    with col_b:
        if result['due_date']:
            st.metric("Due Date", result['due_date'])
        else:
            st.metric("Due Date", "Not detected")

    if result['account_number']:
        st.markdown(f"**Account Number:** {result['account_number']}")
    else:
        st.markdown("**Account Number:** Not detected")

    st.markdown("#### 📝 Raw Extracted Text")
    st.text_area(
        "Full text extracted from image:",
        value=result['raw_text'],
        height=150,
        disabled=True
    )

    st.markdown("#### 💾 Export Data")
    import json
    json_data = json.dumps(result, indent=2)
    st.download_button(
        label="📄 Download as JSON",
        data=json_data,
        file_name="bill_data.json",
        mime="application/json"
    )


def _render_footer():
    """Render the centred attribution footer."""
    st.markdown("---")
    st.markdown("""
    <div style='text-align: center'>
        <p>Built with ❤️ using Streamlit and Hugging Face Transformers</p>
        <p>🤖 Powered by Microsoft TrOCR | 🔒 Your images are processed locally and not stored</p>
    </div>
    """, unsafe_allow_html=True)


def main():
    """Lay out the Singtel Bill Scanner page.

    Configures the page, then renders the introduction, the sidebar, a
    two-column body (upload on the left, extracted data on the right) and the
    footer.
    """
    st.set_page_config(
        page_title="Singtel Bill Scanner",
        page_icon="📱",
        layout="wide"
    )

    st.title("📱 Singtel Bill Scanner")
    st.markdown("### AI-Powered OCR for Singtel Bills")

    st.markdown("""
    Upload an image of your Singtel bill and extract key information automatically using AI!
    
    **Features:**
    - 🔍 Extract text from handwritten and printed bills
    - 💰 Identify total amounts and charges
    - 📅 Find due dates
    - 🔒 Extract account numbers
    """)

    _render_sidebar()

    # The upload column is rendered first so that a result produced during
    # this run is already in session state when the results column is drawn.
    col1, col2 = st.columns([1, 1])

    with col1:
        _render_upload_panel()

    with col2:
        _render_results_panel()

    _render_footer()

# Build the page only when this file is executed as the main script.
if __name__ == "__main__":
    main()