Cosmo125 zixuanvtzx committed on
Commit
5ff1fa8
·
verified ·
1 Parent(s): 795183d

Added a comment (#2)

Browse files

- Added a comment (dd04eeacaea2330910f5fff63845c62d6b43abf4)


Co-authored-by: Tan Zi Xuan <[email protected]>

Files changed (1) hide show
  1. app.py +219 -218
app.py CHANGED
@@ -1,218 +1,219 @@
1
- """
2
- Streamlit web app for Singtel Bill Scanner
3
- This creates a user-friendly interface for the bill scanner
4
- """
5
-
6
- import streamlit as st
7
- from PIL import Image
8
- import io
9
- import base64
10
-
11
- # Only import heavy libraries when needed
12
- @st.cache_resource
13
- def load_ocr_model():
14
- """Load the OCR model (cached for performance)"""
15
- from transformers import pipeline
16
- return pipeline("image-to-text", model="microsoft/trocr-base-handwritten")
17
-
18
- def process_bill_image(image, pipe):
19
- """Process the uploaded bill image"""
20
- try:
21
- # Process with TrOCR
22
- result = pipe(image)
23
- extracted_text = result[0]['generated_text']
24
-
25
- # Simple parsing
26
- import re
27
-
28
- # Extract key information
29
- parsed_data = {
30
- 'raw_text': extracted_text,
31
- 'total_amount': None,
32
- 'due_date': None,
33
- 'account_number': None,
34
- 'services': []
35
- }
36
-
37
- # Look for total amount
38
- amount_patterns = [
39
- r'Total[:\s]*\$?([0-9,]+\.?[0-9]*)',
40
- r'Amount Due[:\s]*\$?([0-9,]+\.?[0-9]*)',
41
- r'\$([0-9,]+\.?[0-9]*)',
42
- ]
43
-
44
- for pattern in amount_patterns:
45
- match = re.search(pattern, extracted_text, re.IGNORECASE)
46
- if match:
47
- try:
48
- parsed_data['total_amount'] = float(match.group(1).replace(',', ''))
49
- break
50
- except ValueError:
51
- continue
52
-
53
- # Look for due date
54
- date_patterns = [
55
- r'Due[:\s]*(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})',
56
- r'(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})',
57
- ]
58
-
59
- for pattern in date_patterns:
60
- match = re.search(pattern, extracted_text, re.IGNORECASE)
61
- if match:
62
- parsed_data['due_date'] = match.group(1)
63
- break
64
-
65
- # Look for account number
66
- account_patterns = [
67
- r'Account[:\s]*([0-9A-Z-]+)',
68
- r'A/C[:\s]*([0-9A-Z-]+)',
69
- ]
70
-
71
- for pattern in account_patterns:
72
- match = re.search(pattern, extracted_text, re.IGNORECASE)
73
- if match:
74
- parsed_data['account_number'] = match.group(1).strip()
75
- break
76
-
77
- return parsed_data
78
-
79
- except Exception as e:
80
- st.error(f"Error processing image: {e}")
81
- return None
82
-
83
- def main():
84
- st.set_page_config(
85
- page_title="Singtel Bill Scanner",
86
- page_icon="πŸ“±",
87
- layout="wide"
88
- )
89
-
90
- st.title("πŸ“± Singtel Bill Scanner")
91
- st.markdown("### AI-Powered OCR for Singtel Bills")
92
-
93
- st.markdown("""
94
- Upload an image of your Singtel bill and extract key information automatically using AI!
95
-
96
- **Features:**
97
- - πŸ” Extract text from handwritten and printed bills
98
- - πŸ’° Identify total amounts and charges
99
- - πŸ“… Find due dates
100
- - πŸ”’ Extract account numbers
101
- """)
102
-
103
- # Sidebar with instructions
104
- with st.sidebar:
105
- st.markdown("### πŸ“‹ Instructions")
106
- st.markdown("""
107
- 1. **Take a clear photo** of your Singtel bill
108
- 2. **Upload the image** using the file uploader
109
- 3. **Wait for processing** (may take a few seconds)
110
- 4. **Review extracted information**
111
-
112
- **Tips for better results:**
113
- - Use good lighting
114
- - Keep the image straight
115
- - Ensure text is clearly visible
116
- - Avoid shadows and glare
117
- """)
118
-
119
- st.markdown("### πŸ”§ Technical Details")
120
- st.markdown("""
121
- - **Model**: Microsoft TrOCR
122
- - **Accuracy**: High for clear images
123
- - **Processing**: ~3-5 seconds
124
- - **Privacy**: Images not stored
125
- """)
126
-
127
- # Main content area
128
- col1, col2 = st.columns([1, 1])
129
-
130
- with col1:
131
- st.markdown("### πŸ“€ Upload Bill Image")
132
-
133
- uploaded_file = st.file_uploader(
134
- "Choose a bill image...",
135
- type=['png', 'jpg', 'jpeg'],
136
- help="Upload a clear image of your Singtel bill"
137
- )
138
-
139
- if uploaded_file is not None:
140
- # Display the uploaded image
141
- image = Image.open(uploaded_file)
142
- st.image(image, caption="Uploaded Bill", use_column_width=True)
143
-
144
- # Process button
145
- if st.button("πŸ” Extract Information", type="primary"):
146
- with st.spinner("Processing image with AI..."):
147
- # Load model
148
- pipe = load_ocr_model()
149
-
150
- # Process image
151
- result = process_bill_image(image, pipe)
152
-
153
- if result:
154
- st.session_state['processing_result'] = result
155
- st.success("βœ… Processing completed!")
156
-
157
- with col2:
158
- st.markdown("### πŸ“Š Extracted Information")
159
-
160
- if 'processing_result' in st.session_state:
161
- result = st.session_state['processing_result']
162
-
163
- # Display parsed information
164
- st.markdown("#### πŸ’° Bill Summary")
165
-
166
- col_a, col_b = st.columns(2)
167
-
168
- with col_a:
169
- if result['total_amount']:
170
- st.metric("Total Amount", f"${result['total_amount']:.2f}")
171
- else:
172
- st.metric("Total Amount", "Not detected")
173
-
174
- with col_b:
175
- if result['due_date']:
176
- st.metric("Due Date", result['due_date'])
177
- else:
178
- st.metric("Due Date", "Not detected")
179
-
180
- if result['account_number']:
181
- st.markdown(f"**Account Number:** {result['account_number']}")
182
- else:
183
- st.markdown("**Account Number:** Not detected")
184
-
185
- # Raw extracted text
186
- st.markdown("#### πŸ“ Raw Extracted Text")
187
- st.text_area(
188
- "Full text extracted from image:",
189
- value=result['raw_text'],
190
- height=150,
191
- disabled=True
192
- )
193
-
194
- # Download option
195
- st.markdown("#### πŸ’Ύ Export Data")
196
- import json
197
- json_data = json.dumps(result, indent=2)
198
- st.download_button(
199
- label="πŸ“„ Download as JSON",
200
- data=json_data,
201
- file_name="bill_data.json",
202
- mime="application/json"
203
- )
204
-
205
- else:
206
- st.info("πŸ‘† Upload an image and click 'Extract Information' to see results here")
207
-
208
- # Footer
209
- st.markdown("---")
210
- st.markdown("""
211
- <div style='text-align: center'>
212
- <p>Built with ❀️ using Streamlit and Hugging Face Transformers</p>
213
- <p>πŸ€– Powered by Microsoft TrOCR | πŸ”’ Your images are processed locally and not stored</p>
214
- </div>
215
- """, unsafe_allow_html=True)
216
-
217
- if __name__ == "__main__":
218
- main()
 
 
1
+ """
2
+ Streamlit web app for Singtel Bill Scanner
3
+ This creates a user-friendly interface for the bill scanner
4
+ """
5
+
6
+ import streamlit as st
7
+ from PIL import Image
8
+ import io
9
+ import base64
10
+
11
+ # Only import heavy libraries when needed
12
@st.cache_resource
def load_ocr_model():
    """Build the TrOCR image-to-text pipeline used to read bill images.

    ``transformers`` is imported inside the function so the import cost is
    only paid once OCR is actually requested. The function is wrapped in
    ``st.cache_resource`` so the pipeline object can be reused instead of
    being rebuilt.

    Returns:
        The ``image-to-text`` pipeline created for the
        ``microsoft/trocr-base-handwritten`` checkpoint.
    """
    from transformers import pipeline

    ocr_pipeline = pipeline(
        "image-to-text",
        model="microsoft/trocr-base-handwritten",
    )
    return ocr_pipeline
17
+
18
def process_bill_image(image, pipe):
    """Run OCR on a bill image and pull out the key billing fields.

    Args:
        image: The bill image; it is handed to ``pipe`` unchanged.
        pipe: OCR callable. It is invoked as ``pipe(image)`` and must return
            a sequence whose first item is a mapping with a
            ``'generated_text'`` entry.

    Returns:
        A dict with the keys ``raw_text`` (the OCR text), ``total_amount``
        (float or None), ``due_date`` (the date exactly as written in the
        text, or None), ``account_number`` (str or None) and ``services``
        (an empty list; nothing in this function fills it). Returns None,
        after reporting the problem through ``st.error``, if OCR or parsing
        raises.
    """
    import re

    # A number must start with a digit, so a stray "," or "." can never be
    # captured and the float() conversion below cannot fail.
    number = r'([0-9][0-9,]*(?:\.[0-9]+)?)'
    amount_patterns = [
        # The word boundary keeps "Total" from matching inside "Subtotal";
        # the lookbehind also rejects "Sub Total" / "Sub-Total".
        r'(?<!sub[\s-])\bTotal[:\s]*\$?' + number,
        r'Amount Due[:\s]*\$?' + number,
        # Last resort: the first dollar figure anywhere in the text.
        r'\$' + number,
    ]

    date = r'(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})'
    date_patterns = [
        # Accept "Due 1/2/24", "Due Date: 1/2/24", "Due by 1/2/24", ...
        r'Due(?:\s*Date)?(?:\s*(?:by|on))?[:\s]*' + date,
        # Fallback: the first date-looking token in the text.
        date,
    ]

    # The matching is case-insensitive, so [0-9A-Z-] also accepts plain
    # words. Requiring at least one digit stops label words such as
    # "Number" or "holder" from being reported as the account number.
    label_suffix = r'(?:\s*(?:Number|No\.?|#))?[:\s]*'
    account_value = r'([0-9A-Z-]*[0-9][0-9A-Z-]*)'
    account_patterns = [
        r'\bAccount' + label_suffix + account_value,
        r'\bA/C' + label_suffix + account_value,
    ]

    try:
        # Process with TrOCR
        result = pipe(image)
        extracted_text = result[0]['generated_text']

        parsed_data = {
            'raw_text': extracted_text,
            'total_amount': None,
            'due_date': None,
            'account_number': None,
            'services': [],
        }

        # Patterns are ordered most specific first; the first hit wins.
        for pattern in amount_patterns:
            match = re.search(pattern, extracted_text, re.IGNORECASE)
            if match:
                parsed_data['total_amount'] = float(match.group(1).replace(',', ''))
                break

        for pattern in date_patterns:
            match = re.search(pattern, extracted_text, re.IGNORECASE)
            if match:
                parsed_data['due_date'] = match.group(1)
                break

        for pattern in account_patterns:
            match = re.search(pattern, extracted_text, re.IGNORECASE)
            if match:
                parsed_data['account_number'] = match.group(1).strip()
                break

        return parsed_data

    except Exception as e:
        # UI boundary: whatever the OCR pipeline raises is shown to the user
        # instead of crashing the page.
        st.error(f"Error processing image: {e}")
        return None
83
+
84
def _render_intro():
    """Render the page title and the feature overview."""
    st.title("📱 Singtel Bill Scanner")
    st.markdown("### AI-Powered OCR for Singtel Bills")

    st.markdown("""
    Upload an image of your Singtel bill and extract key information automatically using AI!

    **Features:**
    - 🔍 Extract text from handwritten and printed bills
    - 💰 Identify total amounts and charges
    - 📅 Find due dates
    - 🔢 Extract account numbers
    """)


def _render_sidebar():
    """Render the sidebar: usage instructions and technical details."""
    with st.sidebar:
        st.markdown("### 📋 Instructions")
        st.markdown("""
        1. **Take a clear photo** of your Singtel bill
        2. **Upload the image** using the file uploader
        3. **Wait for processing** (may take a few seconds)
        4. **Review extracted information**

        **Tips for better results:**
        - Use good lighting
        - Keep the image straight
        - Ensure text is clearly visible
        - Avoid shadows and glare
        """)

        st.markdown("### 🔧 Technical Details")
        st.markdown("""
        - **Model**: Microsoft TrOCR
        - **Accuracy**: High for clear images
        - **Processing**: ~3-5 seconds
        - **Privacy**: Images not stored
        """)


def _render_upload_column():
    """Render the uploader and run OCR when the user asks for it."""
    st.markdown("### 📤 Upload Bill Image")

    uploaded_file = st.file_uploader(
        "Choose a bill image...",
        type=['png', 'jpg', 'jpeg'],
        help="Upload a clear image of your Singtel bill"
    )

    # A stored result belongs to one specific upload. Drop it as soon as the
    # file is replaced or removed so the results column never shows data
    # extracted from a different image.
    source = None if uploaded_file is None else (uploaded_file.name, uploaded_file.size)
    if st.session_state.get('processing_source') != source:
        st.session_state.pop('processing_result', None)
        st.session_state.pop('processing_source', None)

    if uploaded_file is None:
        return

    try:
        image = Image.open(uploaded_file)
    except OSError as exc:
        # Covers PIL.UnidentifiedImageError (corrupt or mislabeled upload).
        st.error(f"Could not read the uploaded image: {exc}")
        return

    # NOTE(review): recent Streamlit releases deprecate `use_column_width`;
    # switch to its replacement once the deployed version is confirmed.
    st.image(image, caption="Uploaded Bill", use_column_width=True)

    if st.button("🔍 Extract Information", type="primary"):
        with st.spinner("Processing image with AI..."):
            pipe = load_ocr_model()
            result = process_bill_image(image, pipe)

            if result:
                st.session_state['processing_result'] = result
                st.session_state['processing_source'] = source
                st.success("✅ Processing completed!")


def _render_results_column():
    """Render the parsed fields, the raw OCR text and the JSON export."""
    st.markdown("### 📊 Extracted Information")

    result = st.session_state.get('processing_result')
    if result is None:
        st.info("👆 Upload an image and click 'Extract Information' to see results here")
        return

    st.markdown("#### 💰 Bill Summary")

    col_a, col_b = st.columns(2)

    with col_a:
        # Compare against None: a parsed amount of 0.00 is a real value and
        # must not be shown as "Not detected".
        if result['total_amount'] is not None:
            st.metric("Total Amount", f"${result['total_amount']:.2f}")
        else:
            st.metric("Total Amount", "Not detected")

    with col_b:
        if result['due_date']:
            st.metric("Due Date", result['due_date'])
        else:
            st.metric("Due Date", "Not detected")

    if result['account_number']:
        st.markdown(f"**Account Number:** {result['account_number']}")
    else:
        st.markdown("**Account Number:** Not detected")

    st.markdown("#### 📝 Raw Extracted Text")
    st.text_area(
        "Full text extracted from image:",
        value=result['raw_text'],
        height=150,
        disabled=True
    )

    st.markdown("#### 💾 Export Data")
    import json
    json_data = json.dumps(result, indent=2)
    st.download_button(
        label="📄 Download as JSON",
        data=json_data,
        file_name="bill_data.json",
        mime="application/json"
    )


def _render_footer():
    """Render the centered footer."""
    st.markdown("---")
    st.markdown("""
    <div style='text-align: center'>
    <p>Built with ❤️ using Streamlit and Hugging Face Transformers</p>
    <p>🤖 Powered by Microsoft TrOCR | 🔒 Your images are processed locally and not stored</p>
    </div>
    """, unsafe_allow_html=True)


def main():
    """Lay out the Singtel Bill Scanner page.

    Configures the page, then renders the intro, the sidebar, a two-column
    body (upload and OCR on the left, extracted results on the right) and
    the footer. The OCR result is kept in ``st.session_state`` under
    ``'processing_result'``, together with ``'processing_source'``, which
    records the upload it was extracted from.
    """
    st.set_page_config(
        page_title="Singtel Bill Scanner",
        page_icon="📱",
        layout="wide"
    )

    _render_intro()
    _render_sidebar()

    # Main content area
    col1, col2 = st.columns([1, 1])

    with col1:
        _render_upload_column()

    with col2:
        _render_results_column()

    _render_footer()
217
+
218
# Script entry point: build the page only when this file is run directly.
if __name__ == "__main__":
    main()