# Snapshot metadata: file size 7,091 bytes, revision 5ff1fa8
"""
Streamlit web app for Singtel Bill Scanner
This creates a user-friendly interface for the bill scanner
"""
import streamlit as st
from PIL import Image
import io
import base64
# Only import heavy libraries when needed
@st.cache_resource
def load_ocr_model():
    """Build the TrOCR image-to-text pipeline; Streamlit caches the result."""
    # Deferred import: transformers is only pulled in when a model is requested.
    from transformers import pipeline

    ocr_pipeline = pipeline(
        "image-to-text",
        model="microsoft/trocr-base-handwritten",
    )
    return ocr_pipeline
def process_bill_image(image, pipe):
    """Run OCR on a bill image and extract the key billing fields.

    Args:
        image: The image to recognise; handed unchanged to ``pipe``.
        pipe: Callable OCR pipeline. It is invoked as ``pipe(image)`` and must
            return a sequence whose first item is a mapping containing a
            ``'generated_text'`` entry.

    Returns:
        A dict with the keys ``raw_text`` (str), ``total_amount`` (float or
        None), ``due_date`` (str or None), ``account_number`` (str or None)
        and ``services`` (an empty list; nothing populates it here).
        ``None`` is returned when OCR or parsing raises; the error is then
        reported through ``st.error``.
    """
    # Kept as a function-local import, as in the original.
    import re

    def first_group(patterns, text):
        """Return group 1 of the first pattern that matches ``text``, else None."""
        for pattern in patterns:
            found = re.search(pattern, text, re.IGNORECASE)
            if found:
                return found.group(1)
        return None

    # A number must start with a digit, so a stray "," can never be captured
    # and float() below always receives a well-formed value.
    number = r'([0-9][0-9,]*(?:\.[0-9]+)?)'
    # Optional currency marker; also accepts the "S$" prefix.
    currency = r'(?:S?\$)?'
    amount_patterns = [
        # \b keeps "Subtotal" from being read as the bill total.
        r'\bTotal(?:\s+Amount)?(?:\s+Due)?[:\s]*' + currency + number,
        r'Amount\s+Due[:\s]*' + currency + number,
        # Last resort: the first dollar amount anywhere in the text.
        r'\$' + number,
    ]

    date = r'(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})'
    date_patterns = [
        # Accept "Due Date: ..." as well as "Due: ...".
        r'Due(?:\s*Date)?[:\s]*' + date,
        # Last resort: the first date anywhere in the text.
        date,
    ]

    # Optional label suffix ("Account Number", "Account No.", "Account #").
    label_suffix = r'(?:\s*(?:Number|No\.?|#))?'
    # The captured token must contain a digit; with IGNORECASE a bare
    # [0-9A-Z-]+ would otherwise capture the following word (e.g. "Number").
    account_token = r'((?=[0-9A-Z-]*[0-9])[0-9A-Z-]+)'
    account_patterns = [
        r'Account' + label_suffix + r'[:\s]*' + account_token,
        r'A/C' + label_suffix + r'[:\s]*' + account_token,
    ]

    try:
        # Process with TrOCR
        ocr_output = pipe(image)
        extracted_text = ocr_output[0]['generated_text']

        parsed_data = {
            'raw_text': extracted_text,
            'total_amount': None,
            'due_date': None,
            'account_number': None,
            'services': []
        }

        amount_text = first_group(amount_patterns, extracted_text)
        if amount_text is not None:
            parsed_data['total_amount'] = float(amount_text.replace(',', ''))

        parsed_data['due_date'] = first_group(date_patterns, extracted_text)
        parsed_data['account_number'] = first_group(
            account_patterns, extracted_text
        )
        return parsed_data
    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing.
        st.error(f"Error processing image: {e}")
        return None
def main():
    """Build and render the Singtel Bill Scanner page.

    Lays out the page header and feature list, a sidebar with usage
    instructions, and two columns. The left column accepts an image upload
    and runs OCR when the button is pressed; the right column shows the
    result stored in ``st.session_state['processing_result']`` together with
    a JSON download button. A footer is rendered below both columns.
    """
    # NOTE(review): several icon characters in the UI strings below look
    # mis-encoded (e.g. "π±"). Only the two literals that were physically
    # split across lines were repaired; the rest are left as found and should
    # be restored from the original file.
    import json

    st.set_page_config(
        page_title="Singtel Bill Scanner",
        page_icon="π±",
        layout="wide"
    )
    st.title("π± Singtel Bill Scanner")
    st.markdown("### AI-Powered OCR for Singtel Bills")
    st.markdown("""
Upload an image of your Singtel bill and extract key information automatically using AI!
**Features:**
- π Extract text from handwritten and printed bills
- π° Identify total amounts and charges
- 📅 Find due dates
- π’ Extract account numbers
""")

    # Sidebar with instructions
    with st.sidebar:
        st.markdown("### π Instructions")
        st.markdown("""
1. **Take a clear photo** of your Singtel bill
2. **Upload the image** using the file uploader
3. **Wait for processing** (may take a few seconds)
4. **Review extracted information**
**Tips for better results:**
- Use good lighting
- Keep the image straight
- Ensure text is clearly visible
- Avoid shadows and glare
""")
        st.markdown("### π§ Technical Details")
        st.markdown("""
- **Model**: Microsoft TrOCR
- **Accuracy**: High for clear images
- **Processing**: ~3-5 seconds
- **Privacy**: Images not stored
""")

    # Main content area
    col1, col2 = st.columns([1, 1])

    with col1:
        st.markdown("### π€ Upload Bill Image")
        uploaded_file = st.file_uploader(
            "Choose a bill image...",
            type=['png', 'jpg', 'jpeg'],
            help="Upload a clear image of your Singtel bill"
        )
        if uploaded_file is not None:
            try:
                image = Image.open(uploaded_file)
            except OSError as exc:
                # Pillow reports unreadable or unsupported files with OSError
                # (sub)classes; show the problem instead of crashing the page.
                st.error(f"Could not open the uploaded image: {exc}")
                image = None

            if image is not None:
                # Display the uploaded image
                # NOTE(review): `use_column_width` is reported as deprecated
                # in newer Streamlit releases - confirm against the deployed
                # version before changing it.
                st.image(image, caption="Uploaded Bill", use_column_width=True)

                # Process button
                if st.button("π Extract Information", type="primary"):
                    with st.spinner("Processing image with AI..."):
                        pipe = load_ocr_model()
                        result = process_bill_image(image, pipe)
                        # None means processing failed and was already reported.
                        if result is not None:
                            st.session_state['processing_result'] = result
                            st.success("✅ Processing completed!")

    with col2:
        st.markdown("### π Extracted Information")
        if 'processing_result' in st.session_state:
            result = st.session_state['processing_result']

            # Display parsed information
            st.markdown("#### π° Bill Summary")
            col_a, col_b = st.columns(2)
            with col_a:
                # Compare against None so a detected amount of 0.00 is shown.
                if result['total_amount'] is not None:
                    st.metric("Total Amount", f"${result['total_amount']:.2f}")
                else:
                    st.metric("Total Amount", "Not detected")
            with col_b:
                if result['due_date']:
                    st.metric("Due Date", result['due_date'])
                else:
                    st.metric("Due Date", "Not detected")

            if result['account_number']:
                st.markdown(f"**Account Number:** {result['account_number']}")
            else:
                st.markdown("**Account Number:** Not detected")

            # Raw extracted text
            st.markdown("#### π Raw Extracted Text")
            st.text_area(
                "Full text extracted from image:",
                value=result['raw_text'],
                height=150,
                disabled=True
            )

            # Download option
            st.markdown("#### πΎ Export Data")
            json_data = json.dumps(result, indent=2)
            st.download_button(
                label="π Download as JSON",
                data=json_data,
                file_name="bill_data.json",
                mime="application/json"
            )
        else:
            st.info("π Upload an image and click 'Extract Information' to see results here")

    # Footer
    st.markdown("---")
    st.markdown("""
<div style='text-align: center'>
<p>Built with β€οΈ using Streamlit and Hugging Face Transformers</p>
<p>π€ Powered by Microsoft TrOCR | π Your images are processed locally and not stored</p>
</div>
""", unsafe_allow_html=True)
# Render the page only when this file is executed as the entry script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
# (end of module)