Cosmo125's picture
Added a comment (#2)
5ff1fa8 verified
"""
Streamlit web app for Singtel Bill Scanner
This creates a user-friendly interface for the bill scanner
"""
import streamlit as st
from PIL import Image
import io
import base64
# Only import heavy libraries when needed
@st.cache_resource
def load_ocr_model():
    """Build the TrOCR image-to-text pipeline.

    The result is wrapped in ``st.cache_resource`` so the pipeline object is
    created once and reused instead of being rebuilt on every call.
    ``transformers`` is imported inside the function so the import cost is
    only paid when a model is actually requested.
    """
    from transformers import pipeline

    ocr_pipeline = pipeline(
        "image-to-text",
        model="microsoft/trocr-base-handwritten",
    )
    return ocr_pipeline
# Regex building blocks for bill parsing.  Every pattern is applied
# case-insensitively and exposes the value of interest as group 1.  Within
# each tuple the patterns are tried in order and the first hit wins.

# An amount: optional "$" / "S$", then digits with optional thousands commas
# and an optional decimal part.  Requiring a leading digit guarantees the
# capture can always be converted with float() once commas are removed.
_AMOUNT_VALUE = r'(?:S?\$)?\s*([0-9][0-9,]*(?:\.[0-9]+)?)'
_AMOUNT_PATTERNS = (
    # \b keeps "Total" from matching inside "Subtotal".
    r'\bTotal[:\s]*' + _AMOUNT_VALUE,
    r'\bAmount\s+Due[:\s]*' + _AMOUNT_VALUE,
    # Last resort: the first dollar amount anywhere in the text.
    r'\$\s*([0-9][0-9,]*(?:\.[0-9]+)?)',
)

_DATE_PATTERNS = (
    r'Due[:\s]*(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})',
    r'(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})',
)

# An account id: optional "Number" / "No." / "#" label, then a token of
# letters, digits and hyphens that contains at least one digit.  Because the
# patterns run with IGNORECASE, [A-Z] also matches lowercase letters; the
# mandatory digit stops a plain word such as "Number" or "Summary" from being
# captured as the account id.
_ACCOUNT_VALUE = (
    r'(?:\s*(?:(?:Number|No)\b\.?|#))?[:\s]*'
    r'([0-9A-Z-]*[0-9][0-9A-Z-]*)'
)
_ACCOUNT_PATTERNS = (
    r'\bAccount' + _ACCOUNT_VALUE,
    r'\bA/C' + _ACCOUNT_VALUE,
)


def _first_match(patterns, text):
    """Return group 1 of the first pattern that matches *text*, else None."""
    import re

    for pattern in patterns:
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            return match.group(1)
    return None


def _parse_bill_text(text):
    """Extract total amount, due date and account number from OCR text."""
    parsed_data = {
        'raw_text': text,
        'total_amount': None,
        'due_date': None,
        'account_number': None,
        'services': [],
    }

    amount = _first_match(_AMOUNT_PATTERNS, text)
    if amount is not None:
        parsed_data['total_amount'] = float(amount.replace(',', ''))

    parsed_data['due_date'] = _first_match(_DATE_PATTERNS, text)

    account = _first_match(_ACCOUNT_PATTERNS, text)
    if account is not None:
        parsed_data['account_number'] = account.strip()

    return parsed_data


def process_bill_image(image, pipe):
    """Run OCR on a bill image and parse key fields from the recognised text.

    Args:
        image: The image object handed straight to ``pipe``.
        pipe: A callable that takes the image and returns a sequence whose
            first element is a mapping with a ``'generated_text'`` entry.

    Returns:
        A dict with the keys ``'raw_text'`` (the OCR text), ``'total_amount'``
        (float or None), ``'due_date'`` (str or None), ``'account_number'``
        (str or None) and ``'services'`` (always an empty list), or ``None``
        if OCR or parsing raised an exception.  In the failure case the error
        is reported to the user through ``st.error``.
    """
    try:
        result = pipe(image)
        extracted_text = result[0]['generated_text']
        return _parse_bill_text(extracted_text)
    except Exception as e:
        # UI boundary: any failure (model error, empty OCR result, unexpected
        # output shape) is shown to the user instead of crashing the app.
        st.error(f"Error processing image: {e}")
        return None
def _render_sidebar():
    """Show usage instructions and technical notes in the sidebar."""
    with st.sidebar:
        st.markdown("### 📋 Instructions")
        # The blank line before the tips heading keeps it from being rendered
        # as a continuation of list item 4.
        st.markdown(
            "1. **Take a clear photo** of your Singtel bill\n"
            "2. **Upload the image** using the file uploader\n"
            "3. **Wait for processing** (may take a few seconds)\n"
            "4. **Review extracted information**\n"
            "\n"
            "**Tips for better results:**\n"
            "- Use good lighting\n"
            "- Keep the image straight\n"
            "- Ensure text is clearly visible\n"
            "- Avoid shadows and glare\n"
        )
        st.markdown("### 🔧 Technical Details")
        st.markdown(
            "- **Model**: Microsoft TrOCR\n"
            "- **Accuracy**: High for clear images\n"
            "- **Processing**: ~3-5 seconds\n"
            "- **Privacy**: Images not stored\n"
        )


def _render_upload_column():
    """Show the uploader and, on request, run OCR and store the result."""
    st.markdown("### 📤 Upload Bill Image")
    uploaded_file = st.file_uploader(
        "Choose a bill image...",
        type=['png', 'jpg', 'jpeg'],
        help="Upload a clear image of your Singtel bill",
    )
    if uploaded_file is None:
        return

    image = Image.open(uploaded_file)
    # NOTE(review): `use_column_width` is reported as deprecated in newer
    # Streamlit releases in favour of `use_container_width`; left unchanged
    # because the target Streamlit version is not visible here -- confirm.
    st.image(image, caption="Uploaded Bill", use_column_width=True)

    if st.button("🔍 Extract Information", type="primary"):
        with st.spinner("Processing image with AI..."):
            pipe = load_ocr_model()
            result = process_bill_image(image, pipe)
        if result:
            # Kept in session state so the results column can still show it
            # on later reruns of the script.
            st.session_state['processing_result'] = result
            st.success("✅ Processing completed!")


def _render_results_column():
    """Show the most recent parsed bill, or a hint if there is none yet."""
    st.markdown("### 📊 Extracted Information")
    if 'processing_result' not in st.session_state:
        st.info("👆 Upload an image and click 'Extract Information' to see results here")
        return

    result = st.session_state['processing_result']

    st.markdown("#### 💰 Bill Summary")
    col_a, col_b = st.columns(2)
    with col_a:
        # Compare against None explicitly: a parsed total of 0.0 is a valid
        # amount and must not be reported as "Not detected".
        if result['total_amount'] is not None:
            st.metric("Total Amount", f"${result['total_amount']:.2f}")
        else:
            st.metric("Total Amount", "Not detected")
    with col_b:
        if result['due_date']:
            st.metric("Due Date", result['due_date'])
        else:
            st.metric("Due Date", "Not detected")

    if result['account_number']:
        st.markdown(f"**Account Number:** {result['account_number']}")
    else:
        st.markdown("**Account Number:** Not detected")

    st.markdown("#### 📝 Raw Extracted Text")
    st.text_area(
        "Full text extracted from image:",
        value=result['raw_text'],
        height=150,
        disabled=True,
    )

    st.markdown("#### 💾 Export Data")
    import json

    json_data = json.dumps(result, indent=2)
    st.download_button(
        label="📄 Download as JSON",
        data=json_data,
        file_name="bill_data.json",
        mime="application/json",
    )


def main():
    """Render the Singtel Bill Scanner page.

    Lays out the page header, a sidebar with instructions, an upload column
    that runs OCR when the user clicks the extract button, a results column
    that displays the last parsed bill held in ``st.session_state``, and a
    footer.
    """
    st.set_page_config(
        page_title="Singtel Bill Scanner",
        page_icon="📱",
        layout="wide",
    )
    st.title("📱 Singtel Bill Scanner")
    st.markdown("### AI-Powered OCR for Singtel Bills")
    # The blank line before the features heading makes it render as its own
    # paragraph instead of being appended to the introduction sentence.
    st.markdown(
        "Upload an image of your Singtel bill and extract key information "
        "automatically using AI!\n"
        "\n"
        "**Features:**\n"
        "- 🔍 Extract text from handwritten and printed bills\n"
        "- 💰 Identify total amounts and charges\n"
        "- 📅 Find due dates\n"
        "- 🔢 Extract account numbers\n"
    )

    _render_sidebar()

    col1, col2 = st.columns([1, 1])
    with col1:
        _render_upload_column()
    with col2:
        _render_results_column()

    st.markdown("---")
    st.markdown(
        "<div style='text-align: center'>\n"
        "<p>Built with ❤️ using Streamlit and Hugging Face Transformers</p>\n"
        "<p>🤖 Powered by Microsoft TrOCR | 🔒 Your images are processed locally and not stored</p>\n"
        "</div>",
        unsafe_allow_html=True,
    )
if __name__ == "__main__":
    # Build the page only when this file is executed as the entry script,
    # not when it is imported as a module.
    main()