""" Streamlit web app for Singtel Bill Scanner This creates a user-friendly interface for the bill scanner """ import streamlit as st from PIL import Image import io import base64 # Only import heavy libraries when needed @st.cache_resource def load_ocr_model(): """Load the OCR model (cached for performance)""" from transformers import pipeline return pipeline("image-to-text", model="microsoft/trocr-base-handwritten") def process_bill_image(image, pipe): """Process the uploaded bill image""" try: # Process with TrOCR result = pipe(image) extracted_text = result[0]['generated_text'] # Simple parsing import re #added something # Extract key information parsed_data = { 'raw_text': extracted_text, 'total_amount': None, 'due_date': None, 'account_number': None, 'services': [] } # Look for total amount amount_patterns = [ r'Total[:\s]*\$?([0-9,]+\.?[0-9]*)', r'Amount Due[:\s]*\$?([0-9,]+\.?[0-9]*)', r'\$([0-9,]+\.?[0-9]*)', ] for pattern in amount_patterns: match = re.search(pattern, extracted_text, re.IGNORECASE) if match: try: parsed_data['total_amount'] = float(match.group(1).replace(',', '')) break except ValueError: continue # Look for due date date_patterns = [ r'Due[:\s]*(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})', r'(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})', ] for pattern in date_patterns: match = re.search(pattern, extracted_text, re.IGNORECASE) if match: parsed_data['due_date'] = match.group(1) break # Look for account number account_patterns = [ r'Account[:\s]*([0-9A-Z-]+)', r'A/C[:\s]*([0-9A-Z-]+)', ] for pattern in account_patterns: match = re.search(pattern, extracted_text, re.IGNORECASE) if match: parsed_data['account_number'] = match.group(1).strip() break return parsed_data except Exception as e: st.error(f"Error processing image: {e}") return None def main(): st.set_page_config( page_title="Singtel Bill Scanner", page_icon="📱", layout="wide" ) st.title("📱 Singtel Bill Scanner") st.markdown("### AI-Powered OCR for Singtel Bills") st.markdown(""" Upload an image of your Singtel bill and extract key information automatically using AI! **Features:** - 🔍 Extract text from handwritten and printed bills - 💰 Identify total amounts and charges - 📅 Find due dates - 🔢 Extract account numbers """) # Sidebar with instructions with st.sidebar: st.markdown("### 📋 Instructions") st.markdown(""" 1. **Take a clear photo** of your Singtel bill 2. **Upload the image** using the file uploader 3. **Wait for processing** (may take a few seconds) 4. **Review extracted information** **Tips for better results:** - Use good lighting - Keep the image straight - Ensure text is clearly visible - Avoid shadows and glare """) st.markdown("### 🔧 Technical Details") st.markdown(""" - **Model**: Microsoft TrOCR - **Accuracy**: High for clear images - **Processing**: ~3-5 seconds - **Privacy**: Images not stored """) # Main content area col1, col2 = st.columns([1, 1]) with col1: st.markdown("### 📤 Upload Bill Image") uploaded_file = st.file_uploader( "Choose a bill image...", type=['png', 'jpg', 'jpeg'], help="Upload a clear image of your Singtel bill" ) if uploaded_file is not None: # Display the uploaded image image = Image.open(uploaded_file) st.image(image, caption="Uploaded Bill", use_column_width=True) # Process button if st.button("🔍 Extract Information", type="primary"): with st.spinner("Processing image with AI..."): # Load model pipe = load_ocr_model() # Process image result = process_bill_image(image, pipe) if result: st.session_state['processing_result'] = result st.success("✅ Processing completed!") with col2: st.markdown("### 📊 Extracted Information") if 'processing_result' in st.session_state: result = st.session_state['processing_result'] # Display parsed information st.markdown("#### 💰 Bill Summary") col_a, col_b = st.columns(2) with col_a: if result['total_amount']: st.metric("Total Amount", f"${result['total_amount']:.2f}") else: st.metric("Total Amount", "Not detected") with col_b: if result['due_date']: st.metric("Due Date", result['due_date']) else: st.metric("Due Date", "Not detected") if result['account_number']: st.markdown(f"**Account Number:** {result['account_number']}") else: st.markdown("**Account Number:** Not detected") # Raw extracted text st.markdown("#### 📝 Raw Extracted Text") st.text_area( "Full text extracted from image:", value=result['raw_text'], height=150, disabled=True ) # Download option st.markdown("#### 💾 Export Data") import json json_data = json.dumps(result, indent=2) st.download_button( label="📄 Download as JSON", data=json_data, file_name="bill_data.json", mime="application/json" ) else: st.info("👆 Upload an image and click 'Extract Information' to see results here") # Footer st.markdown("---") st.markdown("""
Built with ❤️ using Streamlit and Hugging Face Transformers
🤖 Powered by Microsoft TrOCR | 🔒 Your images are processed locally and not stored