Spaces:

NTU-Peak-2
/

SIngtel-Bill-Scanner

Running

File size: 7,091 Bytes

5ff1fa8

"""
Streamlit web app for the Singtel Bill Scanner.

Provides a user-friendly interface for uploading a bill image, running OCR on
it, and reviewing the extracted billing details.
"""

import streamlit as st
from PIL import Image
import io
import base64

# Only import heavy libraries when needed
@st.cache_resource
def load_ocr_model():
    """Build and return the TrOCR image-to-text pipeline.

    The function is wrapped in ``st.cache_resource`` so the pipeline object
    is cached rather than rebuilt on every call.
    """
    # Deferred import: transformers is only pulled in when the model is
    # actually requested.
    from transformers import pipeline as build_pipeline

    ocr_pipeline = build_pipeline(
        "image-to-text",
        model="microsoft/trocr-base-handwritten",
    )
    return ocr_pipeline

def _first_capture(patterns, text):
    """Return capture group 1 of the first pattern that matches *text*.

    Patterns are tried in the order given (most specific first) and matched
    case-insensitively. Returns None when no pattern matches.
    """
    # Imported locally so this code does not rely on a module-level
    # ``import re`` being present in the file.
    import re

    for pattern in patterns:
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            return match.group(1)
    return None


def process_bill_image(image, pipe):
    """Run OCR on a bill image and extract the key billing fields.

    Args:
        image: The bill image; passed unchanged to ``pipe``.
        pipe: Callable OCR pipeline. ``pipe(image)`` must return a sequence
            whose first item is a mapping with a ``'generated_text'`` entry.

    Returns:
        A dict with the keys ``'raw_text'`` (the OCR output), ``'total_amount'``
        (float or None), ``'due_date'`` (str or None), ``'account_number'``
        (str or None) and ``'services'`` (always an empty list here).
        Returns None if any step raises; the error is then reported through
        ``st.error`` instead of propagating to the caller.
    """
    # A number must start with a digit so a lone "," can never be captured;
    # thousands separators and an optional decimal part are allowed.
    number = r'([0-9][0-9,]*(?:\.[0-9]+)?)'
    currency = r'(?:S?\$)?'  # optional "$" or "S$" prefix
    date = r'(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})'
    # Optional "Number" / "No." / "#" after an account label. Without this the
    # case-insensitive identifier class would capture the word "Number" itself.
    label_tail = r'(?:\s*(?:Number|No\.?|#))?[:\s]*'
    # Identifier made of letters, digits and hyphens; the lookahead insists on
    # at least one digit so plain words are never mistaken for an account id.
    account_id = r'((?=[A-Z-]*[0-9])[0-9A-Z-]+)'

    # Each list is ordered from most to least specific.
    amount_patterns = [
        r'\bTotal[:\s]*' + currency + number,  # \b: do not match "Subtotal"
        r'\bAmount\s+Due[:\s]*' + currency + number,
        r'\$' + number,  # fallback: first dollar amount anywhere
    ]
    date_patterns = [
        r'\bDue(?:\s*Date)?[:\s]*' + date,  # "Due: ..." and "Due Date: ..."
        date,  # fallback: first date anywhere
    ]
    account_patterns = [
        r'\bAccount' + label_tail + account_id,
        r'\bA/C' + label_tail + account_id,
    ]

    try:
        # Process with TrOCR
        result = pipe(image)
        extracted_text = result[0]['generated_text']

        parsed_data = {
            'raw_text': extracted_text,
            'total_amount': None,
            'due_date': None,
            'account_number': None,
            'services': []
        }

        amount_text = _first_capture(amount_patterns, extracted_text)
        if amount_text is not None:
            # The number pattern guarantees a float-parsable string once the
            # thousands separators are removed.
            parsed_data['total_amount'] = float(amount_text.replace(',', ''))

        parsed_data['due_date'] = _first_capture(date_patterns, extracted_text)
        parsed_data['account_number'] = _first_capture(
            account_patterns, extracted_text
        )

        return parsed_data

    except Exception as e:
        # UI boundary: surface the failure in the page rather than crashing.
        st.error(f"Error processing image: {e}")
        return None

def main():
    """Render the Singtel Bill Scanner page.

    Lays out the page header and feature list, a sidebar with instructions,
    a left column for uploading an image and triggering OCR, and a right
    column that shows the parsed result stored in ``st.session_state`` under
    ``'processing_result'`` together with a JSON download button.
    """
    st.set_page_config(
        page_title="Singtel Bill Scanner",
        page_icon="📱",
        layout="wide"
    )
    
    st.title("📱 Singtel Bill Scanner")
    st.markdown("### AI-Powered OCR for Singtel Bills")
    
    st.markdown("""
    Upload an image of your Singtel bill and extract key information automatically using AI!
    
    **Features:**
    - 🔍 Extract text from handwritten and printed bills
    - 💰 Identify total amounts and charges
    - 📅 Find due dates
    - 🔢 Extract account numbers
    """)
    
    # Sidebar with instructions
    with st.sidebar:
        st.markdown("### 📋 Instructions")
        st.markdown("""
        1. **Take a clear photo** of your Singtel bill
        2. **Upload the image** using the file uploader
        3. **Wait for processing** (may take a few seconds)
        4. **Review extracted information**
        
        **Tips for better results:**
        - Use good lighting
        - Keep the image straight
        - Ensure text is clearly visible
        - Avoid shadows and glare
        """)
        
        st.markdown("### 🔧 Technical Details")
        st.markdown("""
        - **Model**: Microsoft TrOCR
        - **Accuracy**: High for clear images
        - **Processing**: ~3-5 seconds
        - **Privacy**: Images not stored
        """)
    
    # Main content area: upload on the left, results on the right
    col1, col2 = st.columns([1, 1])
    
    with col1:
        st.markdown("### 📤 Upload Bill Image")
        
        uploaded_file = st.file_uploader(
            "Choose a bill image...",
            type=['png', 'jpg', 'jpeg'],
            help="Upload a clear image of your Singtel bill"
        )
        
        if uploaded_file is not None:
            # Display the uploaded image
            image = Image.open(uploaded_file)
            st.image(image, caption="Uploaded Bill", use_column_width=True)
            
            # Process button
            if st.button("🔍 Extract Information", type="primary"):
                with st.spinner("Processing image with AI..."):
                    # Load model
                    pipe = load_ocr_model()
                    
                    # Process image
                    result = process_bill_image(image, pipe)
                    
                    # process_bill_image returns None on failure (it has
                    # already reported the error), so only store real results.
                    if result:
                        st.session_state['processing_result'] = result
                        st.success("✅ Processing completed!")
    
    with col2:
        st.markdown("### 📊 Extracted Information")
        
        if 'processing_result' in st.session_state:
            result = st.session_state['processing_result']
            
            # Display parsed information
            st.markdown("#### 💰 Bill Summary")
            
            col_a, col_b = st.columns(2)
            
            with col_a:
                # Compare against None explicitly: a parsed amount of 0.0 is
                # falsy but is still a detected value and must be displayed.
                if result['total_amount'] is not None:
                    st.metric("Total Amount", f"${result['total_amount']:.2f}")
                else:
                    st.metric("Total Amount", "Not detected")
            
            with col_b:
                if result['due_date']:
                    st.metric("Due Date", result['due_date'])
                else:
                    st.metric("Due Date", "Not detected")
            
            if result['account_number']:
                st.markdown(f"**Account Number:** {result['account_number']}")
            else:
                st.markdown("**Account Number:** Not detected")
            
            # Raw extracted text
            st.markdown("#### 📝 Raw Extracted Text")
            st.text_area(
                "Full text extracted from image:",
                value=result['raw_text'],
                height=150,
                disabled=True
            )
            
            # Download option
            st.markdown("#### 💾 Export Data")
            import json
            json_data = json.dumps(result, indent=2)
            st.download_button(
                label="📄 Download as JSON",
                data=json_data,
                file_name="bill_data.json",
                mime="application/json"
            )
            
        else:
            st.info("👆 Upload an image and click 'Extract Information' to see results here")
    
    # Footer
    st.markdown("---")
    st.markdown("""
    <div style='text-align: center'>
        <p>Built with ❤️ using Streamlit and Hugging Face Transformers</p>
        <p>🤖 Powered by Microsoft TrOCR | 🔒 Your images are processed locally and not stored</p>
    </div>
    """, unsafe_allow_html=True)

# Build the page only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()