|
""" |
|
Streamlit web app for Singtel Bill Scanner |
|
This creates a user-friendly interface for the bill scanner |
|
""" |
|
|
|
import base64
import io
import re

import streamlit as st
from PIL import Image
|
|
|
|
|
@st.cache_resource
def load_ocr_model():
    """Build the TrOCR image-to-text pipeline.

    Decorated with ``st.cache_resource`` so the pipeline object is cached
    rather than rebuilt on every call. ``transformers`` is imported inside
    the function, so the import only happens when the model is requested.

    Returns:
        The Hugging Face ``image-to-text`` pipeline for the
        ``microsoft/trocr-base-handwritten`` checkpoint.
    """
    from transformers import pipeline

    ocr_pipeline = pipeline(
        "image-to-text",
        model="microsoft/trocr-base-handwritten",
    )
    return ocr_pipeline
|
|
|
# Shared shape of a monetary amount: must start with a digit so that stray
# punctuation (e.g. "Total, see below") is never captured as a number.
_AMOUNT = r'([0-9][0-9,]*(?:\.[0-9]+)?)'

# Each tuple is tried in order and the first pattern that matches wins.
# All patterns are applied case-insensitively.
_AMOUNT_PATTERNS = (
    # The look-behinds keep "Subtotal" / "Sub total" / "Sub-total" from being
    # reported as the bill total.
    r'(?<!Sub)(?<!Sub[\s-])Total[:\s]*\$?' + _AMOUNT,
    r'Amount Due[:\s]*\$?' + _AMOUNT,
    r'\$' + _AMOUNT,
)

_DATE_PATTERNS = (
    # Accept "Due: ...", "Due Date: ...", "Due by ...", "Due on ...".
    r'Due(?:\s*(?:Date|by|on))?[:\s]*(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})',
    # Fallback: the first date-looking token anywhere in the text.
    r'(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})',
)

# Under IGNORECASE the class [0-9A-Z-] also matches lowercase letters, so the
# captured token must contain at least one digit; otherwise the label word in
# "Account Number: 123" would be captured instead of the number itself.
_ACCOUNT_ID = r'([0-9A-Z-]*[0-9][0-9A-Z-]*)'
_ACCOUNT_LABEL_SUFFIX = r'(?:\s*(?:Number|No\.?|#))?[:\s]*'

_ACCOUNT_PATTERNS = (
    r'Account' + _ACCOUNT_LABEL_SUFFIX + _ACCOUNT_ID,
    r'A/C' + _ACCOUNT_LABEL_SUFFIX + _ACCOUNT_ID,
)


def _first_capture(patterns, text):
    """Return group 1 of the first pattern that matches *text*, else None."""
    for pattern in patterns:
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            return match.group(1)
    return None


def _parse_bill_text(text):
    """Extract the bill fields from OCR text into the result dictionary."""
    amount = _first_capture(_AMOUNT_PATTERNS, text)
    return {
        'raw_text': text,
        # Thousands separators are dropped before conversion ("1,234.56").
        'total_amount': (
            float(amount.replace(',', '')) if amount is not None else None
        ),
        'due_date': _first_capture(_DATE_PATTERNS, text),
        'account_number': _first_capture(_ACCOUNT_PATTERNS, text),
        # Placeholder: no service extraction is implemented.
        'services': [],
    }


def process_bill_image(image, pipe):
    """Run OCR on a bill image and pull out the key billing fields.

    Args:
        image: The image to recognise; passed straight to ``pipe``.
        pipe: A callable that takes the image and returns a sequence whose
            first item is a mapping with a ``'generated_text'`` entry.

    Returns:
        A dict with the keys ``'raw_text'`` (the OCR output),
        ``'total_amount'`` (float or None), ``'due_date'`` (string as it
        appears in the text, or None), ``'account_number'`` (string or None)
        and ``'services'`` (always an empty list). Returns None if OCR or
        parsing raises; the error is reported to the page via ``st.error``.
    """
    try:
        result = pipe(image)
        extracted_text = result[0]['generated_text']
        return _parse_bill_text(extracted_text)
    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing the
        # page, and signal it to the caller with None.
        st.error(f"Error processing image: {e}")
        return None
|
|
|
def main():
    """Render the Singtel Bill Scanner page.

    Builds the page header, a sidebar with instructions, and two columns:
    the left one uploads a bill image and runs extraction on demand, the
    right one shows the most recent extraction result kept in
    ``st.session_state['processing_result']`` and offers it as a JSON
    download.
    """
    # NOTE(review): the emoji in the UI strings below were restored from
    # mis-decoded text. Where the original bytes were not fully recoverable
    # the glyph was chosen from context -- confirm against the intended
    # design.
    st.set_page_config(
        page_title="Singtel Bill Scanner",
        page_icon="📱",
        layout="wide"
    )

    st.title("📱 Singtel Bill Scanner")
    st.markdown("### AI-Powered OCR for Singtel Bills")

    st.markdown("""
    Upload an image of your Singtel bill and extract key information automatically using AI!

    **Features:**
    - 📄 Extract text from handwritten and printed bills
    - 💰 Identify total amounts and charges
    - 📅 Find due dates
    - 🔢 Extract account numbers
    """)

    with st.sidebar:
        st.markdown("### 📋 Instructions")
        st.markdown("""
        1. **Take a clear photo** of your Singtel bill
        2. **Upload the image** using the file uploader
        3. **Wait for processing** (may take a few seconds)
        4. **Review extracted information**

        **Tips for better results:**
        - Use good lighting
        - Keep the image straight
        - Ensure text is clearly visible
        - Avoid shadows and glare
        """)

        st.markdown("### 🔧 Technical Details")
        st.markdown("""
        - **Model**: Microsoft TrOCR
        - **Accuracy**: High for clear images
        - **Processing**: ~3-5 seconds
        - **Privacy**: Images not stored
        """)

    col1, col2 = st.columns([1, 1])

    with col1:
        st.markdown("### 📤 Upload Bill Image")

        uploaded_file = st.file_uploader(
            "Choose a bill image...",
            type=['png', 'jpg', 'jpeg'],
            help="Upload a clear image of your Singtel bill"
        )

        if uploaded_file is not None:
            image = Image.open(uploaded_file)
            # NOTE(review): `use_column_width` appears to be deprecated in
            # recent Streamlit releases -- confirm against the pinned version
            # before switching to its replacement.
            st.image(image, caption="Uploaded Bill", use_column_width=True)

            if st.button("🔍 Extract Information", type="primary"):
                with st.spinner("Processing image with AI..."):
                    pipe = load_ocr_model()
                    result = process_bill_image(image, pipe)

                if result is not None:
                    st.session_state['processing_result'] = result
                    st.success("✅ Processing completed!")
                elif 'processing_result' in st.session_state:
                    # Extraction failed: drop the previous bill's result so
                    # it is not displayed next to the new image.
                    del st.session_state['processing_result']

    with col2:
        st.markdown("### 📊 Extracted Information")

        if 'processing_result' in st.session_state:
            result = st.session_state['processing_result']

            st.markdown("#### 💰 Bill Summary")

            col_a, col_b = st.columns(2)

            with col_a:
                # Compare against None explicitly: a total of 0.0 is a valid
                # detected amount, not a missing one.
                if result['total_amount'] is not None:
                    st.metric("Total Amount", f"${result['total_amount']:.2f}")
                else:
                    st.metric("Total Amount", "Not detected")

            with col_b:
                if result['due_date']:
                    st.metric("Due Date", result['due_date'])
                else:
                    st.metric("Due Date", "Not detected")

            if result['account_number']:
                st.markdown(f"**Account Number:** {result['account_number']}")
            else:
                st.markdown("**Account Number:** Not detected")

            st.markdown("#### 📝 Raw Extracted Text")
            st.text_area(
                "Full text extracted from image:",
                value=result['raw_text'],
                height=150,
                disabled=True
            )

            st.markdown("#### 💾 Export Data")
            import json
            json_data = json.dumps(result, indent=2)
            st.download_button(
                label="📄 Download as JSON",
                data=json_data,
                file_name="bill_data.json",
                mime="application/json"
            )

        else:
            st.info("👆 Upload an image and click 'Extract Information' to see results here")

    st.markdown("---")
    st.markdown("""
    <div style='text-align: center'>
        <p>Built with ❤️ using Streamlit and Hugging Face Transformers</p>
        <p>🤖 Powered by Microsoft TrOCR | 🔒 Your images are processed locally and not stored</p>
    </div>
    """, unsafe_allow_html=True)
|
|
|
# Run the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
|
|