"""
Hugging Face Upload Setup and Helper
This script helps you push your Singtel Bill Scanner to Hugging Face
"""
|
|
|
|
import os
|
|
import subprocess
|
|
from huggingface_hub import HfApi
|
|
|
|
def check_huggingface_setup():
    """Report whether huggingface-hub is usable and HF_TOKEN holds a working token.

    Prints one status line per check so the user can see which step failed.

    Returns:
        bool: True only when ``huggingface_hub`` imports, ``HF_TOKEN`` is set to
        a non-blank value, and ``HfApi.whoami()`` succeeds with that token.
        False in every other case.
    """
    # NOTE(review): the status glyphs in this function were reconstructed from
    # mis-decoded UTF-8 text; confirm them against the original file.
    print("🔍 Checking Hugging Face setup...")

    # NOTE: this module also imports HfApi at load time, so a missing package
    # would normally surface before this function is ever called.
    try:
        import huggingface_hub
    except ImportError:
        print("❌ huggingface-hub not installed")
        return False
    print(f"✅ huggingface-hub installed (version: {huggingface_hub.__version__})")

    # Treat a blank / whitespace-only value the same as an unset variable.
    token = (os.getenv("HF_TOKEN") or "").strip()
    if not token:
        print("❌ HF_TOKEN environment variable not set")
        return False
    print("✅ HF_TOKEN environment variable found")

    # whoami() is the actual validity check for the token; any failure
    # (network, auth, unexpected payload) is reported as "invalid".
    try:
        user_info = HfApi(token=token).whoami()
        print(f"✅ Token valid for user: {user_info['name']}")
        return True
    except Exception as e:
        print(f"❌ Token invalid: {e}")
        return False
|
|
|
|
def _persist_hf_token_for_user():
    """Copy HF_TOKEN from this process into the per-user environment via PowerShell.

    PowerShell reads the value from the inherited ``HF_TOKEN`` environment
    variable instead of having it spliced into the command text, so quotes,
    ``$`` or backticks in the token cannot alter the command, and the secret
    does not appear on the process command line.

    Raises:
        OSError: if the ``powershell`` executable cannot be started.
        subprocess.CalledProcessError: if PowerShell exits with a non-zero code.
    """
    command = '[Environment]::SetEnvironmentVariable("HF_TOKEN", $env:HF_TOKEN, "User")'
    subprocess.run(["powershell", "-Command", command], check=True)


def setup_huggingface_token():
    """Interactively collect a Hugging Face token, validate it and store it.

    The token is first checked with ``HfApi.whoami()``. Only a working token is
    exported to ``os.environ["HF_TOKEN"]`` and then saved to the per-user
    environment, so a mistyped token never replaces a previously stored one.
    Failure to save permanently is reported but is not fatal.

    Returns:
        bool: True when the pasted token was accepted by ``whoami()``; False
        when nothing was entered or the validation call failed.
    """
    # NOTE(review): the status glyphs in this function were reconstructed from
    # mis-decoded UTF-8 text; confirm them against the original file.
    print("\n🔑 HUGGING FACE TOKEN SETUP")
    print("=" * 40)
    print("1. Go to: https://huggingface.co/settings/tokens")
    print("2. Create a new token (or copy existing one)")
    print("3. Choose 'Write' permissions")
    print("4. Copy the token")
    print()

    token = input("Paste your Hugging Face token here: ").strip()
    if not token:
        print("❌ No token provided")
        return False

    # Validate before storing anything.
    try:
        user_info = HfApi(token=token).whoami()
        print(f"✅ Token works! Logged in as: {user_info['name']}")
    except Exception as e:
        print(f"❌ Token test failed: {e}")
        return False

    # Make the token available to the rest of this session (and to the
    # PowerShell child process, which inherits this environment).
    os.environ["HF_TOKEN"] = token

    try:
        print("\n🔧 Setting token in environment...")
        _persist_hf_token_for_user()
        print("✅ Token saved to user environment variables")
        print("💡 You may need to restart VS Code to see the change")
    except (OSError, subprocess.SubprocessError) as e:
        # Best effort only: the session-level variable set above still works.
        print(f"⚠️ Could not save permanently: {e}")
        print("💡 Token is set for this session only")

    return True
|
|
|
|
def create_model_card():
    """Write the Space's ``README.md`` (YAML front matter plus description).

    The file is created in the current working directory, encoded as UTF-8,
    and overwrites any existing ``README.md``. A confirmation line is printed
    afterwards. Returns None.
    """
    # NOTE(review): emoji in this text were reconstructed from mis-decoded
    # UTF-8. The front-matter emoji, the title, and the "Fast Processing",
    # "Singtel Specific", "Personal Finance", "Business Automation" and footer
    # glyphs are recoverable from the bytes; the remaining bullet glyphs are
    # best guesses - confirm against the original file.
    # The sub-items under "Bill Parsing" are indented so Markdown renders them
    # as a nested list.
    readme_content = """---
title: Singtel Bill Scanner
emoji: 📱
colorFrom: red
colorTo: orange
sdk: streamlit
sdk_version: 1.28.0
app_file: app.py
pinned: false
tags:
- computer-vision
- ocr
- trocr
- bill-processing
- singtel
- document-ai
---

# Singtel Bill Scanner 📱💡

An AI-powered optical character recognition (OCR) system specifically designed for processing Singtel telecommunications bills. This project uses Microsoft's TrOCR (Transformer-based OCR) model to extract text from bill images and parse key information.

## Features

- 🔍 **Text Extraction**: Uses TrOCR for accurate text recognition from handwritten and printed text
- 📊 **Bill Parsing**: Automatically extracts key information like:
  - Total amount due
  - Due date
  - Account number
  - Service charges
  - Billing period
- 🚀 **Easy to Use**: Simple pipeline interface
- ⚡ **Fast Processing**: Cached models for instant subsequent runs
- 🎯 **Singtel Specific**: Optimized patterns for Singtel bill formats

## Models Used

- **TrOCR**: `microsoft/trocr-base-handwritten` - For text extraction
- **LayoutLMv3**: `microsoft/layoutlmv3-base` - For document structure understanding

## Quick Start

```python
from transformers import pipeline
from PIL import Image

# Initialize the OCR pipeline
pipe = pipeline("image-to-text", model="microsoft/trocr-base-handwritten")

# Process your bill image
image = Image.open("your_singtel_bill.jpg")
result = pipe(image)
extracted_text = result[0]['generated_text']

print(f"Extracted text: {extracted_text}")
```

## Advanced Usage

Use the `SingtelBillScanner` class for comprehensive bill processing:

```python
from singtel_scanner import SingtelBillScanner

# Initialize scanner
scanner = SingtelBillScanner()

# Process bill and get structured data
result = scanner.process_bill("bill_image.jpg")

print(f"Total Amount: ${result['total_amount']}")
print(f"Due Date: {result['due_date']}")
print(f"Account: {result['account_number']}")
```

## Installation

```bash
pip install torch transformers Pillow requests huggingface-hub
```

## Files

- `singtel_scanner.py` - Main scanner class with bill parsing
- `test_model.py` - Comprehensive testing and examples
- `quick_test.py` - Simple test script
- `working_example.py` - Basic functionality demonstration
- `requirements.txt` - Package dependencies

## Performance

- **Model Size**: ~1.3GB (downloaded once, cached forever)
- **Processing Time**: ~2-5 seconds per image (after initial load)
- **Accuracy**: High accuracy for clear, well-lit bill images

## Use Cases

- 📱 **Personal Finance**: Track Singtel bills automatically
- 🏢 **Business Automation**: Process multiple bills in batch
- 📊 **Expense Management**: Extract data for accounting systems
- 📄 **Document Digitization**: Convert physical bills to digital records

## Requirements

- Python 3.8+
- ~2GB free disk space (for models)
- Good internet connection (for initial model download)

## Contributing

Contributions welcome! Areas for improvement:
- Additional bill format support
- Enhanced parsing accuracy
- Mobile app integration
- Batch processing optimization

## License

This project is open source. Models are subject to their respective licenses:
- TrOCR: MIT License
- LayoutLMv3: MIT License

---

*Created with ❤️ for the Singtel community*
"""

    # Explicit UTF-8 so the emoji survive regardless of the platform default.
    with open("README.md", "w", encoding="utf-8") as f:
        f.write(readme_content)

    print("✅ Model card (README.md) created successfully!")
|
|
|
|
def upload_to_huggingface(repo_id="Cosmo125/Singtel_Bill_Scanner"):
    """Upload the current directory to a Hugging Face Space.

    Generates ``README.md`` via ``create_model_card()`` if it is missing, makes
    sure the target Space exists, then uploads the working directory while
    skipping caches, virtualenv, git metadata and image files.

    Args:
        repo_id: ``"<namespace>/<name>"`` of the target Space. Defaults to the
            project's original Space so existing callers are unaffected.

    Returns:
        bool: True when the upload call completed; False when any step raised
        (the error and troubleshooting hints are printed).
    """
    # NOTE(review): the status glyphs in this function were reconstructed from
    # mis-decoded UTF-8 text; confirm them against the original file.
    print("\n🚀 UPLOADING TO HUGGING FACE")
    print("=" * 40)

    try:
        api = HfApi(token=os.getenv("HF_TOKEN"))

        print("📁 Preparing files for upload...")
        if not os.path.exists("README.md"):
            create_model_card()

        # Create the Space on first run; exist_ok makes this a no-op when it
        # is already there. The SDK matches the `sdk:` value in the model card.
        api.create_repo(
            repo_id=repo_id,
            repo_type="space",
            space_sdk="streamlit",
            exist_ok=True,
        )

        print("📤 Starting upload...")
        api.upload_folder(
            folder_path=".",
            repo_id=repo_id,
            repo_type="space",
            ignore_patterns=[
                "*.pyc",
                "__pycache__/",
                ".venv/",
                "*.jpg",
                "*.png",
                "*.jpeg",
                ".git/",
                "test_*.png",
                "sample_*.jpg",
            ],
            commit_message="Upload Singtel Bill Scanner - AI OCR for bill processing",
        )

        print("\n🎉 SUCCESS! Upload completed!")
        print("🌐 Your project is available at:")
        print(f" https://huggingface.co/spaces/{repo_id}")
        print()
        print("💡 It may take a few minutes to build and become available")
        return True

    except Exception as e:
        # Top-level boundary of an interactive script: report and carry on.
        print(f"❌ Upload failed: {e}")
        print("\n🔧 Troubleshooting:")
        print("1. Check your internet connection")
        print("2. Verify your HF token has write permissions")
        print("3. Make sure the repository name is available")
        return False
|
|
|
|
def _ask_yes_no(prompt):
    """Prompt the user and return True only for 'y' or 'yes'.

    The answer is compared case-insensitively with surrounding whitespace
    ignored, so input such as "Y " still counts as yes.
    """
    return input(prompt).strip().lower() in ("y", "yes")


def main():
    """Run the interactive flow: check setup, optionally set a token, upload.

    If the existing setup is valid the user is offered the upload directly.
    Otherwise the user is offered the token setup and, when that succeeds,
    the upload. Returns None.
    """
    print("SINGTEL BILL SCANNER - HUGGING FACE UPLOAD")
    print("=" * 50)

    # Happy path: everything is already configured.
    if check_huggingface_setup():
        print("\n✅ Setup looks good!")
        if _ask_yes_no("\nDo you want to upload to Hugging Face now? (y/n): "):
            upload_to_huggingface()
        else:
            print("Upload cancelled.")
        return

    print("\n🔧 Setup needed!")
    if not _ask_yes_no("Do you want to set up your HF token now? (y/n): "):
        print("Setup cancelled.")
        return

    if not setup_huggingface_token():
        print("❌ Token setup failed")
        return

    print("\n✅ Token setup complete!")
    # Declining here ends the run silently, as before.
    if _ask_yes_no("Upload to Hugging Face now? (y/n): "):
        upload_to_huggingface()
|
|
|
|
# Start the interactive flow only when run as a script, not when imported.
if __name__ == "__main__":
    main()
|
|
|