# Hugging Face file-viewer header (not part of the program):
# SIngtel-Bill-Scanner / hf_upload.py
# Cosmo125 - "Upload 26 files" - commit 795183d (verified)
"""
Hugging Face Upload Setup and Helper
This script helps you push your Singtel Bill Scanner to Hugging Face
"""
import os
import subprocess
from huggingface_hub import HfApi
def check_huggingface_setup():
    """Report whether this machine is ready to talk to the Hugging Face Hub.

    Three checks run in order, each printing a status line:

    1. ``huggingface_hub`` can be imported.
    2. The ``HF_TOKEN`` environment variable is set and non-empty.
    3. ``HfApi.whoami()`` succeeds with that token.

    Returns:
        bool: True only when all three checks pass; False as soon as one
        of them fails.
    """
    print("πŸ” Checking Hugging Face setup...")

    # Step 1: the client library has to be importable.
    try:
        import huggingface_hub as hub
        print(f"βœ… huggingface-hub installed (version: {hub.__version__})")
    except ImportError:
        print("❌ huggingface-hub not installed")
        return False

    # Step 2: a token has to be present in the environment.
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        print("❌ HF_TOKEN environment variable not set")
        return False
    print("βœ… HF_TOKEN environment variable found")

    # Step 3: the Hub has to accept the token. Any failure (network,
    # authentication, unexpected response shape) is reported as invalid.
    try:
        identity = HfApi(token=hf_token).whoami()
        print(f"βœ… Token valid for user: {identity['name']}")
        return True
    except Exception as err:
        print(f"❌ Token invalid: {err}")
        return False
def setup_huggingface_token():
    """Prompt for a Hugging Face token, store it, and verify it.

    The token is read from standard input, exported as ``HF_TOKEN`` for the
    current process, and then persisted to the user's environment through
    PowerShell on a best-effort basis. Finally the token is checked with
    ``HfApi.whoami()``.

    Returns:
        bool: True when ``whoami()`` succeeds with the entered token; False
        when nothing was entered or the verification call fails.
    """
    print("\nπŸ”‘ HUGGING FACE TOKEN SETUP")
    print("=" * 40)
    print("1. Go to: https://huggingface.co/settings/tokens")
    print("2. Create a new token (or copy existing one)")
    print("3. Choose 'Write' permissions")
    print("4. Copy the token")
    print()

    token = input("Paste your Hugging Face token here: ").strip()
    if not token:
        print("❌ No token provided")
        return False

    # Make the token visible to this process and to any child process.
    os.environ["HF_TOKEN"] = token

    # Best-effort permanent storage. Any failure here (PowerShell missing,
    # non-zero exit status, ...) only downgrades to a session-only token.
    try:
        print("\nπŸ”§ Setting token in environment...")
        # The token is never spliced into the command text: PowerShell reads
        # it from the inherited process environment ($env:HF_TOKEN). This
        # keeps quotes, '$' or backticks in the pasted value from breaking
        # or altering the command, and keeps the secret out of the child's
        # command line.
        cmd = '[Environment]::SetEnvironmentVariable("HF_TOKEN", $env:HF_TOKEN, "User")'
        subprocess.run(["powershell", "-Command", cmd], check=True)
        print("βœ… Token saved to user environment variables")
        print("πŸ’‘ You may need to restart VS Code to see the change")
    except Exception as e:
        print(f"⚠️ Could not save permanently: {e}")
        print("πŸ’‘ Token is set for this session only")

    # Verify the token against the Hub.
    try:
        api = HfApi(token=token)
        user_info = api.whoami()
        print(f"βœ… Token works! Logged in as: {user_info['name']}")
        return True
    except Exception as e:
        print(f"❌ Token test failed: {e}")
        return False
def create_model_card(path="README.md"):
    """Write the Hugging Face model card (README with YAML front matter).

    The card text is a fixed template: Space metadata (title, SDK, tags)
    followed by the project's README sections. Any existing file at *path*
    is overwritten.

    Args:
        path: Destination file. Defaults to ``"README.md"`` in the current
            working directory.

    Raises:
        OSError: If the destination cannot be opened for writing.
    """
    # Kept at column 0 on purpose: the text inside the literal is written to
    # disk verbatim, so indenting it here would indent the generated file.
    card_text = """---
title: Singtel Bill Scanner
emoji: πŸ“±
colorFrom: red
colorTo: orange
sdk: streamlit
sdk_version: 1.28.0
app_file: app.py
pinned: false
tags:
- computer-vision
- ocr
- trocr
- bill-processing
- singtel
- document-ai
---
# Singtel Bill Scanner πŸ“±πŸ’‘
An AI-powered optical character recognition (OCR) system specifically designed for processing Singtel telecommunications bills. This project uses Microsoft's TrOCR (Transformer-based OCR) model to extract text from bill images and parse key information.
## Features
- πŸ” **Text Extraction**: Uses TrOCR for accurate text recognition from handwritten and printed text
- πŸ“Š **Bill Parsing**: Automatically extracts key information like:
- Total amount due
- Due date
- Account number
- Service charges
- Billing period
- πŸš€ **Easy to Use**: Simple pipeline interface
- ⚑ **Fast Processing**: Cached models for instant subsequent runs
- 🎯 **Singtel Specific**: Optimized patterns for Singtel bill formats
## Models Used
- **TrOCR**: `microsoft/trocr-base-handwritten` - For text extraction
- **LayoutLMv3**: `microsoft/layoutlmv3-base` - For document structure understanding
## Quick Start
```python
from transformers import pipeline
from PIL import Image
# Initialize the OCR pipeline
pipe = pipeline("image-to-text", model="microsoft/trocr-base-handwritten")
# Process your bill image
image = Image.open("your_singtel_bill.jpg")
result = pipe(image)
extracted_text = result[0]['generated_text']
print(f"Extracted text: {extracted_text}")
```
## Advanced Usage
Use the `SingtelBillScanner` class for comprehensive bill processing:
```python
from singtel_scanner import SingtelBillScanner
# Initialize scanner
scanner = SingtelBillScanner()
# Process bill and get structured data
result = scanner.process_bill("bill_image.jpg")
print(f"Total Amount: ${result['total_amount']}")
print(f"Due Date: {result['due_date']}")
print(f"Account: {result['account_number']}")
```
## Installation
```bash
pip install torch transformers Pillow requests huggingface-hub
```
## Files
- `singtel_scanner.py` - Main scanner class with bill parsing
- `test_model.py` - Comprehensive testing and examples
- `quick_test.py` - Simple test script
- `working_example.py` - Basic functionality demonstration
- `requirements.txt` - Package dependencies
## Performance
- **Model Size**: ~1.3GB (downloaded once, cached forever)
- **Processing Time**: ~2-5 seconds per image (after initial load)
- **Accuracy**: High accuracy for clear, well-lit bill images
## Use Cases
- πŸ“± **Personal Finance**: Track Singtel bills automatically
- 🏒 **Business Automation**: Process multiple bills in batch
- πŸ“Š **Expense Management**: Extract data for accounting systems
- πŸ” **Document Digitization**: Convert physical bills to digital records
## Requirements
- Python 3.8+
- ~2GB free disk space (for models)
- Good internet connection (for initial model download)
## Contributing
Contributions welcome! Areas for improvement:
- Additional bill format support
- Enhanced parsing accuracy
- Mobile app integration
- Batch processing optimization
## License
This project is open source. Models are subject to their respective licenses:
- TrOCR: MIT License
- LayoutLMv3: MIT License
---
*Created with ❀️ for the Singtel community*
"""
    # UTF-8 is forced so the non-ASCII characters in the template are written
    # the same way regardless of the platform's default encoding.
    with open(path, "w", encoding="utf-8") as handle:
        handle.write(card_text)
    # With the default path this prints the same message as before.
    print(f"βœ… Model card ({path}) created successfully!")
def upload_to_huggingface():
    """Push the current directory to the ``Cosmo125/Singtel_Bill_Scanner`` Space.

    A README.md model card is generated first when none exists in the
    working directory. The token is taken from the ``HF_TOKEN`` environment
    variable. Every failure is caught, reported with troubleshooting hints,
    and turned into a False return value.

    Returns:
        bool: True when the upload call completes, False on any exception.
    """
    print("\nπŸš€ UPLOADING TO HUGGING FACE")
    print("=" * 40)

    # Paths left out of the upload: bytecode and caches, the virtualenv,
    # git metadata, and every image file.
    excluded = [
        "*.pyc",
        "__pycache__/",
        ".venv/",
        "*.jpg",
        "*.png",
        "*.jpeg",
        ".git/",
        "test_*.png",
        "sample_*.jpg",
    ]

    try:
        client = HfApi(token=os.getenv("HF_TOKEN"))

        print("πŸ“ Preparing files for upload...")
        # Only generate the model card when the project does not have one.
        readme_missing = not os.path.exists("README.md")
        if readme_missing:
            create_model_card()

        print("πŸ“€ Starting upload...")
        client.upload_folder(
            repo_id="Cosmo125/Singtel_Bill_Scanner",
            repo_type="space",  # uploaded as a Space rather than a model repo
            folder_path=".",
            ignore_patterns=excluded,
            commit_message="Upload Singtel Bill Scanner - AI OCR for bill processing",
        )

        # Printed inside the try so that a failure while printing is
        # reported through the same error path as an upload failure.
        for line in (
            "\nπŸŽ‰ SUCCESS! Upload completed!",
            "πŸ”— Your project is available at:",
            " https://huggingface.co/spaces/Cosmo125/Singtel_Bill_Scanner",
            "",
            "πŸ’‘ It may take a few minutes to build and become available",
        ):
            print(line)
        return True
    except Exception as err:
        print(f"❌ Upload failed: {err}")
        for hint in (
            "\nπŸ”§ Troubleshooting:",
            "1. Check your internet connection",
            "2. Verify your HF token has write permissions",
            "3. Make sure the repository name is available",
        ):
            print(hint)
        return False
def _confirm(prompt):
    """Ask a yes/no question; return True only for 'y' or 'yes'.

    The answer is compared case-insensitively and with surrounding
    whitespace removed, so inputs such as ``"Y "`` or ``" yes"`` count as
    confirmation instead of being silently treated as "no".
    """
    return input(prompt).strip().lower() in ("y", "yes")


def main():
    """Run the interactive upload flow.

    When the Hugging Face setup check passes, the user is offered an
    immediate upload. Otherwise the user is offered the token setup and,
    if that succeeds, the upload. Declining a prompt ends the flow with a
    short message.
    """
    print("SINGTEL BILL SCANNER - HUGGING FACE UPLOAD")
    print("=" * 50)

    # Happy path: everything is already configured.
    if check_huggingface_setup():
        print("\nβœ… Setup looks good!")
        if _confirm("\nDo you want to upload to Hugging Face now? (y/n): "):
            upload_to_huggingface()
        else:
            print("Upload cancelled.")
        return

    # Setup is incomplete: offer to configure a token first.
    print("\nπŸ”§ Setup needed!")
    if not _confirm("Do you want to set up your HF token now? (y/n): "):
        print("Setup cancelled.")
        return

    if not setup_huggingface_token():
        print("❌ Token setup failed")
        return

    print("\nβœ… Token setup complete!")
    if _confirm("Upload to Hugging Face now? (y/n): "):
        upload_to_huggingface()
# Start the interactive flow only when this file is executed as a script;
# importing it as a module just defines the functions.
if __name__ == "__main__":
    main()