Spaces:

NTU-Peak-2
/

SIngtel-Bill-Scanner

Running

App Files Files Community

SIngtel-Bill-Scanner / hf_upload.py

Cosmo125

Upload 26 files

795183d verified about 1 month ago

raw

history blame contribute delete

9.22 kB

	"""
	Hugging Face Upload Setup and Helper
	This script helps you push your Singtel Bill Scanner to Hugging Face
	"""

	import os
	import subprocess
	from huggingface_hub import HfApi

	def check_huggingface_setup():
	    """Report whether the Hugging Face client and credentials are usable.

	    Prints one status line per step: importing ``huggingface_hub``, looking
	    up the ``HF_TOKEN`` environment variable, and calling ``whoami()`` with
	    that token.

	    Returns:
	        True only when all three steps succeed; False as soon as one fails.
	    """
	    print("🔍 Checking Hugging Face setup...")

	    # Step 1: the client library has to be importable.
	    try:
	        import huggingface_hub
	        print(f"✅ huggingface-hub installed (version: {huggingface_hub.__version__})")
	    except ImportError:
	        print("❌ huggingface-hub not installed")
	        return False

	    # Step 2: a non-empty token has to be present in the environment.
	    hf_token = os.environ.get("HF_TOKEN")
	    if not hf_token:
	        print("❌ HF_TOKEN environment variable not set")
	        return False
	    print("✅ HF_TOKEN environment variable found")

	    # Step 3: ask the Hub who the token belongs to; any error raised while
	    # doing so (including a response without a 'name' entry) is reported as
	    # an invalid token.
	    try:
	        account = HfApi(token=hf_token).whoami()
	        print(f"✅ Token valid for user: {account['name']}")
	    except Exception as err:
	        print(f"❌ Token invalid: {err}")
	        return False
	    return True

	def setup_huggingface_token():
	    """Interactively collect a Hugging Face token, validate it, and store it.

	    Prints instructions, reads the token from standard input, and checks it
	    with ``HfApi.whoami()``. Only a token that passes that check is exported
	    as ``HF_TOKEN`` for the current process and then, on a best-effort basis,
	    written to the per-user environment through PowerShell.

	    Returns:
	        True when ``whoami()`` accepted the token; False when nothing was
	        entered or the check raised. A failure to persist the token is
	        reported but does not change the result.
	    """
	    print("\n🔑 HUGGING FACE TOKEN SETUP")
	    print("=" * 40)
	    print("1. Go to: https://huggingface.co/settings/tokens")
	    print("2. Create a new token (or copy existing one)")
	    print("3. Choose 'Write' permissions")
	    print("4. Copy the token")
	    print()

	    token = input("Paste your Hugging Face token here: ").strip()
	    if not token:
	        print("❌ No token provided")
	        return False

	    # Validate before storing anything, so a mistyped or revoked token is
	    # never written to the session or to the permanent user environment.
	    try:
	        user_info = HfApi(token=token).whoami()
	        print(f"✅ Token works! Logged in as: {user_info['name']}")
	    except Exception as e:  # the client can raise several error types; report any of them
	        print(f"❌ Token test failed: {e}")
	        return False

	    # Set environment variable for this session
	    os.environ["HF_TOKEN"] = token

	    # Persist for future sessions via PowerShell. The command reads the value
	    # from the child's inherited environment ($env:HF_TOKEN) instead of
	    # embedding it in the command text: token characters can then never be
	    # interpreted as PowerShell syntax, and the secret does not show up in
	    # the process command line.
	    try:
	        print("\n🔧 Setting token in environment...")
	        cmd = '[Environment]::SetEnvironmentVariable("HF_TOKEN", $env:HF_TOKEN, "User")'
	        subprocess.run(["powershell", "-Command", cmd], check=True)
	        print("✅ Token saved to user environment variables")
	        print("💡 You may need to restart VS Code to see the change")
	    except (OSError, subprocess.SubprocessError) as e:
	        # OSError: powershell is not available; SubprocessError: it exited non-zero.
	        print(f"⚠️ Could not save permanently: {e}")
	        print("💡 Token is set for this session only")

	    return True

	def create_model_card():
	"""Write the Hugging Face repository card to README.md.

	The card text is a fixed string: a YAML front-matter section (title,
	emoji, colors, ``sdk: streamlit``, ``app_file: app.py``, tags) followed
	by the Markdown project description. The file is written relative to the
	current working directory as UTF-8, replacing any existing README.md,
	and a confirmation line is printed afterwards.
	"""
	# Card text: the section between the two leading "---" lines is the YAML
	# front matter; everything after it is plain Markdown.
	readme_content = """---
	title: Singtel Bill Scanner
	emoji: 📱
	colorFrom: red
	colorTo: orange
	sdk: streamlit
	sdk_version: 1.28.0
	app_file: app.py
	pinned: false
	tags:
	- computer-vision
	- ocr
	- trocr
	- bill-processing
	- singtel
	- document-ai
	---

	# Singtel Bill Scanner 📱💡

	An AI-powered optical character recognition (OCR) system specifically designed for processing Singtel telecommunications bills. This project uses Microsoft's TrOCR (Transformer-based OCR) model to extract text from bill images and parse key information.

	## Features

	- 🔍 Text Extraction: Uses TrOCR for accurate text recognition from handwritten and printed text
	- 📊 Bill Parsing: Automatically extracts key information like:
	- Total amount due
	- Due date
	- Account number
	- Service charges
	- Billing period
	- 🚀 Easy to Use: Simple pipeline interface
	- ⚡ Fast Processing: Cached models for instant subsequent runs
	- 🎯 Singtel Specific: Optimized patterns for Singtel bill formats

	## Models Used

	- TrOCR: `microsoft/trocr-base-handwritten` - For text extraction
	- LayoutLMv3: `microsoft/layoutlmv3-base` - For document structure understanding

	## Quick Start

	```python
	from transformers import pipeline
	from PIL import Image

	# Initialize the OCR pipeline
	pipe = pipeline("image-to-text", model="microsoft/trocr-base-handwritten")

	# Process your bill image
	image = Image.open("your_singtel_bill.jpg")
	result = pipe(image)
	extracted_text = result[0]['generated_text']

	print(f"Extracted text: {extracted_text}")
	```

	## Advanced Usage

	Use the `SingtelBillScanner` class for comprehensive bill processing:

	```python
	from singtel_scanner import SingtelBillScanner

	# Initialize scanner
	scanner = SingtelBillScanner()

	# Process bill and get structured data
	result = scanner.process_bill("bill_image.jpg")

	print(f"Total Amount: ${result['total_amount']}")
	print(f"Due Date: {result['due_date']}")
	print(f"Account: {result['account_number']}")
	```

	## Installation

	```bash
	pip install torch transformers Pillow requests huggingface-hub
	```

	## Files

	- `singtel_scanner.py` - Main scanner class with bill parsing
	- `test_model.py` - Comprehensive testing and examples
	- `quick_test.py` - Simple test script
	- `working_example.py` - Basic functionality demonstration
	- `requirements.txt` - Package dependencies

	## Performance

	- Model Size: ~1.3GB (downloaded once, cached forever)
	- Processing Time: ~2-5 seconds per image (after initial load)
	- Accuracy: High accuracy for clear, well-lit bill images

	## Use Cases

	- 📱 Personal Finance: Track Singtel bills automatically
	- 🏢 Business Automation: Process multiple bills in batch
	- 📊 Expense Management: Extract data for accounting systems
	- 🔍 Document Digitization: Convert physical bills to digital records

	## Requirements

	- Python 3.8+
	- ~2GB free disk space (for models)
	- Good internet connection (for initial model download)

	## Contributing

	Contributions welcome! Areas for improvement:
	- Additional bill format support
	- Enhanced parsing accuracy
	- Mobile app integration
	- Batch processing optimization

	## License

	This project is open source. Models are subject to their respective licenses:
	- TrOCR: MIT License
	- LayoutLMv3: MIT License

	---

	Created with ❤️ for the Singtel community
	"""

	# Mode "w" replaces an existing README.md; the explicit UTF-8 encoding keeps
	# the emoji in the card text independent of the platform's default encoding.
	with open("README.md", "w", encoding="utf-8") as f:
	f.write(readme_content)

	print("✅ Model card (README.md) created successfully!")

	def upload_to_huggingface(repo_id="Cosmo125/Singtel_Bill_Scanner", repo_type="space"):
	    """Upload the current directory to a Hugging Face repository.

	    Creates ``README.md`` through ``create_model_card()`` when it is missing,
	    then calls ``HfApi.upload_folder`` on ``"."`` using the ``HF_TOKEN``
	    environment variable as the token. Compiled Python files, the
	    ``__pycache__``, ``.venv`` and ``.git`` directories, and JPEG/PNG images
	    are listed as ignore patterns.

	    Args:
	        repo_id: Target repository as ``"owner/name"``. Defaults to the
	            repository this script was originally written for.
	        repo_type: Repository kind forwarded to ``upload_folder``. Defaults
	            to ``"space"``.

	    Returns:
	        True when the upload call returns without raising; False when any
	        step raises, in which case the error and troubleshooting hints are
	        printed.
	    """
	    print("\n🚀 UPLOADING TO HUGGING FACE")
	    print("=" * 40)

	    # Hub page URLs carry a type prefix for spaces and datasets; any other
	    # repo type is addressed directly under the site root.
	    url_prefix = {"space": "spaces/", "dataset": "datasets/"}.get(repo_type, "")
	    repo_url = f"https://huggingface.co/{url_prefix}{repo_id}"

	    try:
	        token = os.getenv("HF_TOKEN")
	        api = HfApi(token=token)

	        print("📁 Preparing files for upload...")

	        # Create model card if it doesn't exist
	        if not os.path.exists("README.md"):
	            create_model_card()

	        print("📤 Starting upload...")
	        api.upload_folder(
	            folder_path=".",
	            repo_id=repo_id,
	            repo_type=repo_type,
	            ignore_patterns=[
	                "*.pyc",
	                "__pycache__/",
	                ".venv/",
	                "*.jpg",
	                "*.png",
	                "*.jpeg",
	                ".git/",
	                "test_*.png",
	                "sample_*.jpg",
	            ],
	            commit_message="Upload Singtel Bill Scanner - AI OCR for bill processing",
	        )

	        print("\n🎉 SUCCESS! Upload completed!")
	        print("🔗 Your project is available at:")
	        print(f" {repo_url}")
	        print()
	        print("💡 It may take a few minutes to build and become available")
	        return True

	    except Exception as e:
	        # Top-level boundary of the upload step: report whatever went wrong
	        # and signal failure through the return value instead of crashing.
	        print(f"❌ Upload failed: {e}")
	        print("\n🔧 Troubleshooting:")
	        print("1. Check your internet connection")
	        print("2. Verify your HF token has write permissions")
	        print("3. Make sure the repository name is available")
	        return False

	def main():
	    """Run the interactive upload flow.

	    Checks the Hugging Face setup first. When it is usable, offers to upload
	    right away. Otherwise offers to set up a token and, if that succeeds,
	    offers the upload afterwards. Every decision is a yes/no prompt; anything
	    other than "y"/"yes" is treated as "no".
	    """
	    print("SINGTEL BILL SCANNER - HUGGING FACE UPLOAD")
	    print("=" * 50)

	    # Check setup
	    if check_huggingface_setup():
	        print("\n✅ Setup looks good!")
	        if _confirm("\nDo you want to upload to Hugging Face now? (y/n): "):
	            upload_to_huggingface()
	        else:
	            print("Upload cancelled.")
	        return

	    print("\n🔧 Setup needed!")
	    if not _confirm("Do you want to set up your HF token now? (y/n): "):
	        print("Setup cancelled.")
	        return

	    if not setup_huggingface_token():
	        print("❌ Token setup failed")
	        return

	    print("\n✅ Token setup complete!")
	    if _confirm("Upload to Hugging Face now? (y/n): "):
	        upload_to_huggingface()


	def _confirm(prompt):
	    """Ask a yes/no question and return True for "y" or "yes".

	    Letter case and surrounding whitespace in the answer are ignored, so an
	    answer such as "Y " still counts as yes.
	    """
	    return input(prompt).strip().lower() in ("y", "yes")

	# Start the interactive flow only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	main()