""" Hugging Face Upload Setup and Helper This script helps you push your Singtel Bill Scanner to Hugging Face """ import os import subprocess from huggingface_hub import HfApi def check_huggingface_setup(): """Check if Hugging Face is properly set up""" print("šŸ” Checking Hugging Face setup...") # Check if huggingface-hub is installed try: import huggingface_hub print(f"āœ… huggingface-hub installed (version: {huggingface_hub.__version__})") except ImportError: print("āŒ huggingface-hub not installed") return False # Check for token token = os.getenv("HF_TOKEN") if token: print("āœ… HF_TOKEN environment variable found") # Test token validity try: api = HfApi(token=token) user_info = api.whoami() print(f"āœ… Token valid for user: {user_info['name']}") return True except Exception as e: print(f"āŒ Token invalid: {e}") return False else: print("āŒ HF_TOKEN environment variable not set") return False def setup_huggingface_token(): """Guide user through setting up HF token""" print("\nšŸ”‘ HUGGING FACE TOKEN SETUP") print("=" * 40) print("1. Go to: https://huggingface.co/settings/tokens") print("2. Create a new token (or copy existing one)") print("3. Choose 'Write' permissions") print("4. Copy the token") print() token = input("Paste your Hugging Face token here: ").strip() if token: # Set environment variable for this session os.environ["HF_TOKEN"] = token # Try to set it permanently in PowerShell try: print("\nšŸ”§ Setting token in environment...") cmd = f'[Environment]::SetEnvironmentVariable("HF_TOKEN", "{token}", "User")' subprocess.run(["powershell", "-Command", cmd], check=True) print("āœ… Token saved to user environment variables") print("šŸ’” You may need to restart VS Code to see the change") except Exception as e: print(f"āš ļø Could not save permanently: {e}") print("šŸ’” Token is set for this session only") # Test the token try: api = HfApi(token=token) user_info = api.whoami() print(f"āœ… Token works! Logged in as: {user_info['name']}") return True except Exception as e: print(f"āŒ Token test failed: {e}") return False else: print("āŒ No token provided") return False def create_model_card(): """Create a README.md for Hugging Face""" readme_content = """--- title: Singtel Bill Scanner emoji: šŸ“± colorFrom: red colorTo: orange sdk: streamlit sdk_version: 1.28.0 app_file: app.py pinned: false tags: - computer-vision - ocr - trocr - bill-processing - singtel - document-ai --- # Singtel Bill Scanner šŸ“±šŸ’” An AI-powered optical character recognition (OCR) system specifically designed for processing Singtel telecommunications bills. This project uses Microsoft's TrOCR (Transformer-based OCR) model to extract text from bill images and parse key information. ## Features - šŸ” **Text Extraction**: Uses TrOCR for accurate text recognition from handwritten and printed text - šŸ“Š **Bill Parsing**: Automatically extracts key information like: - Total amount due - Due date - Account number - Service charges - Billing period - šŸš€ **Easy to Use**: Simple pipeline interface - ⚔ **Fast Processing**: Cached models for instant subsequent runs - šŸŽÆ **Singtel Specific**: Optimized patterns for Singtel bill formats ## Models Used - **TrOCR**: `microsoft/trocr-base-handwritten` - For text extraction - **LayoutLMv3**: `microsoft/layoutlmv3-base` - For document structure understanding ## Quick Start ```python from transformers import pipeline from PIL import Image # Initialize the OCR pipeline pipe = pipeline("image-to-text", model="microsoft/trocr-base-handwritten") # Process your bill image image = Image.open("your_singtel_bill.jpg") result = pipe(image) extracted_text = result[0]['generated_text'] print(f"Extracted text: {extracted_text}") ``` ## Advanced Usage Use the `SingtelBillScanner` class for comprehensive bill processing: ```python from singtel_scanner import SingtelBillScanner # Initialize scanner scanner = SingtelBillScanner() # Process bill and get structured data result = scanner.process_bill("bill_image.jpg") print(f"Total Amount: ${result['total_amount']}") print(f"Due Date: {result['due_date']}") print(f"Account: {result['account_number']}") ``` ## Installation ```bash pip install torch transformers Pillow requests huggingface-hub ``` ## Files - `singtel_scanner.py` - Main scanner class with bill parsing - `test_model.py` - Comprehensive testing and examples - `quick_test.py` - Simple test script - `working_example.py` - Basic functionality demonstration - `requirements.txt` - Package dependencies ## Performance - **Model Size**: ~1.3GB (downloaded once, cached forever) - **Processing Time**: ~2-5 seconds per image (after initial load) - **Accuracy**: High accuracy for clear, well-lit bill images ## Use Cases - šŸ“± **Personal Finance**: Track Singtel bills automatically - šŸ¢ **Business Automation**: Process multiple bills in batch - šŸ“Š **Expense Management**: Extract data for accounting systems - šŸ” **Document Digitization**: Convert physical bills to digital records ## Requirements - Python 3.8+ - ~2GB free disk space (for models) - Good internet connection (for initial model download) ## Contributing Contributions welcome! Areas for improvement: - Additional bill format support - Enhanced parsing accuracy - Mobile app integration - Batch processing optimization ## License This project is open source. Models are subject to their respective licenses: - TrOCR: MIT License - LayoutLMv3: MIT License --- *Created with ā¤ļø for the Singtel community* """ with open("README.md", "w", encoding="utf-8") as f: f.write(readme_content) print("āœ… Model card (README.md) created successfully!") def upload_to_huggingface(): """Upload the project to Hugging Face""" print("\nšŸš€ UPLOADING TO HUGGING FACE") print("=" * 40) try: token = os.getenv("HF_TOKEN") api = HfApi(token=token) print("šŸ“ Preparing files for upload...") # Create model card if it doesn't exist if not os.path.exists("README.md"): create_model_card() print("šŸ“¤ Starting upload...") api.upload_folder( folder_path=".", repo_id="Cosmo125/Singtel_Bill_Scanner", repo_type="space", # Changed to 'space' for better visibility ignore_patterns=[ "*.pyc", "__pycache__/", ".venv/", "*.jpg", "*.png", "*.jpeg", ".git/", "test_*.png", "sample_*.jpg" ], commit_message="Upload Singtel Bill Scanner - AI OCR for bill processing" ) print("\nšŸŽ‰ SUCCESS! Upload completed!") print("šŸ”— Your project is available at:") print(" https://huggingface.co/spaces/Cosmo125/Singtel_Bill_Scanner") print() print("šŸ’” It may take a few minutes to build and become available") return True except Exception as e: print(f"āŒ Upload failed: {e}") print("\nšŸ”§ Troubleshooting:") print("1. Check your internet connection") print("2. Verify your HF token has write permissions") print("3. Make sure the repository name is available") return False def main(): """Main function to handle the upload process""" print("SINGTEL BILL SCANNER - HUGGING FACE UPLOAD") print("=" * 50) # Check setup if check_huggingface_setup(): print("\nāœ… Setup looks good!") choice = input("\nDo you want to upload to Hugging Face now? (y/n): ").lower() if choice in ['y', 'yes']: upload_to_huggingface() else: print("Upload cancelled.") else: print("\nšŸ”§ Setup needed!") choice = input("Do you want to set up your HF token now? (y/n): ").lower() if choice in ['y', 'yes']: if setup_huggingface_token(): print("\nāœ… Token setup complete!") upload_choice = input("Upload to Hugging Face now? (y/n): ").lower() if upload_choice in ['y', 'yes']: upload_to_huggingface() else: print("āŒ Token setup failed") else: print("Setup cancelled.") if __name__ == "__main__": main()