File size: 9,219 Bytes
795183d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
"""

Hugging Face Upload Setup and Helper

This script helps you push your Singtel Bill Scanner to Hugging Face

"""

import os
import subprocess
from huggingface_hub import HfApi

def check_huggingface_setup():
    """Report whether the Hugging Face tooling and credentials are usable.

    Three things are verified in order, printing a status line for each:
    the ``huggingface_hub`` package can be imported, the ``HF_TOKEN``
    environment variable holds a non-empty value, and ``HfApi.whoami()``
    succeeds with that token.

    Returns:
        bool: True only when all three checks pass, False otherwise.
    """
    print("πŸ” Checking Hugging Face setup...")

    # Step 1: the package itself must be importable.
    try:
        import huggingface_hub as hub_module
        print(f"βœ… huggingface-hub installed (version: {hub_module.__version__})")
    except ImportError:
        print("❌ huggingface-hub not installed")
        return False

    # Step 2: a token has to be present in the environment.
    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        print("❌ HF_TOKEN environment variable not set")
        return False

    print("βœ… HF_TOKEN environment variable found")

    # Step 3: ask the Hub who the token belongs to; any failure counts as invalid.
    try:
        account = HfApi(token=hf_token).whoami()
        print(f"βœ… Token valid for user: {account['name']}")
    except Exception as err:
        print(f"❌ Token invalid: {err}")
        return False
    return True

def setup_huggingface_token():
    """Interactively collect a Hugging Face token, store it, and verify it.

    The user is shown where to create a token and asked to paste it on
    stdin. A non-empty token is placed in ``os.environ["HF_TOKEN"]`` for the
    current process, then a best-effort attempt is made to persist it as a
    user-level environment variable through PowerShell. Finally the token is
    checked by calling ``HfApi.whoami()``.

    Returns:
        bool: True when a token was entered and ``whoami()`` succeeded with
        it; False when nothing was entered or the check raised.
    """
    print("\nπŸ”‘ HUGGING FACE TOKEN SETUP")
    print("=" * 40)
    print("1. Go to: https://huggingface.co/settings/tokens")
    print("2. Create a new token (or copy existing one)")
    print("3. Choose 'Write' permissions")
    print("4. Copy the token")
    print()

    token = input("Paste your Hugging Face token here: ").strip()

    if not token:
        print("❌ No token provided")
        return False

    # Set environment variable for this session; child processes inherit it.
    os.environ["HF_TOKEN"] = token

    # Try to set it permanently in PowerShell. Failure here (for example when
    # PowerShell is not available) is not fatal: the session value remains.
    try:
        print("\nπŸ”§ Setting token in environment...")
        # The token is deliberately NOT interpolated into the command text.
        # PowerShell reads it from the inherited $env:HF_TOKEN instead, so
        # quotes, `$` or backticks in the pasted value cannot alter the
        # command, and the secret does not appear on the command line.
        cmd = '[Environment]::SetEnvironmentVariable("HF_TOKEN", $env:HF_TOKEN, "User")'
        subprocess.run(["powershell", "-Command", cmd], check=True)
        print("βœ… Token saved to user environment variables")
        print("πŸ’‘ You may need to restart VS Code to see the change")
    except Exception as e:
        print(f"⚠️  Could not save permanently: {e}")
        print("πŸ’‘ Token is set for this session only")

    # Test the token
    try:
        api = HfApi(token=token)
        user_info = api.whoami()
        print(f"βœ… Token works! Logged in as: {user_info['name']}")
        return True
    except Exception as e:
        print(f"❌ Token test failed: {e}")
        return False

def create_model_card() -> None:
    """Write the project's README.md (model card) into the current directory.

    The text below starts with a ``---``-delimited metadata header (title,
    emoji, colours, sdk, app file, tags) followed by Markdown documentation
    for the project. The file is opened in write mode, so an existing
    ``README.md`` in the working directory is overwritten. A confirmation
    line is printed once the file has been written.
    """
    # The whole card is kept as one literal; it is written out verbatim.
    readme_content = """---

title: Singtel Bill Scanner

emoji: πŸ“±

colorFrom: red

colorTo: orange

sdk: streamlit

sdk_version: 1.28.0

app_file: app.py

pinned: false

tags:

- computer-vision

- ocr

- trocr

- bill-processing

- singtel

- document-ai

---



# Singtel Bill Scanner πŸ“±πŸ’‘



An AI-powered optical character recognition (OCR) system specifically designed for processing Singtel telecommunications bills. This project uses Microsoft's TrOCR (Transformer-based OCR) model to extract text from bill images and parse key information.



## Features



- πŸ” **Text Extraction**: Uses TrOCR for accurate text recognition from handwritten and printed text

- πŸ“Š **Bill Parsing**: Automatically extracts key information like:

  - Total amount due

  - Due date

  - Account number

  - Service charges

  - Billing period

- πŸš€ **Easy to Use**: Simple pipeline interface

- ⚑ **Fast Processing**: Cached models for instant subsequent runs

- 🎯 **Singtel Specific**: Optimized patterns for Singtel bill formats



## Models Used



- **TrOCR**: `microsoft/trocr-base-handwritten` - For text extraction

- **LayoutLMv3**: `microsoft/layoutlmv3-base` - For document structure understanding



## Quick Start



```python

from transformers import pipeline

from PIL import Image



# Initialize the OCR pipeline

pipe = pipeline("image-to-text", model="microsoft/trocr-base-handwritten")



# Process your bill image

image = Image.open("your_singtel_bill.jpg")

result = pipe(image)

extracted_text = result[0]['generated_text']



print(f"Extracted text: {extracted_text}")

```



## Advanced Usage



Use the `SingtelBillScanner` class for comprehensive bill processing:



```python

from singtel_scanner import SingtelBillScanner



# Initialize scanner

scanner = SingtelBillScanner()



# Process bill and get structured data

result = scanner.process_bill("bill_image.jpg")



print(f"Total Amount: ${result['total_amount']}")

print(f"Due Date: {result['due_date']}")

print(f"Account: {result['account_number']}")

```



## Installation



```bash

pip install torch transformers Pillow requests huggingface-hub

```



## Files



- `singtel_scanner.py` - Main scanner class with bill parsing

- `test_model.py` - Comprehensive testing and examples

- `quick_test.py` - Simple test script

- `working_example.py` - Basic functionality demonstration

- `requirements.txt` - Package dependencies



## Performance



- **Model Size**: ~1.3GB (downloaded once, cached forever)

- **Processing Time**: ~2-5 seconds per image (after initial load)

- **Accuracy**: High accuracy for clear, well-lit bill images



## Use Cases



- πŸ“± **Personal Finance**: Track Singtel bills automatically

- 🏒 **Business Automation**: Process multiple bills in batch

- πŸ“Š **Expense Management**: Extract data for accounting systems

- πŸ” **Document Digitization**: Convert physical bills to digital records



## Requirements



- Python 3.8+

- ~2GB free disk space (for models)

- Good internet connection (for initial model download)



## Contributing



Contributions welcome! Areas for improvement:

- Additional bill format support

- Enhanced parsing accuracy

- Mobile app integration

- Batch processing optimization



## License



This project is open source. Models are subject to their respective licenses:

- TrOCR: MIT License

- LayoutLMv3: MIT License



---



*Created with ❀️ for the Singtel community*

"""
    
    # Explicit UTF-8 so the non-ASCII characters in the card are written the
    # same way regardless of the platform's default encoding.
    with open("README.md", "w", encoding="utf-8") as f:
        f.write(readme_content)
    
    print("βœ… Model card (README.md) created successfully!")

def upload_to_huggingface():
    """Push the current directory to the project's Hugging Face Space.

    An ``HfApi`` client is built from the ``HF_TOKEN`` environment variable,
    a README.md is generated via ``create_model_card()`` when none exists,
    and the working directory is uploaded with ``upload_folder`` (skipping
    caches, the virtualenv, git metadata and image files). Progress and
    troubleshooting hints are printed along the way.

    Returns:
        bool: True when the upload call completed, False when any step
        raised an exception.
    """
    print("\nπŸš€ UPLOADING TO HUGGING FACE")
    print("=" * 40)

    # Files and folders that must not be sent to the Hub.
    skipped_patterns = [
        "*.pyc",
        "__pycache__/",
        ".venv/",
        "*.jpg",
        "*.png",
        "*.jpeg",
        ".git/",
        "test_*.png",
        "sample_*.jpg",
    ]

    try:
        hub = HfApi(token=os.getenv("HF_TOKEN"))

        print("πŸ“ Preparing files for upload...")

        # Generate the model card only when the project does not ship one.
        readme_missing = not os.path.exists("README.md")
        if readme_missing:
            create_model_card()

        print("πŸ“€ Starting upload...")
        hub.upload_folder(
            folder_path=".",
            repo_id="Cosmo125/Singtel_Bill_Scanner",
            repo_type="space",  # uploaded as a Space rather than a model repo
            ignore_patterns=skipped_patterns,
            commit_message="Upload Singtel Bill Scanner - AI OCR for bill processing",
        )

        print("\nπŸŽ‰ SUCCESS! Upload completed!")
        print("πŸ”— Your project is available at:")
        print("   https://huggingface.co/spaces/Cosmo125/Singtel_Bill_Scanner")
        print()
        print("πŸ’‘ It may take a few minutes to build and become available")
        return True

    except Exception as err:
        print(f"❌ Upload failed: {err}")
        print("\nπŸ”§ Troubleshooting:")
        print("1. Check your internet connection")
        print("2. Verify your HF token has write permissions")
        print("3. Make sure the repository name is available")
        return False

def _confirm(prompt):
    """Ask a yes/no question on stdin; return True for 'y' or 'yes'.

    Surrounding whitespace and letter case are ignored, so answers such as
    ``"Y"``, ``" yes"`` or ``"y "`` are all accepted as confirmation.
    """
    return input(prompt).strip().lower() in ("y", "yes")


def main():
    """Run the interactive upload workflow.

    When ``check_huggingface_setup()`` succeeds, the user is asked whether
    to upload right away. Otherwise the user is offered the token setup
    (``setup_huggingface_token()``) and, if that succeeds, is asked whether
    to upload. Every decision point prints a short status line.
    """
    print("SINGTEL BILL SCANNER - HUGGING FACE UPLOAD")
    print("=" * 50)

    # Happy path: tooling and token are already in place.
    if check_huggingface_setup():
        print("\nβœ… Setup looks good!")
        if _confirm("\nDo you want to upload to Hugging Face now? (y/n): "):
            upload_to_huggingface()
        else:
            print("Upload cancelled.")
        return

    # Setup is incomplete: offer to configure the token first.
    print("\nπŸ”§ Setup needed!")
    if not _confirm("Do you want to set up your HF token now? (y/n): "):
        print("Setup cancelled.")
        return

    if not setup_huggingface_token():
        print("❌ Token setup failed")
        return

    print("\nβœ… Token setup complete!")
    if _confirm("Upload to Hugging Face now? (y/n): "):
        upload_to_huggingface()

# Run the interactive workflow only when executed as a script, not on import.
if __name__ == "__main__":
    main()