File size: 4,316 Bytes
795183d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
"""

Monitor the TrOCR model download in the Hugging Face cache and report its status.

"""

import os
import time
from pathlib import Path

def _directory_size_mb(directory):
    """Return the total size of regular files under *directory*, in MiB.

    Symlinks are skipped: the Hugging Face hub cache exposes each snapshot
    file as a symlink into ``blobs/``, so following them would count every
    blob twice. Entries that disappear or become unreadable while the tree
    is being walked (normal during an active download) are ignored rather
    than aborting the whole measurement.
    """
    total_bytes = 0
    try:
        for entry in directory.rglob('*'):
            try:
                if entry.is_symlink() or not entry.is_file():
                    continue
                total_bytes += entry.stat().st_size
            except OSError:
                # The file vanished or is unreadable; keep the partial total.
                continue
    except OSError:
        # The tree itself changed under the walk; report what was measured.
        pass
    return total_bytes / (1024 * 1024)


def check_huggingface_cache():
    """Print a summary of the Hugging Face cache under the user's home.

    Looks at ``~/.cache/huggingface`` and reports:

    * every directory in the ``transformers`` sub-cache with its size;
    * every directory in the ``hub`` sub-cache whose name contains
      "microsoft" and which is larger than 1 MB, followed by a count and
      total size, plus a hint when the total exceeds 1000 MB.

    Returns:
        bool: True if the cache directory exists, False otherwise.
    """
    cache_dir = Path.home() / ".cache" / "huggingface"

    print(f"Checking cache directory: {cache_dir}")

    if not cache_dir.exists():
        print("❌ Cache directory not found yet")
        return False

    print("βœ… Cache directory exists")

    # Check for transformers cache
    transformers_cache = cache_dir / "transformers"
    if transformers_cache.exists():
        print("βœ… Transformers cache found")

        # List cached models
        for item in transformers_cache.iterdir():
            if item.is_dir():
                size_mb = _directory_size_mb(item)
                print(f"   πŸ“ {item.name}: {size_mb:.1f} MB")

    # Check for hub cache
    hub_cache = cache_dir / "hub"
    if hub_cache.exists():
        print("βœ… Hub cache found")

        total_size = 0
        model_count = 0

        for item in hub_cache.iterdir():
            if item.is_dir() and "microsoft" in item.name.lower():
                size_mb = _directory_size_mb(item)
                if size_mb > 1:  # Only show models > 1MB
                    print(f"   πŸ“ {item.name}: {size_mb:.1f} MB")
                    total_size += size_mb
                    model_count += 1

        if total_size > 0:
            print(f"\nπŸ“Š Total cached models: {model_count}")
            print(f"πŸ“Š Total cache size: {total_size:.1f} MB")

            if total_size > 1000:  # > 1GB
                print("βœ… Large model detected - TrOCR might be ready!")

    return True

def monitor_download(checks: int = 30, interval: float = 10):
    """Poll the Hugging Face cache repeatedly and print what is found.

    Each round prints a numbered heading and calls
    ``check_huggingface_cache()``. The function waits ``interval`` seconds
    between rounds, but not after the last one.

    Args:
        checks: Number of polling rounds to run. Defaults to 30.
        interval: Seconds to sleep between consecutive rounds. Defaults to 10.
    """
    print("πŸ” DOWNLOAD MONITOR")
    print("=" * 40)

    print("Looking for model download activity...")

    for check_number in range(1, checks + 1):
        print(f"\n⏱️  Check #{check_number}/{checks}:")

        # NOTE: the helper only reports whether the cache directory exists,
        # so this message does not prove that bytes are currently arriving.
        if check_huggingface_cache():
            print("πŸ“₯ Download activity detected!")

        # Sleeping after the final round would only delay the exit.
        if check_number < checks:
            print(f"⏳ Waiting {interval} seconds before next check...")
            time.sleep(interval)

    print("\nβœ… Monitoring complete!")

def show_status():
    """Print a one-shot status report for the bill scanner setup.

    The report covers the current working directory, whether the two sample
    bill images are present (looked up relative to the working directory),
    the Hugging Face cache summary, and a short explanation of the download.
    """
    print("SINGTEL BILL SCANNER - STATUS CHECK")
    print("=" * 50)

    # Environment section
    print("🐍 Python Environment:")
    working_dir = os.getcwd()
    print(f"   Working directory: {working_dir}")

    # Sample image section
    print("\nπŸ“ Test Files:")
    for candidate in ("test_bill.png", "test_singtel_bill.png"):
        if not os.path.exists(candidate):
            print(f"   ❌ {candidate} (not found)")
            continue
        byte_count = os.path.getsize(candidate)
        print(f"   βœ… {candidate} ({byte_count} bytes)")

    # Cache section
    print("\nπŸ“¦ Model Cache:")
    check_huggingface_cache()

    # Closing explanation, one line per note
    print("\nπŸ’‘ What's happening:")
    explanation = (
        "   - The TrOCR model is downloading in the background",
        "   - This is a one-time process (~1.3GB)",
        "   - Once complete, all future runs will be instant",
        "   - The model will be cached permanently",
    )
    for note in explanation:
        print(note)

if __name__ == "__main__":
    import sys

    # Passing "monitor" as the first argument skips the status report and
    # the prompt and goes straight to polling.
    if len(sys.argv) > 1 and sys.argv[1] == "monitor":
        monitor_download()
    else:
        show_status()

        try:
            choice = input("\nWould you like to monitor download progress? (y/n): ")
        except EOFError:
            # stdin is closed or not interactive (piped, scheduled job):
            # treat it as "no" instead of ending with a traceback.
            print()
            choice = ""

        # Strip so that answers such as "y " or " yes" are still accepted.
        if choice.strip().lower() in ('y', 'yes'):
            monitor_download()
        else:
            print("\nπŸ’‘ Tip: Run 'python monitor.py monitor' to watch progress")
            print("The download will continue in the background.")