"""
Monitor the model download progress and provide helpful information
"""
import os
import sys
import time
from pathlib import Path

_BYTES_PER_MB = 1024 * 1024


def _directory_size_bytes(directory):
    """Return the total size in bytes of the regular files below *directory*.

    Symbolic links are skipped so that a file reachable both directly and
    through a link (the Hugging Face hub cache exposes snapshot files as
    links to its blob files) is counted only once.  Entries that disappear
    or become unreadable while the scan is running -- which happens while a
    download is still in progress -- are ignored instead of aborting.
    """
    total = 0
    try:
        for entry in directory.rglob("*"):
            try:
                if entry.is_symlink() or not entry.is_file():
                    continue
                total += entry.stat().st_size
            except OSError:
                # The file was renamed/removed between listing and stat().
                continue
    except OSError:
        # The directory itself vanished or is unreadable; report what we have.
        pass
    return total


def check_huggingface_cache() -> bool:
    """Print a summary of the Hugging Face cache under the home directory.

    Looks at ``~/.cache/huggingface`` and reports:

    * every directory in the ``transformers`` sub-cache with its size, and
    * every directory in the ``hub`` sub-cache whose name contains
      ``"microsoft"`` and which is larger than 1 MB, followed by totals.

    Returns:
        True if the cache directory exists, False otherwise.
    """
    cache_dir = Path.home() / ".cache" / "huggingface"
    print(f"Checking cache directory: {cache_dir}")

    if not cache_dir.exists():
        print("āŒ Cache directory not found yet")
        return False

    print("āœ… Cache directory exists")

    # Check for transformers cache
    transformers_cache = cache_dir / "transformers"
    if transformers_cache.exists():
        print("āœ… Transformers cache found")
        # List cached models
        for item in sorted(transformers_cache.iterdir()):
            if item.is_dir():
                size_mb = _directory_size_bytes(item) / _BYTES_PER_MB
                print(f" šŸ“ {item.name}: {size_mb:.1f} MB")

    # Check for hub cache
    hub_cache = cache_dir / "hub"
    if hub_cache.exists():
        print("āœ… Hub cache found")
        total_mb = 0.0
        model_count = 0

        for item in sorted(hub_cache.iterdir()):
            try:
                if not (item.is_dir() and "microsoft" in item.name.lower()):
                    continue
            except OSError:
                # Best effort: skip entries we cannot inspect.
                continue

            size_mb = _directory_size_bytes(item) / _BYTES_PER_MB
            if size_mb > 1:  # Only show models > 1MB
                print(f" šŸ“ {item.name}: {size_mb:.1f} MB")
                total_mb += size_mb
                model_count += 1

        if total_mb > 0:
            print(f"\nšŸ“Š Total cached models: {model_count}")
            print(f"šŸ“Š Total cache size: {total_mb:.1f} MB")
            if total_mb > 1000:  # > 1GB
                print("āœ… Large model detected - TrOCR might be ready!")

    # Re-check, as the original did, in case the directory vanished meanwhile.
    return cache_dir.exists()


def monitor_download(checks: int = 30, interval: float = 10) -> None:
    """Poll the Hugging Face cache repeatedly and print what is found.

    Args:
        checks: Number of times to inspect the cache (default 30).
        interval: Seconds to wait between two consecutive checks
            (default 10).  No wait is performed after the final check.
    """
    print("šŸ” DOWNLOAD MONITOR")
    print("=" * 40)
    print("Looking for model download activity...")

    for attempt in range(1, checks + 1):
        print(f"\nā±ļø Check #{attempt}/{checks}:")

        if check_huggingface_cache():
            print("šŸ“„ Download activity detected!")

        # Sleeping after the last check would only delay the exit.
        if attempt < checks:
            print(f"ā³ Waiting {interval} seconds before next check...")
            time.sleep(interval)

    print("\nāœ… Monitoring complete!")


def show_status() -> None:
    """Print the working directory, test-file presence and cache summary."""
    print("SINGTEL BILL SCANNER - STATUS CHECK")
    print("=" * 50)

    # Check Python environment
    print("šŸ Python Environment:")
    print(f" Working directory: {os.getcwd()}")

    # Check for test files (looked up relative to the working directory)
    test_files = ["test_bill.png", "test_singtel_bill.png"]
    print("\nšŸ“ Test Files:")
    for file in test_files:
        if os.path.exists(file):
            size = os.path.getsize(file)
            print(f" āœ… {file} ({size} bytes)")
        else:
            print(f" āŒ {file} (not found)")

    # Check cache
    print("\nšŸ“¦ Model Cache:")
    check_huggingface_cache()

    print("\nšŸ’” What's happening:")
    print(" - The TrOCR model is downloading in the background")
    print(" - This is a one-time process (~1.3GB)")
    print(" - Once complete, all future runs will be instant")
    print(" - The model will be cached permanently")


def main() -> None:
    """Command-line entry point.

    With the single argument ``monitor`` the download monitor starts right
    away; otherwise the status report is printed and the user is asked
    whether monitoring should start.
    """
    if len(sys.argv) > 1 and sys.argv[1] == "monitor":
        monitor_download()
        return

    show_status()
    try:
        choice = input("\nWould you like to monitor download progress? (y/n): ")
    except EOFError:
        # No interactive stdin (e.g. piped or scheduled run): treat as "no".
        choice = ""

    if choice.strip().lower() in ("y", "yes"):
        monitor_download()
    else:
        print("\nšŸ’” Tip: Run 'python monitor.py monitor' to watch progress")
        print("The download will continue in the background.")


if __name__ == "__main__":
    main()