ID-gambling-website-detection / verify_environment.py
Azzan Dwi Riski
fix tokenizer issues
4e933a0
#!/usr/bin/env python3
"""
Script untuk memverifikasi apakah environment Docker memiliki semua yang diperlukan
untuk menjalankan aplikasi gambling detection.
"""
import os
import sys
import subprocess
import torch
from pathlib import Path
import importlib.util
def check_package(package_name):
    """Return whether a package or module can be located by the import system.

    Args:
        package_name: Absolute import name to look up, e.g. ``"torch"`` or
            ``"playwright.sync_api"``.

    Returns:
        ``True`` if a module spec can be found for ``package_name`` (or the
        module is already loaded), ``False`` otherwise. Lookup errors are
        reported as ``False`` instead of propagating, so a single bad name
        cannot abort the whole verification run.
    """
    try:
        return importlib.util.find_spec(package_name) is not None
    except ImportError:
        # For dotted names find_spec() imports the parent package first and
        # raises ModuleNotFoundError (an ImportError) when it is missing.
        return False
    except ValueError:
        # Raised when the name is already in sys.modules but its __spec__ is
        # None or unset (and for malformed names). A loaded module counts as
        # available; anything else is reported as missing.
        return package_name in sys.modules
def check_command(command):
    """Return whether an external command can be launched on this system.

    The probe runs ``<command> --version`` with all standard streams
    detached, so the child can neither block on terminal input nor fill a
    pipe that nobody reads.

    Args:
        command: Executable name (resolved through ``PATH``) or a path to it.

    Returns:
        ``True`` if the process could be started, regardless of its exit
        status; ``False`` if the operating system refused to start it
        (not found, not executable, is a directory, ...).
    """
    try:
        subprocess.run(
            [command, '--version'],
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            timeout=30,
        )
    except subprocess.TimeoutExpired:
        # The process started but did not finish in time: the command exists,
        # it just does not exit promptly when given --version.
        return True
    except OSError:
        # Covers FileNotFoundError as well as PermissionError and similar
        # launch failures, all of which mean the command is not usable.
        return False
    return True
# Status glyphs are written as escapes so the source survives re-encoding;
# they render as a check mark and a cross mark respectively.
_OK = "\u2705"
_FAIL = "\u274c"


def _report_torch():
    """Print the PyTorch version and, when CUDA is usable, CUDA/GPU details."""
    print(f"PyTorch version: {torch.__version__}")
    cuda_available = torch.cuda.is_available()
    print(f"CUDA available: {cuda_available}")
    if cuda_available:
        print(f"CUDA version: {torch.version.cuda}")
        print(f"GPU: {torch.cuda.get_device_name(0)}")


def _report_packages():
    """Print, for each required Python package, whether it can be imported."""
    dependencies = (
        "gradio",
        "torch",
        "transformers",
        "pytesseract",
        "playwright",
        "PIL",
        "pandas",
    )
    print("\nChecking required packages:")
    for package in dependencies:
        status = f"{_OK} Installed" if check_package(package) else f"{_FAIL} Missing"
        print(f" - {package}: {status}")


def _report_tesseract():
    """Print whether the Tesseract binary and its Indonesian data are present."""
    print("\nChecking external tools:")

    # Binary presence and version banner.
    try:
        result = subprocess.run(
            ['tesseract', '--version'],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
    except OSError:
        # Not found, not executable, etc.: the tool cannot be used.
        print(f" - Tesseract OCR: {_FAIL} Missing")
    else:
        # Some Tesseract builds print the version banner on stderr, so fall
        # back to it when stdout is empty.
        banner_lines = (result.stdout or result.stderr or "").splitlines()
        version = banner_lines[0] if banner_lines else 'version unknown'
        print(f" - Tesseract OCR: {_OK} Installed ({version})")

    # Indonesian ("ind") language data.
    try:
        result = subprocess.run(
            ['tesseract', '--list-langs'],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
        )
    except OSError:
        print(f" - Tesseract Indonesian language: {_FAIL} Could not check")
    else:
        # Compare whole lines: a substring test would also match "ind"
        # inside the header path or any other text in the output.
        languages = {line.strip() for line in (result.stdout or "").splitlines()}
        if 'ind' in languages:
            print(f" - Tesseract Indonesian language: {_OK} Installed")
        else:
            print(f" - Tesseract Indonesian language: {_FAIL} Missing")


def _report_playwright():
    """Print whether Playwright is importable and whether Chromium launches."""
    print("\nChecking Playwright:")
    try:
        playwright_installed = check_package("playwright")
    except Exception as e:
        # Reporting boundary: a failed lookup must not abort the other checks.
        print(f" - Error checking Playwright: {e}")
        return

    print(f" - Playwright package: {f'{_OK} Installed' if playwright_installed else f'{_FAIL} Missing'}")
    if not playwright_installed:
        return

    try:
        from playwright.sync_api import sync_playwright

        with sync_playwright() as p:
            browser_types = []
            try:
                browser = p.chromium.launch()
                browser.close()
            except Exception as e:
                # Report why the launch failed instead of silently listing
                # no browsers. Only the first line is shown to keep the
                # report compact when the error message is multi-line.
                reason = (str(e).strip().splitlines() or [type(e).__name__])[0]
                print(f" - Chromium launch failed: {reason}")
            else:
                browser_types.append("Chromium")
            print(f" - Installed browsers: {', '.join(browser_types) if browser_types else 'None'}")
    except Exception as e:
        print(f" - Error checking browsers: {e}")


def _report_directories():
    """Print whether each expected directory exists and is writable."""
    print("\nChecking directories and permissions:")
    directories = (
        '/app/tokenizers',
        '/app/models',
        '/app/screenshots',
        '/.cache',
    )
    for directory in directories:
        path = Path(directory)
        if not path.exists():
            print(f" - {directory}: {_FAIL} Does not exist")
            continue
        access = '(Writable)' if os.access(path, os.W_OK) else '(Not Writable)'
        print(f" - {directory}: {_OK} Exists {access}")


def main():
    """Print a human-readable verification report for the runtime environment.

    The report covers, in order: the Python version, PyTorch/CUDA, the
    required Python packages, the Tesseract binary and its Indonesian
    language data, Playwright and its Chromium browser, and the directories
    the application expects. Results are only printed; nothing is returned
    and missing components do not raise.
    """
    banner = "=" * 50
    print(banner)
    print("Environment Verification for Gambling Detection App")
    print(banner)

    print(f"Python version: {sys.version}")

    _report_torch()
    _report_packages()
    _report_tesseract()
    _report_playwright()
    _report_directories()

    print("\nVerification complete!")
# Run the verification only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()