#!/bin/bash
#
# Remote Worker Startup Script
#
# Runs pre-flight checks (required files present, GPU probe, dispatcher
# reachable) and then starts one background worker per GPU index. Each worker
# writes stdout and stderr to worker_gpu_<index>.log in the current directory.
#
# Usage: ./start_remote_worker.sh [DISPATCHER_IP] [LOCAL_IP] [NUM_GPUS]
#   $1  dispatcher host; the URL is built as http://<host>:7860
#       (default: 192.168.1.50)
#   $2  address of this machine (default: first field of `hostname -I`)
#   $3  number of workers to start (default: 1)
#
# NOTE(review): this file was recovered from a copy whose line breaks were
# lost and in which text between '<' and '>' appears to have been stripped.
# Two spots below are reconstructions and are marked NOTE(review); compare
# them with the original script before relying on this one.
#
# NOTE(review): the leading glyphs in the messages below look like emoji that
# were decoded with the wrong character set. They are kept exactly as found
# because some of them cannot be restored unambiguously.

DISPATCHER_IP=${1:-"192.168.1.50"}
LOCAL_IP=${2:-$(hostname -I | awk '{print $1}')}
NUM_GPUS=${3:-1}
DISPATCHER_URL="http://${DISPATCHER_IP}:7860"

# NUM_GPUS is used inside (( )) below, where a non-numeric value would be
# evaluated as an arithmetic expression. Accept plain decimal digits only.
if [[ ! "$NUM_GPUS" =~ ^[0-9]+$ ]]; then
  printf 'Error: NUM_GPUS must be a non-negative integer (got: %s)\n' \
    "$NUM_GPUS" >&2
  exit 1
fi
# Force base 10 so a value such as "08" is not rejected as invalid octal.
NUM_GPUS=$((10#$NUM_GPUS))

echo "๐Ÿš€ Starting Remote GPU Workers"
echo "==============================="
echo "๐ŸŒ Dispatcher: $DISPATCHER_URL"
echo "๐Ÿ“ Local IP: $LOCAL_IP"
echo "๐Ÿ–ฅ๏ธ GPUs: $NUM_GPUS"
echo ""

# Check if required files exist
REQUIRED_FILES=("worker.py" "utils.py" "latent_stats.json")
for file in "${REQUIRED_FILES[@]}"; do
  if [[ ! -f "$file" ]]; then
    # Diagnostics go to stderr so they are not mixed into captured stdout.
    {
      echo "โŒ Error: $file not found"
      echo "๐Ÿ’ก Copy required files from main machine:"
      echo " scp user@dispatcher-machine:/path/to/{worker.py,utils.py,latent_stats.json,config_*.yaml} ."
    } >&2
    exit 1
  fi
done

# Test GPU access. The probe is informational: a failure is reported but does
# not stop the script.
echo "๐Ÿงช Testing GPU access..."
if ! python -c "import torch; print(f'โœ… CUDA available: {torch.cuda.is_available()}'); print(f'๐Ÿ“Š GPU count: {torch.cuda.device_count()}')"; then
  echo "Warning: GPU probe failed (python or torch may be unavailable)" >&2
fi

# Test dispatcher connectivity
echo "๐ŸŒ Testing dispatcher connectivity..."
if curl -s --connect-timeout 5 "$DISPATCHER_URL" > /dev/null; then
  echo "โœ… Dispatcher reachable"
else
  {
    echo "โŒ Cannot reach dispatcher at $DISPATCHER_URL"
    echo "๐Ÿ’ก Check network connectivity and dispatcher status"
  } >&2
  exit 1
fi

# Start workers
echo "๐Ÿ”ง Starting $NUM_GPUS GPU workers..."
for ((i = 0; i < NUM_GPUS; i++)); do
  # NOTE(review): the original launch command was lost; only its redirection
  # to worker_gpu_${i}.log and the trailing '&' survived. `python worker.py`
  # is a placeholder inferred from REQUIRED_FILES. Restore the real arguments
  # (GPU selection, dispatcher URL, local address, ...) from the original.
  python worker.py > "worker_gpu_${i}.log" 2>&1 &
  WORKER_PID=$!
  echo "โœ… Worker $i started (PID: $WORKER_PID)"

  # Small delay between starts
  sleep 2
done

echo ""
echo "๐ŸŽ‰ All workers started!"
echo "๐Ÿ“‹ Monitor logs:"
# NOTE(review): the recovered text ends inside this loop header. The body
# below is a reconstruction based on the log file name used above; anything
# that followed this loop in the original is missing.
for ((i = 0; i < NUM_GPUS; i++)); do
  echo "  tail -f worker_gpu_${i}.log"
done