da03 committed on
Commit
3085e5d
·
1 Parent(s): 9c3b17f
Files changed (1) hide show
  1. start_system.sh +45 -2
start_system.sh CHANGED
@@ -2,8 +2,37 @@
2
 
3
  # Multi-GPU Neural OS Startup Script
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  # Default values
6
- NUM_GPUS=2
7
  DISPATCHER_PORT=8000
8
 
9
  # Parse command line arguments
@@ -19,8 +48,15 @@ while [[ $# -gt 0 ]]; do
19
  ;;
20
  -h|--help)
21
  echo "Usage: $0 [--num-gpus N] [--port PORT]"
22
- echo " --num-gpus N Number of GPU workers to start (default: 2)"
23
  echo " --port PORT Dispatcher port (default: 8000)"
 
 
 
 
 
 
 
24
  exit 0
25
  ;;
26
  *)
@@ -58,6 +94,13 @@ trap cleanup SIGINT SIGTERM
58
 
59
  echo "🚀 Starting Multi-GPU Neural OS System"
60
  echo "========================================"
 
 
 
 
 
 
 
61
  echo "📊 Number of GPUs: $NUM_GPUS"
62
  echo "🌐 Dispatcher port: $DISPATCHER_PORT"
63
  echo "💻 Worker ports: $(seq -s', ' 8001 $((8000 + NUM_GPUS)))"
 
2
 
3
  # Multi-GPU Neural OS Startup Script
4
 
5
#######################################
# Detect the number of NVIDIA GPUs available on this machine.
#
# Detection order:
#   1. `nvidia-smi -L`, counting only lines that describe a physical
#      device ("GPU <index>: ...").
#   2. Per-device `information` entries under /proc/driver/nvidia/gpus.
#   3. Fallback value of 1.
#
# Arguments: none
# Outputs:   the GPU count on stdout (always a positive integer)
# Returns:   0 if a real count was detected, 1 if the fallback was used
#######################################
detect_gpu_count() {
  # Declared separately from the assignments so that `local` does not
  # mask the exit status of the command substitutions below.
  local gpu_count=0
  local info_path

  if command -v nvidia-smi >/dev/null 2>&1; then
    # Count only "GPU N: ..." lines. Plain line counting would treat a
    # "No devices found." message as one GPU and would also count the
    # indented MIG sub-device lines printed under each physical GPU.
    # grep -c exits non-zero when nothing matches; normalise that to 0.
    gpu_count=$(nvidia-smi -L 2>/dev/null | grep -c '^GPU [0-9]') || gpu_count=0
    if [ "${gpu_count:-0}" -gt 0 ]; then
      echo "$gpu_count"
      return 0
    fi
  fi

  # If nvidia-smi is missing or reports nothing, fall back to the driver's
  # procfs tree: one "information" entry per GPU.
  if [ -d "/proc/driver/nvidia/gpus" ]; then
    gpu_count=0
    # Glob instead of parsing `ls`; an unmatched pattern stays literal and
    # is filtered out by the existence check.
    for info_path in /proc/driver/nvidia/gpus/*/information; do
      if [ -e "$info_path" ]; then
        gpu_count=$((gpu_count + 1))
      fi
    done
    if [ "$gpu_count" -gt 0 ]; then
      echo "$gpu_count"
      return 0
    fi
  fi

  # Default fallback: assume a single GPU and signal that detection failed.
  echo "1"
  return 1
}
29
+
30
+ # Detect GPU count automatically
31
+ DETECTED_GPUS=$(detect_gpu_count)
32
+ GPU_DETECTION_SUCCESS=$?
33
+
34
  # Default values
35
+ NUM_GPUS=$DETECTED_GPUS
36
  DISPATCHER_PORT=8000
37
 
38
  # Parse command line arguments
 
48
  ;;
49
  -h|--help)
50
  echo "Usage: $0 [--num-gpus N] [--port PORT]"
51
+ echo " --num-gpus N Number of GPU workers to start (default: auto-detected)"
52
  echo " --port PORT Dispatcher port (default: 8000)"
53
+ echo ""
54
+ echo "GPU Detection:"
55
+ echo " Automatically detects available GPUs using nvidia-smi"
56
+ echo " Currently detected: $DETECTED_GPUS GPU(s)"
57
+ if [ $GPU_DETECTION_SUCCESS -ne 0 ]; then
58
+ echo " ⚠️ GPU detection failed - using fallback of 1 GPU"
59
+ fi
60
  exit 0
61
  ;;
62
  *)
 
94
 
95
  echo "🚀 Starting Multi-GPU Neural OS System"
96
  echo "========================================"
97
+ echo "πŸ” GPU Detection: $DETECTED_GPUS GPU(s) detected"
98
+ if [ $GPU_DETECTION_SUCCESS -ne 0 ]; then
99
+ echo "⚠️ GPU detection failed - using fallback count"
100
+ elif command -v nvidia-smi >/dev/null 2>&1; then
101
+ echo "💎 Detected GPUs:"
102
+ nvidia-smi -L 2>/dev/null | sed 's/^/ /'
103
+ fi
104
  echo "📊 Number of GPUs: $NUM_GPUS"
105
  echo "🌐 Dispatcher port: $DISPATCHER_PORT"
106
  echo "💻 Worker ports: $(seq -s', ' 8001 $((8000 + NUM_GPUS)))"