Spaces:
Runtime error
Runtime error
da03
committed on
Commit
·
888f299
1
Parent(s):
3085e5d
- start_system.sh +9 -2
start_system.sh
CHANGED
@@ -33,7 +33,7 @@ GPU_DETECTION_SUCCESS=$?
|
|
33 |
|
34 |
# Default values
|
35 |
NUM_GPUS=$DETECTED_GPUS
|
36 |
-
DISPATCHER_PORT=
|
37 |
|
38 |
# Parse command line arguments
|
39 |
while [[ $# -gt 0 ]]; do
|
@@ -49,7 +49,7 @@ while [[ $# -gt 0 ]]; do
|
|
49 |
-h|--help)
|
50 |
echo "Usage: $0 [--num-gpus N] [--port PORT]"
|
51 |
echo " --num-gpus N Number of GPU workers to start (default: auto-detected)"
|
52 |
-
echo " --port PORT Dispatcher port (default:
|
53 |
echo ""
|
54 |
echo "GPU Detection:"
|
55 |
echo " Automatically detects available GPUs using nvidia-smi"
|
@@ -107,6 +107,13 @@ echo "💻 Worker ports: $(seq -s', ' 8001 $((8000 + NUM_GPUS)))"
|
|
107 |
echo "📈 Analytics logging: system_analytics_$(date +%Y%m%d_%H%M%S).log"
|
108 |
echo ""
|
109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
# Check if required files exist
|
111 |
if [[ ! -f "dispatcher.py" ]]; then
|
112 |
echo "❌ Error: dispatcher.py not found"
|
|
|
33 |
|
34 |
# Default values
|
35 |
NUM_GPUS=$DETECTED_GPUS
|
36 |
+
DISPATCHER_PORT=7860
|
37 |
|
38 |
# Parse command line arguments
|
39 |
while [[ $# -gt 0 ]]; do
|
|
|
49 |
-h|--help)
|
50 |
echo "Usage: $0 [--num-gpus N] [--port PORT]"
|
51 |
echo " --num-gpus N Number of GPU workers to start (default: auto-detected)"
|
52 |
+
echo " --port PORT Dispatcher port (default: 7860)"
|
53 |
echo ""
|
54 |
echo "GPU Detection:"
|
55 |
echo " Automatically detects available GPUs using nvidia-smi"
|
|
|
107 |
echo "📈 Analytics logging: system_analytics_$(date +%Y%m%d_%H%M%S).log"
|
108 |
echo ""
|
109 |
|
110 |
+
# Validate that we're not trying to start more workers than GPUs
|
111 |
+
if [ "$NUM_GPUS" -gt "$DETECTED_GPUS" ]; then
|
112 |
+
echo "⚠️ Warning: Trying to start $NUM_GPUS workers but only $DETECTED_GPUS GPU(s) detected"
|
113 |
+
echo " This may cause GPU sharing or errors. Consider using --num-gpus $DETECTED_GPUS"
|
114 |
+
echo ""
|
115 |
+
fi
|
116 |
+
|
117 |
# Check if required files exist
|
118 |
if [[ ! -f "dispatcher.py" ]]; then
|
119 |
echo "❌ Error: dispatcher.py not found"
|