Spaces:
Running
on
L40S
Running
on
L40S
Update start.sh
Browse files
start.sh
CHANGED
@@ -1,36 +1,68 @@
|
|
1 |
#!/bin/bash
|
2 |
|
3 |
-
echo "Starting
|
4 |
-
|
|
|
|
|
5 |
python3 -m vllm.entrypoints.openai.api_server \
|
6 |
--model numind/NuMarkdown-8B-Thinking \
|
7 |
--port 8000 \
|
8 |
--host 0.0.0.0 \
|
9 |
-
--max-model-len
|
10 |
-
--gpu-memory-utilization 0.
|
|
|
|
|
|
|
11 |
|
12 |
VLLM_PID=$!
|
13 |
echo "vLLM started with PID: $VLLM_PID"
|
14 |
|
15 |
-
#
|
16 |
-
echo "Waiting for vLLM server to start..."
|
17 |
-
for i in {1..
|
18 |
-
if curl -s http://localhost:8000/
|
19 |
-
echo "vLLM
|
|
|
|
|
|
|
20 |
break
|
21 |
fi
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
sleep 2
|
24 |
done
|
25 |
|
26 |
-
#
|
27 |
if ! curl -s http://localhost:8000/v1/models > /dev/null; then
|
28 |
-
echo "
|
29 |
-
echo "vLLM logs:"
|
30 |
-
|
31 |
exit 1
|
32 |
fi
|
33 |
|
34 |
-
echo "
|
35 |
-
|
36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#!/bin/bash
# Launch script for the NuMarkdown-8B-Thinking HF Space.
# Phases:
#   1) start a vLLM OpenAI-compatible API server in the background,
#   2) poll its health endpoint for up to 6 minutes (180 x 2s),
#   3) launch the Gradio front-end, dumping diagnostics on failure.

echo "=== Starting NuMarkdown-8B-Thinking Space ==="
echo "Starting vLLM server with optimized settings..."

# Start vLLM with HF Spaces optimizations; redirect all output to a log
# file so it can be dumped if startup fails.
python3 -m vllm.entrypoints.openai.api_server \
    --model numind/NuMarkdown-8B-Thinking \
    --port 8000 \
    --host 0.0.0.0 \
    --max-model-len 8048 \
    --gpu-memory-utilization 0.9 \
    --disable-log-requests \
    --tensor-parallel-size 1 \
    --trust-remote-code > "$HOME/app/vllm.log" 2>&1 &

VLLM_PID=$!
echo "vLLM started with PID: $VLLM_PID"

# More aggressive waiting with health checks
echo "Waiting for vLLM server to start (this may take 5-10 minutes)..."
for i in {1..180}; do  # 180 iterations x 2s sleep = wait up to 6 minutes
    if curl -s --connect-timeout 5 http://localhost:8000/health > /dev/null 2>&1; then
        echo "✅ vLLM health check passed!"
        break
    elif curl -s --connect-timeout 5 http://localhost:8000/v1/models > /dev/null 2>&1; then
        echo "✅ vLLM server is ready!"
        break
    fi

    # Show progress every 10 iterations (~20 seconds) and bail out early
    # if the background vLLM process has already died.
    if [ $((i % 10)) -eq 0 ]; then
        echo "Still waiting... ($i/180) - checking vLLM process"
        if ! ps -p "$VLLM_PID" > /dev/null; then
            echo "❌ vLLM process died! Checking logs:"
            tail -20 "$HOME/app/vllm.log"
            exit 1
        fi
    fi
    sleep 2
done

# Final check: the loop may have exhausted all 180 attempts without a break.
if ! curl -s http://localhost:8000/v1/models > /dev/null; then
    echo "❌ vLLM server failed to start after 6 minutes!"
    echo "Last 50 lines of vLLM logs:"
    tail -50 "$HOME/app/vllm.log"
    exit 1
fi

echo "✅ vLLM server is ready!"
echo "=== Starting Gradio App ==="
echo "Port 7860 status before launching Gradio:"
netstat -tuln | grep :7860 || echo "Port 7860 is free"

echo "Environment check:"
echo "PORT=${PORT:-7860}"
echo "PWD=$(pwd)"
echo "USER=$(whoami)"

# Launch Gradio with explicit error handling
echo "Launching Gradio..."
python3 "$HOME/app/app.py" || {
    echo "❌ Gradio failed to start!"
    echo "Checking if port is in use:"
    netstat -tuln | grep :7860
    exit 1
}