liamcripwell committed
Commit ebea37f · verified · 1 Parent(s): 2a3e019

Update start.sh

Files changed (1)
  1. start.sh +49 -17
start.sh CHANGED
@@ -1,36 +1,68 @@
 #!/bin/bash
 
-echo "Starting vLLM server..."
-# Start vLLM in the background with logging
+echo "=== Starting NuMarkdown-8B-Thinking Space ==="
+echo "Starting vLLM server with optimized settings..."
+
+# Start vLLM with HF Spaces optimizations
 python3 -m vllm.entrypoints.openai.api_server \
     --model numind/NuMarkdown-8B-Thinking \
     --port 8000 \
     --host 0.0.0.0 \
-    --max-model-len 20000 \
-    --gpu-memory-utilization 0.95 > $HOME/app/vllm.log 2>&1 &
+    --max-model-len 8048 \
+    --gpu-memory-utilization 0.9 \
+    --disable-log-requests \
+    --tensor-parallel-size 1 \
+    --trust-remote-code > $HOME/app/vllm.log 2>&1 &
 
 VLLM_PID=$!
 echo "vLLM started with PID: $VLLM_PID"
 
-# Wait for vLLM to be ready
-echo "Waiting for vLLM server to start..."
-for i in {1..90}; do
-    if curl -s http://localhost:8000/v1/models > /dev/null; then
-        echo "vLLM server is ready!"
+# More aggressive waiting with health checks
+echo "Waiting for vLLM server to start (this may take 5-10 minutes)..."
+for i in {1..180}; do  # Wait up to 6 minutes
+    if curl -s --connect-timeout 5 http://localhost:8000/health > /dev/null 2>&1; then
+        echo "✓ vLLM health check passed!"
+        break
+    elif curl -s --connect-timeout 5 http://localhost:8000/v1/models > /dev/null 2>&1; then
+        echo "✓ vLLM server is ready!"
         break
     fi
-    echo "Waiting... ($i/90)"
+
+    # Show progress every 10 seconds
+    if [ $((i % 10)) -eq 0 ]; then
+        echo "Still waiting... ($i/180) - checking vLLM process"
+        if ! ps -p $VLLM_PID > /dev/null; then
+            echo "❌ vLLM process died! Checking logs:"
+            tail -20 $HOME/app/vllm.log
+            exit 1
+        fi
+    fi
     sleep 2
 done
 
-# Check if vLLM is actually running
+# Final check
 if ! curl -s http://localhost:8000/v1/models > /dev/null; then
-    echo "ERROR: vLLM server failed to start!"
-    echo "vLLM logs:"
-    cat $HOME/app/vllm.log
+    echo "❌ vLLM server failed to start after 6 minutes!"
+    echo "Last 50 lines of vLLM logs:"
+    tail -50 $HOME/app/vllm.log
     exit 1
 fi
 
-echo "Starting Gradio app..."
-# Start Gradio app in the foreground
-python3 $HOME/app/app.py
+echo "✅ vLLM server is ready!"
+echo "=== Starting Gradio App ==="
+echo "Port 7860 status before launching Gradio:"
+netstat -tuln | grep :7860 || echo "Port 7860 is free"
+
+echo "Environment check:"
+echo "PORT=${PORT:-7860}"
+echo "PWD=$(pwd)"
+echo "USER=$(whoami)"
+
+# Launch Gradio with explicit error handling
+echo "Launching Gradio..."
+python3 $HOME/app/app.py || {
+    echo "❌ Gradio failed to start!"
+    echo "Checking if port is in use:"
+    netstat -tuln | grep :7860
+    exit 1
+}
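
For reference, here is a quick smoke test for the endpoints the updated script polls — a minimal sketch assuming the server from this commit is already up on localhost:8000. The readiness probes mirror the ones in start.sh; the chat request at the end uses vLLM's OpenAI-compatible API, and its prompt and max_tokens value are illustrative assumptions, not something this Space sends itself.

#!/bin/bash
# Sketch: exercise the same readiness probes start.sh uses.
curl -s --connect-timeout 5 http://localhost:8000/health > /dev/null && echo "health OK"
curl -s http://localhost:8000/v1/models

# vLLM also serves an OpenAI-compatible chat endpoint; this request is
# illustrative only (the prompt and token limit are assumptions).
curl -s http://localhost:8000/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{
          "model": "numind/NuMarkdown-8B-Thinking",
          "messages": [{"role": "user", "content": "Say hello"}],
          "max_tokens": 32
        }'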