# HuggingFace Spaces startup script (Space status captured as "Sleeping" at export time)
#!/usr/bin/env bash
# Launch a vLLM OpenAI-compatible API server in the background, wait until it
# answers on /v1/models, then run the Gradio frontend in the foreground.
set -euo pipefail

readonly LOG_FILE="$HOME/app/vllm.log"
readonly API_URL="http://localhost:8000/v1/models"

echo "Starting vLLM server..."
# Start vLLM in the background with logging
python3 -m vllm.entrypoints.openai.api_server \
  --model numind/NuMarkdown-8B-Thinking \
  --port 8000 \
  --host 0.0.0.0 \
  --max-model-len 20000 \
  --gpu-memory-utilization 0.6 > "$LOG_FILE" 2>&1 &
VLLM_PID=$!
echo "vLLM started with PID: $VLLM_PID"

# Ensure the background server is not orphaned when this script exits
# (normal Gradio shutdown, readiness failure, or any error under set -e).
trap 'kill "$VLLM_PID" 2>/dev/null || true' EXIT

# Wait for vLLM to be ready (up to 90 * 2s = 3 minutes).
echo "Waiting for vLLM server to start..."
ready=0
for i in {1..90}; do
  # -f: treat HTTP errors as failure; --max-time bounds a hung connection.
  if curl -sf --max-time 5 "$API_URL" > /dev/null; then
    echo "vLLM server is ready!"
    ready=1
    break
  fi
  # Bail out early if the server process already died — no point polling on.
  if ! kill -0 "$VLLM_PID" 2>/dev/null; then
    echo "ERROR: vLLM process exited during startup." >&2
    break
  fi
  echo "Waiting... ($i/90)"
  sleep 2
done

# Use the flag set inside the loop rather than re-curling (avoids a race
# between the final probe and the loop's last successful one).
if (( ready == 0 )); then
  echo "ERROR: vLLM server failed to start!" >&2
  echo "vLLM logs:" >&2
  cat "$LOG_FILE" >&2
  exit 1
fi

echo "Starting Gradio app..."
# Run the Gradio app in the foreground; it is the Space's main process.
python3 "$HOME/app/app.py"