#!/bin/bash
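# Launch script for the Space: start a vLLM OpenAI-compatible server in the
# background, wait for it to become healthy, then run the Gradio UI.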
echo "Starting vLLM server..."
# Start vLLM in the background with logging
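# Note: --gpu-memory-utilization is capped at 0.6, presumably to leave GPU
# headroom for the rest of the app on a shared device.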
python3 -m vllm.entrypoints.openai.api_server \
    --model numind/NuMarkdown-8B-Thinking \
    --port 8000 \
    --host 0.0.0.0 \
    --max-model-len 20000 \
    --gpu-memory-utilization 0.6 > "$HOME/app/vllm.log" 2>&1 &
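# $! expands to the PID of the most recently backgrounded job (the vLLM server)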
VLLM_PID=$!
echo "vLLM started with PID: $VLLM_PID"
# Wait for vLLM to be ready
echo "Waiting for vLLM server to start..."
for i in {1..90}; do
    if curl -s http://localhost:8000/v1/models > /dev/null; then
        echo "vLLM server is ready!"
        break
    fi
    echo "Waiting... ($i/90)"
    sleep 2
done
# The readiness loop can time out (90 attempts x 2 s, about 3 minutes), so confirm the server is actually up
if ! curl -s http://localhost:8000/v1/models > /dev/null; then
    echo "ERROR: vLLM server failed to start!"
    echo "vLLM logs:"
    cat "$HOME/app/vllm.log"
    exit 1
fi
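# Quick smoke test (a sketch, not part of the original flow): the server speaks
# the OpenAI chat API, so once it is up a request like this should get a reply:
#   curl http://localhost:8000/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "numind/NuMarkdown-8B-Thinking",
#          "messages": [{"role": "user", "content": "Hello"}]}'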
echo "Starting Gradio app..."
# Start Gradio app in the foreground
python3 "$HOME/app/app.py"