Spaces:

yzhuang
/

MixtureOfInputs

Sleeping

File size: 975 Bytes

import subprocess
import threading
import os
import time
import spaces

def setup_mixinputs():
    """Run the ``mixinputs setup`` command and wait for it to finish.

    Raises:
        subprocess.CalledProcessError: If the command exits with a
            non-zero status.
    """
    setup_command = ["mixinputs", "setup"]
    outcome = subprocess.run(setup_command)
    # Turn a non-zero exit status into CalledProcessError.
    outcome.check_returncode()

# @spaces.GPU(duration=240)
def launch_vllm_server(beta=1.0):
    """Run ``vllm serve`` for ``Qwen/Qwen3-4B`` on port 8000 and block until it exits.

    The child process receives a copy of the current environment with two
    overrides: ``MIXINPUTS_BETA`` (taken from ``beta``) and ``VLLM_USE_V1=1``.

    Args:
        beta: Value exported to the child as ``MIXINPUTS_BETA``; it is
            converted with ``str()``.

    Returns:
        None. A non-zero exit status is reported on stderr instead of being
        raised, so callers that ignored the exit status keep working while
        a failed server no longer goes unnoticed.
    """
    import sys  # local import: only needed for the failure report below

    # Work on a copy so the parent's environment is never modified.
    env = os.environ.copy()
    env["MIXINPUTS_BETA"] = str(beta)
    env["VLLM_USE_V1"] = "1"

    # Flags are passed to the vLLM CLI verbatim; see the vLLM engine-argument
    # documentation for their exact meaning.
    cmd = [
        "vllm", "serve",
        "Qwen/Qwen3-4B",
        "--tensor-parallel-size", "1",
        "--enforce-eager",
        "--max-model-len", "2048",
        "--max-seq-len-to-capture", "2048",
        "--max-num-seqs", "1",
        "--port", "8000",
        "--disable-async-output-proc",
    ]

    # subprocess.run blocks for the whole lifetime of the server process.
    completed = subprocess.run(cmd, env=env)

    # Previously the exit status was dropped; surface failures so a server
    # that died (e.g. inside a background thread) leaves a visible trace.
    if completed.returncode != 0:
        print(
            f"vLLM server exited with status {completed.returncode}",
            file=sys.stderr,
            flush=True,
        )

# Start-up sequence; these statements execute when the module is loaded.

# 1) Run the mixinputs setup command before anything else.
setup_mixinputs()

# 2) launch_vllm_server blocks on its subprocess, so run it on a daemon
#    thread to keep this module's execution moving.
_vllm_thread = threading.Thread(target=launch_vllm_server, daemon=True)
_vllm_thread.start()

# 3) Pause for a fixed 60 seconds to give the server time to initialize.
#    NOTE(review): this is a fixed delay, not a readiness check.
time.sleep(60)