# (Removed for Hugging Face Spaces) #!/usr/bin/env python3
"""Launch a vLLM OpenAI-compatible server for google/gemma-3n-E4B-it.

Configuration is read from the environment (optionally seeded from a
.env file when python-dotenv is installed):

  VLLM_PORT -- port to serve on (default "8000")
  HF_TOKEN  -- Hugging Face access token; required because the model is gated
"""
import os
import subprocess
import sys

try:
    # .env support is a convenience only; plain environment variables work
    # without python-dotenv installed.
    from dotenv import load_dotenv
    load_dotenv()
except ImportError:
    pass

MODEL = "google/gemma-3n-E4B-it"


def build_command(model: str, port: str) -> list[str]:
    """Return the argv list that starts vLLM's OpenAI-compatible API server.

    NOTE: the api_server CLI has no ``--token`` flag. The Hugging Face token
    must be passed via the environment (see :func:`main`), never on the
    command line where it would leak into the process list.
    """
    return [
        sys.executable, "-m", "vllm.entrypoints.openai.api_server",
        "--model", model,
        "--port", port,
        "--host", "0.0.0.0",
    ]


def main() -> int:
    """Validate the environment, launch the server, and return its exit code."""
    port = os.environ.get("VLLM_PORT", "8000")
    hf_token = os.environ.get("HF_TOKEN")  # user must set this for gated models
    if not hf_token:
        print("[ERROR] Please set the HF_TOKEN environment variable for model download.")
        return 1

    # huggingface_hub honors either variable name depending on version;
    # set both so the model download authenticates regardless.
    env = dict(os.environ)
    env["HF_TOKEN"] = hf_token
    env["HUGGING_FACE_HUB_TOKEN"] = hf_token

    print(f"[INFO] Launching vLLM server for {MODEL} on port {port}...")
    # Blocks until the server process exits; propagate its exit status.
    return subprocess.run(build_command(MODEL, port), env=env).returncode


if __name__ == "__main__":
    sys.exit(main())