"""Launch the vLLM OpenAI-compatible API server for google/gemma-3n-E4B-it.

The server is started as a child process using the same Python interpreter
that runs this script (``sys.executable``), so it uses whatever environment
(e.g. an activated venv) this script was started from.

Configuration is read from the environment, optionally via a ``.env`` file:

* ``HF_TOKEN``  -- required; Hugging Face access token for the model download.
* ``VLLM_PORT`` -- optional; port to listen on (defaults to ``8000``).

The script exits with status 1 when ``HF_TOKEN`` is missing; otherwise it
exits with the exit status of the vLLM server process.
"""

import os
import subprocess
import sys

from dotenv import load_dotenv

# Must run before any os.environ lookups below so that values defined in a
# .env file are visible to them.
load_dotenv()

MODEL = "google/gemma-3n-E4B-it"
PORT = os.environ.get("VLLM_PORT", "8000")
HF_TOKEN = os.environ.get("HF_TOKEN")

if not HF_TOKEN:
    # Diagnostics go to stderr so they are not mixed into captured stdout.
    print(
        "[ERROR] Please set the HF_TOKEN environment variable for model download.",
        file=sys.stderr,
    )
    sys.exit(1)

# NOTE: the token is deliberately NOT passed on the command line. The vLLM API
# server does not define a `--token` option, and command-line arguments are
# visible to other users in process listings. HF_TOKEN was read from
# os.environ above, and the child process inherits that environment, so the
# Hugging Face download code can pick it up from there.
cmd = [
    sys.executable, "-m", "vllm.entrypoints.openai.api_server",
    "--model", MODEL,
    "--port", PORT,
    # 0.0.0.0 binds every network interface, i.e. the server is reachable
    # from other hosts, not only from localhost.
    "--host", "0.0.0.0",
]

print(f"[INFO] Launching vLLM server for {MODEL} on port {PORT}...")

# Blocks until the server process terminates, then propagates its exit status
# so that supervisors and shell scripts can detect a failed launch.
completed = subprocess.run(cmd)
sys.exit(completed.returncode)