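# Image that starts a vLLM OpenAI-compatible API server (port 1234) next to a
# uvicorn-served web app (port 7860) on a PyTorch CUDA runtime base image.
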
# Base image: PyTorch 2.2.2 runtime variant built against CUDA 12.1 / cuDNN 8.
# The tag is pinned explicitly so rebuilds start from the same base.
FROM pytorch/pytorch:2.2.2-cuda12.1-cudnn8-runtime

# OS-level packages. This runs before any USER instruction, i.e. as the base
# image's default user. Index refresh, install and list cleanup share a single
# layer so the apt index is never stale and is not shipped in the image.
# Packages are listed one per line, sorted, to keep diffs small.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        ffmpeg \
        git-lfs \
        portaudio19-dev && \
    rm -rf /var/lib/apt/lists/*

# Create an account named `user` with a fixed UID of 1000 and a home directory
# (-m), then run all following build steps and the container itself as it.
RUN useradd -m -u 1000 user
USER user

# Working directory for the remaining build steps and for the container.
WORKDIR /app

# Prepend the user's ~/.local/bin to PATH.
# NOTE(review): presumably this is where pip places console scripts when it
# installs as a non-root user - confirm against the base image's pip setup.
ENV PATH="/home/user/.local/bin:$PATH"

# Copy the whole build context into /app, owned by `user`.
# NOTE(review): this happens before the pip install steps, so any change to
# any file in the build context invalidates the cached dependency layers.
# Consider copying only requirements.txt first and the remaining sources after
# the installs, and make sure a .dockerignore excludes .git, caches and secrets.
COPY --chown=user . /app

# Runtime environment, grouped into a single ENV instruction.
#   HF_HOME                      - set to a directory inside /app.
#                                  NOTE(review): presumably the Hugging Face
#                                  cache root; /app/.cache must be writable by
#                                  `user` at runtime - confirm.
#   VLLM_USE_LM_FORMAT_ENFORCER  - set to 0.
#                                  NOTE(review): presumably turns off the
#                                  lm-format-enforcer integration; verify that
#                                  the installed vLLM actually reads this name.
ENV HF_HOME=/app/.cache \
    VLLM_USE_LM_FORMAT_ENFORCER=0

# Exact pins for transformers and lm-format-enforcer.
RUN pip install --no-cache-dir "transformers==4.40.2" "lm-format-enforcer==0.9.8"

# The specifier MUST be quoted. Unquoted, the shell treats `>=0.9.0` as an
# output redirection: pip would install an unconstrained `vllm` and its output
# would be written to a file literally named `=0.9.0`.
# NOTE(review): a vLLM release satisfying >=0.9.0 may require newer
# transformers / torch than the versions pinned above and in the base image, in
# which case pip will replace them - confirm the resolved set is the intended one.
RUN pip install --no-cache-dir "vllm>=0.9.0"

# Copy the dependency manifest into the working directory and install from it.
# pip itself is upgraded first; --no-cache-dir on both commands keeps the
# download cache out of the layer.
COPY --chown=user requirements.txt .
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# RealtimeTTS with its `system` extra, minimum version 0.5.5. The specifier is
# quoted so the shell does not interpret `[...]` or `>=`.
RUN pip install --no-cache-dir "realtimetts[system]>=0.5.5"

# Best-effort install: if pip fails, the `|| echo` fallback prints a notice and
# the step still succeeds, so the image builds without FlashInfer.
RUN pip install --no-cache-dir flashinfer-cu121-preview || \
    echo "FlashInfer not available - continuing without."

# Documents port 7860, the port uvicorn is bound to in CMD. EXPOSE is metadata
# only; it does not publish the port.
EXPOSE 7860

# Model / endpoint configuration, grouped into a single ENV instruction.
#   MODEL_ID         - expanded by CMD as the value of vLLM's --model flag.
#   ORPHEUS_MODEL    - same model identifier under a second name; not read in
#                      this file. NOTE(review): presumably consumed by the app -
#                      confirm, and keep both values in sync.
#   ORPHEUS_API_URL  - loopback URL on port 1234, the port CMD starts the vLLM
#                      server on.
ENV ORPHEUS_MODEL="SebastianBodza/Kartoffel_Orpheus-3B_german_synthetic-v0.1" \
    MODEL_ID="SebastianBodza/Kartoffel_Orpheus-3B_german_synthetic-v0.1" \
    ORPHEUS_API_URL="http://127.0.0.1:1234"

# Container start-up:
#   1. launch the vLLM OpenAI-compatible API server in the background on
#      port 1234 (bfloat16, 0.85 GPU memory utilisation, 8192 max model length);
#   2. replace the shell with uvicorn serving app:app on 0.0.0.0:7860.
# Exec (JSON) form avoids the implicit `/bin/sh -c` wrapper; bash expands
# MODEL_ID when the container starts, and `exec` makes uvicorn take over the
# bash process so it receives stop signals directly.
# NOTE(review): the backgrounded vLLM process is not supervised - if it exits,
# the container keeps running with only uvicorn. Consider an init/supervisor.
CMD ["bash", "-c", "python -m vllm.entrypoints.openai.api_server --model \"${MODEL_ID}\" --port 1234 --dtype bfloat16 --gpu-memory-utilization 0.85 --max-model-len 8192 & exec uvicorn app:app --host 0.0.0.0 --port 7860"]