# PyTorch + CUDA 12.1 + cuDNN 8 (matches the L4 GPU)
FROM pytorch/pytorch:2.2.2-cuda12.1-cudnn8-runtime

# -- Minimal system packages: only what we actually need --
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        git-lfs build-essential portaudio19-dev ffmpeg && \
    rm -rf /var/lib/apt/lists/*
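# (git-lfs: fetch large model files; portaudio19-dev: build PyAudio for RealtimeTTS
#  audio output; ffmpeg: audio decoding/encoding -- assumed usage by this app)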

# Non-root user (Spaces recommendation)
RUN useradd -m -u 1000 user
USER user
WORKDIR /app
ENV PATH="/home/user/.local/bin:$PATH"

# Orpheus/SNAC code + server
COPY --chown=user . /app

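# HF_HOME keeps the Hugging Face model cache under /app, a directory the non-root user can write to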
ENV HF_HOME=/app/.cache
ENV VLLM_USE_LM_FORMAT_ENFORCER=0
# GPU-friendly Torch upgrade (if desired)
#RUN pip install --no-cache-dir \
#        torch==2.3.1+cu121 torchaudio==2.3.1+cu121 \
#        --index-url https://download.pytorch.org/whl/cu121

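# Pin transformers and lm-format-enforcer first; installing vLLM below may
# still pull newer versions if its own requirements conflict with these pins.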
RUN pip install --no-cache-dir "transformers==4.40.2" "lm-format-enforcer==0.9.8"
RUN pip install --no-cache-dir "vllm>=0.9.0"

# Python dependencies
COPY --chown=user requirements.txt .
RUN pip install --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

RUN pip install --no-cache-dir "realtimetts[system]>=0.5.5"

RUN pip install --no-cache-dir flashinfer-cu121-preview || echo "FlashInfer not available – continuing without."

# install only *this* required engine dependency afterwards, without the dependency resolver:
# RUN pip install --no-cache-dir pyttsx3==2.90 --no-deps

# optional, to reduce warning spam
# RUN pip install --no-cache-dir azure-cognitiveservices-speech==1.33.0 --no-deps \
#                 tqdm==4.66.1 --no-deps

EXPOSE 7860

# ───── Environment ───────────────────────────────────────

ENV ORPHEUS_MODEL=SebastianBodza/Kartoffel_Orpheus-3B_german_synthetic-v0.1
ENV MODEL_ID="SebastianBodza/Kartoffel_Orpheus-3B_german_synthetic-v0.1"
ENV ORPHEUS_API_URL=http://127.0.0.1:1234
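# ORPHEUS_API_URL is assumed to be read by the app to reach the local vLLM
# OpenAI-compatible server started below on port 1234.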

# ───── Entrypoint ────────────────────────────────────────
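# Start the vLLM OpenAI-compatible server in the background on port 1234,
# then run the ASGI app (app:app) with uvicorn on 7860, the default Spaces port.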
CMD bash -c "\
  python -m vllm.entrypoints.openai.api_server \
         --model ${MODEL_ID} \
         --port 1234 \
         --dtype bfloat16 \
         --gpu-memory-utilization 0.85 \
         --max-model-len 8192 & \
  uvicorn app:app --host 0.0.0.0 --port 7860"
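
# Quick smoke test from the host once the container is running with both ports
# published (assumed endpoints):
#   curl http://localhost:1234/v1/models   # vLLM OpenAI-compatible API
#   curl http://localhost:7860/            # app served by uvicorn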