File size: 1,352 Bytes
49ee602
755842e
ab4421a
7b879b9
6458940
49ee602
 
 
 
 
6458940
 
7b879b9
ad94d02
 
 
7b879b9
9ecbb7d
ad94d02
7b879b9
 
c8f8ff0
7b879b9
675256a
7b879b9
efa5012
49ee602
 
675256a
7b879b9
 
 
 
 
ad94d02
7b879b9
 
c8f8ff0
abb092c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# syntax=docker/dockerfile:1
# GPU-ready base with Python 3.10, CUDA 12.1, cuDNN 8.
# Tag is fully pinned (2.2.2-cuda12.1-cudnn8-runtime) for reproducibility;
# the "runtime" variant ships no nvcc, keeping the image smaller.
FROM pytorch/pytorch:2.2.2-cuda12.1-cudnn8-runtime

# System tools (keep the image lean!)
# --no-install-recommends avoids pulling optional packages; ca-certificates is
# listed explicitly since curl/wget need it for HTTPS. The apt list cleanup
# happens in the SAME layer so the package index never lands in the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
      bash \
      build-essential \
      ca-certificates \
      cmake \
      curl \
      git \
      git-lfs \
      gnupg \
      htop \
      nano \
      procps \
      vim \
      wget && \
    rm -rf /var/lib/apt/lists/*

# Non-root user (UID 1000), because HF Spaces expect one
RUN useradd -m -u 1000 user
USER user
WORKDIR /app
# Make user-level pip installs (~/.local/bin) resolvable on PATH
ENV PATH="/home/user/.local/bin:$PATH"
#ENV HF_HOME=/app/.cache  # HF cache inside your writable directory

# Python dependencies — copy only the manifest first so this layer stays
# cached when application code changes but requirements.txt does not.
COPY --chown=user requirements.txt .
# --no-cache-dir on BOTH steps: otherwise the pip wheel cache from the
# self-upgrade is baked into the layer and bloats the image.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Optional: flash-attn (Ampere sm_86 is detected, a prebuilt wheel exists)
# Install the prebuilt wheel directly — no nvcc/compilation needed, which is
# why the "runtime" (not "devel") base image above suffices.
# Pinned to v2.7.4.post1 for cu12 / torch 2.2 / cp310; this MUST stay in sync
# with the base image's CUDA, torch and Python versions.
RUN pip install --no-cache-dir \
    https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl

# To use a private HF token at build time, mount it as a BuildKit secret:
#   Settings → Secrets → Name: HF_TOKEN  (scope: "read")
# (Never pass the token via ARG/ENV — it would end up in the image history.)
#ARG HF_TOKEN
#RUN --mount=type=secret,id=HF_TOKEN \
#    echo "machine huggingface.co login __token__ password $(cat /run/secrets/HF_TOKEN)" > ~/.netrc

# Application code — copied last so code-only changes never invalidate the
# (much slower) dependency layers above.
COPY --chown=user . /app

# Documents the serving port (HF Spaces convention: 7860); does not publish it.
EXPOSE 7860

# Exec-form CMD: uvicorn runs as PID 1 and receives SIGTERM from `docker stop`.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]