# Use an official Python runtime as a base image
FROM python:3.10

# Environment: non-interactive apt, Rust toolchain paths, and Hugging Face cache locations
ENV DEBIAN_FRONTEND=noninteractive \
    RUSTUP_HOME=/root/.rustup \
    CARGO_HOME=/root/.cargo \
    PATH=/root/.cargo/bin:$PATH \
    TRANSFORMERS_CACHE=/app/cache \
    HF_HOME=/app/cache
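
# Note: TRANSFORMERS_CACHE and HF_HOME both point at /app/cache; recent
# transformers releases prefer HF_HOME and warn that TRANSFORMERS_CACHE is
# deprecated, so setting both keeps old and new library versions consistent.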

# Set working directory
WORKDIR /app

# Install system dependencies, Rust, and build tools
RUN apt-get update && apt-get install -y --no-install-recommends \
      build-essential cmake git curl wget ninja-build libgomp1 ca-certificates \
      gcc g++ libffi-dev libgcc-s1 libstdc++6 libopenblas-dev \
    && rm -rf /var/lib/apt/lists/* \
    && curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y \
    && rustup default stable
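
# Optional sanity check (a debugging aid only; safe to omit):
#   RUN rustc --version && cargo --version && cmake --version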

# Prepare a world-writable cache directory (hosted platforms often run the container as a non-root user)
RUN mkdir -p /app/cache && chmod -R 777 /app/cache

# Copy and install Python requirements (excluding llama-cpp-python)
COPY requirements.txt .
RUN sed -i '/llama-cpp-python/d' requirements.txt \
 && pip install --no-cache-dir -r requirements.txt
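
# llama-cpp-python is stripped from requirements.txt above so that it can be
# compiled from source in the next step against this image's toolchain (an
# inference from this Dockerfile's structure, not a stated upstream requirement).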
# Clone & build llama-cpp-python (with its llama.cpp submodule)
RUN git clone --recursive https://github.com/abetlen/llama-cpp-python.git /tmp/llama-cpp-python \
 && cd /tmp/llama-cpp-python \
 # ensure we have all submodules
 && git submodule update --init --recursive \
 # install from source
 && python -m pip install --no-cache-dir . \
 && rm -rf /tmp/llama-cpp-python
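
# Optional sketch (an assumption, not a tested configuration; BLAS flag names
# vary with the bundled llama.cpp version): with libopenblas-dev installed
# above, the source build could be pointed at OpenBLAS by prefixing the
# install step, e.g.:
#   CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" python -m pip install --no-cache-dir .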

# Copy application code
COPY llm_server.py ./

# Expose the port your FastAPI app runs on
EXPOSE 7860

# Launch
CMD ["uvicorn", "llm_server:app", "--host", "0.0.0.0", "--port", "7860"]
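
# Example usage (image name is illustrative):
#   docker build -t llm-server .
#   docker run -p 7860:7860 llm-server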