Create Dockerfile
Dockerfile ADDED +66 -0
@@ -0,0 +1,66 @@
FROM debian:stable-slim

ENV DEBIAN_FRONTEND=noninteractive

# passed from space environment
ARG MODEL_ID="unsloth/Qwen3-1.7B-GGUF"
ARG QUANT="Q4_K_M"
ARG SERVED_NAME="qwen-nano"
ARG PARALLEL=4
ARG CTX_SIZE=8192
ARG KV_CACHE_QUANT="q8_0"
ARG EMBEDDING_ONLY=0
ARG RERANK_ONLY=0

# llama.cpp env configs
ENV LLAMA_ARG_HF_REPO="${MODEL_ID}:${QUANT}"
ENV LLAMA_ARG_CTX_SIZE=${CTX_SIZE}
ENV LLAMA_ARG_BATCH=512
ENV LLAMA_ARG_N_PARALLEL=${PARALLEL}
ENV LLAMA_ARG_FLASH_ATTN=1
ENV LLAMA_ARG_CACHE_TYPE_K="${KV_CACHE_QUANT}"
ENV LLAMA_ARG_CACHE_TYPE_V="${KV_CACHE_QUANT}"
ENV LLAMA_ARG_MLOCK=1
ENV LLAMA_ARG_N_GPU_LAYERS=0
ENV LLAMA_ARG_HOST="0.0.0.0"
ENV LLAMA_ARG_PORT=7860
ENV LLAMA_ARG_ALIAS="${SERVED_NAME}"
ENV LLAMA_ARG_EMBEDDINGS=${EMBEDDING_ONLY}
ENV LLAMA_ARG_RERANKING=${RERANK_ONLY}
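
# llama-server reads LLAMA_ARG_* environment variables as equivalents of its
# CLI flags (e.g. LLAMA_ARG_CTX_SIZE -> --ctx-size, LLAMA_ARG_HF_REPO -> --hf-repo),
# which is why the CMD at the bottom needs no model or network flags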

RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    git \
    curl \
    cmake \
    ccache \
    pkg-config \
    build-essential \
    ca-certificates \
    libboost-system-dev \
    libcurl4-openssl-dev \
    libboost-filesystem-dev && \
    rm -rf /var/lib/apt/lists/*

RUN mkdir -p /app && mkdir -p /.cache
# cache dir for llama.cpp to download models
RUN chmod -R 777 /.cache
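# (Hugging Face Spaces run the container as a non-root user, so the cache
# directory has to be world-writable for the model download to succeed)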

WORKDIR /app
RUN git clone https://github.com/ggml-org/llama.cpp.git
WORKDIR /app/llama.cpp
RUN cmake -B build \
    -DGGML_LTO=ON \
    -DLLAMA_CURL=ON \
    -DLLAMA_BUILD_SERVER=ON \
    -DLLAMA_BUILD_EXAMPLES=ON \
    -DGGML_ALL_WARNINGS=OFF \
    -DGGML_ALL_WARNINGS_3RD_PARTY=OFF \
    -DCMAKE_BUILD_TYPE=Release
RUN cmake --build build --config Release --target llama-server -j $(nproc)
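# -DLLAMA_CURL=ON gives llama-server the HTTP support it needs to pull the
# GGUF named in LLAMA_ARG_HF_REPO at startup; building only the llama-server
# target keeps compile time down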

WORKDIR /app

EXPOSE 7860

CMD ["/app/llama.cpp/build/bin/llama-server", "--verbose-prompt", "--prio", "3"]
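
For a local sanity check, something along these lines should work (the image
tag and prompt are illustrative, not part of the Space; llama-server exposes
an OpenAI-compatible API, and the model name is the SERVED_NAME alias set
above):

  docker build -t qwen-space .
  docker run -p 7860:7860 qwen-space

  curl http://localhost:7860/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{"model": "qwen-nano", "messages": [{"role": "user", "content": "Hello"}]}'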