Orion-zhen committed on
Commit
96e2315
·
verified ·
1 Parent(s): 1f012fd

Create Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +66 -0
Dockerfile ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Build llama.cpp's llama-server from source on Debian and run it as an
# OpenAI-compatible HTTP endpoint (defaults tuned for a Hugging Face Space:
# port 7860, CPU-only, world-writable model cache).
FROM debian:stable-slim

# Build-time only: silence debconf prompts during apt-get. Declared as ARG
# (not ENV) so the setting does not leak into the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

# passed from space environment
ARG MODEL_ID="unsloth/Qwen3-1.7B-GGUF"
ARG QUANT="Q4_K_M"
ARG SERVED_NAME="qwen-nano"
ARG PARALLEL=4
ARG CTX_SIZE=8192
ARG KV_CACHE_QUANT="q8_0"
ARG EMBEDDING_ONLY=0
ARG RERANK_ONLY=0
# Git ref of llama.cpp to build. Defaults to master (same as before); pin to
# a release tag (e.g. "b4600") for reproducible image builds.
ARG LLAMA_CPP_REF="master"

# llama.cpp env configs — llama-server reads LLAMA_ARG_* at startup, so these
# must be ENV (runtime), not ARG.
ENV LLAMA_ARG_HF_REPO="${MODEL_ID}:${QUANT}"
ENV LLAMA_ARG_CTX_SIZE=${CTX_SIZE}
ENV LLAMA_ARG_BATCH=512
ENV LLAMA_ARG_N_PARALLEL=${PARALLEL}
# NOTE(review): newer llama.cpp revisions expect on/off/auto for flash-attn;
# "1" may be rejected when tracking master — confirm against the pinned ref.
ENV LLAMA_ARG_FLASH_ATTN=1
ENV LLAMA_ARG_CACHE_TYPE_K="${KV_CACHE_QUANT}"
ENV LLAMA_ARG_CACHE_TYPE_V="${KV_CACHE_QUANT}"
ENV LLAMA_ARG_MLOCK=1
ENV LLAMA_ARG_N_GPU_LAYERS=0
ENV LLAMA_ARG_HOST="0.0.0.0"
ENV LLAMA_ARG_PORT=7860
ENV LLAMA_ARG_ALIAS="${SERVED_NAME}"
ENV LLAMA_ARG_EMBEDDINGS=${EMBEDDING_ONLY}
ENV LLAMA_ARG_RERANKING=${RERANK_ONLY}

# Toolchain and headers needed to build llama-server. libcurl4-openssl-dev
# enables the built-in downloader used by LLAMA_ARG_HF_REPO.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    git \
    curl \
    cmake \
    ccache \
    pkg-config \
    build-essential \
    ca-certificates \
    libboost-system-dev \
    libcurl4-openssl-dev \
    libboost-filesystem-dev && \
    rm -rf /var/lib/apt/lists/*

# /app holds the source checkout; /.cache is where llama.cpp downloads models
# at runtime. chmod 777 because Spaces run the container as an arbitrary
# non-root UID. (Merged into one layer.)
RUN mkdir -p /app /.cache && chmod -R 777 /.cache

WORKDIR /app
# Shallow, single-branch clone of the requested ref: no history, much smaller
# layer, and reproducible when LLAMA_CPP_REF is pinned.
RUN git clone --depth 1 --branch "${LLAMA_CPP_REF}" \
    https://github.com/ggml-org/llama.cpp.git
WORKDIR /app/llama.cpp
RUN cmake -B build \
    -DGGML_LTO=ON \
    -DLLAMA_CURL=ON \
    -DLLAMA_BUILD_SERVER=ON \
    -DLLAMA_BUILD_EXAMPLES=ON \
    -DGGML_ALL_WARNINGS=OFF \
    -DGGML_ALL_WARNINGS_3RD_PARTY=OFF \
    -DCMAKE_BUILD_TYPE=Release
RUN cmake --build build --config Release --target llama-server -j "$(nproc)"

WORKDIR /app

EXPOSE 7860

# --verbose-prompt: log prompt content per request; --prio 3: request elevated
# process scheduling priority for the server.
CMD ["/app/llama.cpp/build/bin/llama-server", "--verbose-prompt", "--prio", "3"]