Spaces:
Sleeping
Sleeping
Update Dockerfile
Browse files- Dockerfile +11 -19
Dockerfile
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
FROM
|
2 |
|
3 |
ENV DEBIAN_FRONTEND=noninteractive
|
4 |
|
@@ -8,7 +8,6 @@ ARG QUANT="Q4_K_M"
|
|
8 |
ARG SERVED_NAME="qwen-nano"
|
9 |
ARG PARALLEL=4
|
10 |
ARG CTX_SIZE=8192
|
11 |
-
ARG KV_CACHE_QUANT="q8_0"
|
12 |
ARG EMBEDDING_ONLY=0
|
13 |
ARG RERANK_ONLY=0
|
14 |
|
@@ -18,8 +17,8 @@ ENV LLAMA_ARG_CTX_SIZE=${CTX_SIZE}
|
|
18 |
ENV LLAMA_ARG_BATCH=512
|
19 |
ENV LLAMA_ARG_N_PARALLEL=${PARALLEL}
|
20 |
ENV LLAMA_ARG_FLASH_ATTN=1
|
21 |
-
ENV LLAMA_ARG_CACHE_TYPE_K="
|
22 |
-
ENV LLAMA_ARG_CACHE_TYPE_V="
|
23 |
ENV LLAMA_ARG_MLOCK=1
|
24 |
ENV LLAMA_ARG_N_GPU_LAYERS=0
|
25 |
ENV LLAMA_ARG_HOST="0.0.0.0"
|
@@ -28,26 +27,15 @@ ENV LLAMA_ARG_ALIAS="${SERVED_NAME}"
|
|
28 |
ENV LLAMA_ARG_EMBEDDINGS=${EMBEDDING_ONLY}
|
29 |
ENV LLAMA_ARG_RERANKING=${RERANK_ONLY}
|
30 |
|
31 |
-
RUN
|
32 |
-
|
33 |
-
git \
|
34 |
-
curl \
|
35 |
-
cmake \
|
36 |
-
ccache \
|
37 |
-
pkg-config \
|
38 |
-
build-essential \
|
39 |
-
ca-certificates \
|
40 |
-
libboost-system-dev \
|
41 |
-
libcurl4-openssl-dev \
|
42 |
-
libboost-filesystem-dev && \
|
43 |
-
rm -rf /var/lib/apt/lists/*
|
44 |
|
45 |
RUN mkdir -p /app && mkdir -p /.cache
|
46 |
# cache dir for llama.cpp to download models
|
47 |
RUN chmod -R 777 /.cache
|
48 |
|
49 |
WORKDIR /app
|
50 |
-
RUN git clone https://github.com/ggml-org/llama.cpp.git
|
51 |
# RUN git clone https://github.com/ikawrakow/ik_llama.cpp.git llama.cpp
|
52 |
WORKDIR /app/llama.cpp
|
53 |
RUN cmake -B build \
|
@@ -57,6 +45,10 @@ RUN cmake -B build \
|
|
57 |
-DLLAMA_BUILD_EXAMPLES=ON \
|
58 |
-DGGML_ALL_WARNINGS=OFF \
|
59 |
-DGGML_ALL_WARNINGS_3RD_PARTY=OFF \
|
|
|
|
|
|
|
|
|
60 |
-DCMAKE_BUILD_TYPE=Release
|
61 |
RUN cmake --build build --config Release --target llama-server -j $(nproc)
|
62 |
|
@@ -64,4 +56,4 @@ WORKDIR /app
|
|
64 |
|
65 |
EXPOSE 7860
|
66 |
|
67 |
-
CMD ["/app/llama.cpp/build/bin/llama-server", "--verbose-prompt"]
|
|
|
1 |
+
FROM archlinux:latest
|
2 |
|
3 |
ENV DEBIAN_FRONTEND=noninteractive
|
4 |
|
|
|
8 |
ARG SERVED_NAME="qwen-nano"
|
9 |
ARG PARALLEL=4
|
10 |
ARG CTX_SIZE=8192
|
|
|
11 |
ARG EMBEDDING_ONLY=0
|
12 |
ARG RERANK_ONLY=0
|
13 |
|
|
|
17 |
ENV LLAMA_ARG_BATCH=512
|
18 |
ENV LLAMA_ARG_N_PARALLEL=${PARALLEL}
|
19 |
ENV LLAMA_ARG_FLASH_ATTN=1
|
20 |
+
ENV LLAMA_ARG_CACHE_TYPE_K="q8_0"
|
21 |
+
ENV LLAMA_ARG_CACHE_TYPE_V="q4_1"
|
22 |
ENV LLAMA_ARG_MLOCK=1
|
23 |
ENV LLAMA_ARG_N_GPU_LAYERS=0
|
24 |
ENV LLAMA_ARG_HOST="0.0.0.0"
|
|
|
27 |
ENV LLAMA_ARG_EMBEDDINGS=${EMBEDDING_ONLY}
|
28 |
ENV LLAMA_ARG_RERANKING=${RERANK_ONLY}
|
29 |
|
30 |
+
RUN pacman -Syu --noconfirm --overwrite '*'
|
31 |
+
RUN pacman -S base-devel git git-lfs cmake curl openblas openblas64 blas64-openblas python gcc-libs glibc
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
RUN mkdir -p /app && mkdir -p /.cache
|
34 |
# cache dir for llama.cpp to download models
|
35 |
RUN chmod -R 777 /.cache
|
36 |
|
37 |
WORKDIR /app
|
38 |
+
RUN git clone --depth 1 --single-branch --branch master https://github.com/ggml-org/llama.cpp.git
|
39 |
# RUN git clone https://github.com/ikawrakow/ik_llama.cpp.git llama.cpp
|
40 |
WORKDIR /app/llama.cpp
|
41 |
RUN cmake -B build \
|
|
|
45 |
-DLLAMA_BUILD_EXAMPLES=ON \
|
46 |
-DGGML_ALL_WARNINGS=OFF \
|
47 |
-DGGML_ALL_WARNINGS_3RD_PARTY=OFF \
|
48 |
+
-DGGML_BLAS=ON \
|
49 |
+
-DGGML_BLAS_VENDOR=OpenBLAS \
|
50 |
+
-DGGML_NATIVE=ON \
|
51 |
+
-Wno-dev \
|
52 |
-DCMAKE_BUILD_TYPE=Release
|
53 |
RUN cmake --build build --config Release --target llama-server -j $(nproc)
|
54 |
|
|
|
56 |
|
57 |
EXPOSE 7860
|
58 |
|
59 |
+
CMD ["/app/llama.cpp/build/bin/llama-server", "--verbose-prompt", "--swa-full"]
|