Orion-zhen committed on
Commit f527df5 · verified · 1 Parent(s): 64cbfb9

Update Dockerfile

Files changed (1)
  1. Dockerfile +11 -19
Dockerfile CHANGED
@@ -1,4 +1,4 @@
-FROM debian:stable-slim
+FROM archlinux:latest
 
 ENV DEBIAN_FRONTEND=noninteractive
 
@@ -8,7 +8,6 @@ ARG QUANT="Q4_K_M"
 ARG SERVED_NAME="qwen-nano"
 ARG PARALLEL=4
 ARG CTX_SIZE=8192
-ARG KV_CACHE_QUANT="q8_0"
 ARG EMBEDDING_ONLY=0
 ARG RERANK_ONLY=0
 
@@ -18,8 +17,8 @@ ENV LLAMA_ARG_CTX_SIZE=${CTX_SIZE}
 ENV LLAMA_ARG_BATCH=512
 ENV LLAMA_ARG_N_PARALLEL=${PARALLEL}
 ENV LLAMA_ARG_FLASH_ATTN=1
-ENV LLAMA_ARG_CACHE_TYPE_K="${KV_CACHE_QUANT}"
-ENV LLAMA_ARG_CACHE_TYPE_V="${KV_CACHE_QUANT}"
+ENV LLAMA_ARG_CACHE_TYPE_K="q8_0"
+ENV LLAMA_ARG_CACHE_TYPE_V="q4_1"
 ENV LLAMA_ARG_MLOCK=1
 ENV LLAMA_ARG_N_GPU_LAYERS=0
 ENV LLAMA_ARG_HOST="0.0.0.0"
@@ -28,26 +27,15 @@ ENV LLAMA_ARG_ALIAS="${SERVED_NAME}"
 ENV LLAMA_ARG_EMBEDDINGS=${EMBEDDING_ONLY}
 ENV LLAMA_ARG_RERANKING=${RERANK_ONLY}
 
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
-    git \
-    curl \
-    cmake \
-    ccache \
-    pkg-config \
-    build-essential \
-    ca-certificates \
-    libboost-system-dev \
-    libcurl4-openssl-dev \
-    libboost-filesystem-dev && \
-    rm -rf /var/lib/apt/lists/*
+RUN pacman -Syu --noconfirm --overwrite '*'
+RUN pacman -S base-devel git git-lfs cmake curl openblas openblas64 blas64-openblas python gcc-libs glibc
 
 RUN mkdir -p /app && mkdir -p /.cache
 # cache dir for llama.cpp to download models
 RUN chmod -R 777 /.cache
 
 WORKDIR /app
-RUN git clone https://github.com/ggml-org/llama.cpp.git
+RUN git clone --depth 1 --single-branch --branch master https://github.com/ggml-org/llama.cpp.git
 # RUN git clone https://github.com/ikawrakow/ik_llama.cpp.git llama.cpp
 WORKDIR /app/llama.cpp
 RUN cmake -B build \
@@ -57,6 +45,10 @@ RUN cmake -B build \
     -DLLAMA_BUILD_EXAMPLES=ON \
     -DGGML_ALL_WARNINGS=OFF \
     -DGGML_ALL_WARNINGS_3RD_PARTY=OFF \
+    -DGGML_BLAS=ON \
+    -DGGML_BLAS_VENDOR=OpenBLAS \
+    -DGGML_NATIVE=ON \
+    -Wno-dev \
     -DCMAKE_BUILD_TYPE=Release
 RUN cmake --build build --config Release --target llama-server -j $(nproc)
 
@@ -64,4 +56,4 @@ WORKDIR /app
 
 EXPOSE 7860
 
-CMD ["/app/llama.cpp/build/bin/llama-server", "--verbose-prompt"]
+CMD ["/app/llama.cpp/build/bin/llama-server", "--verbose-prompt", "--swa-full"]