Spaces:
Running
on
A10G
Running
on
A10G
Oleg Shulyakov
committed on
Commit
·
b7bd975
1
Parent(s):
05d1b68
Migrate Docker to official llama.cpp CUDA image
Browse files
- .dockerignore +15 -3
- .gitignore +203 -8
- Dockerfile +14 -49
- docker-compose.yml +4 -4
- requirements.txt +5 -0
- start.sh +3 -15
.dockerignore
CHANGED
@@ -1,3 +1,15 @@
|
|
1 |
-
|
2 |
-
/
|
3 |
-
/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# IDE
|
2 |
+
.idea/
|
3 |
+
.vscode/
|
4 |
+
|
5 |
+
.git*
|
6 |
+
.dockerignore
|
7 |
+
docker-compose.yml
|
8 |
+
Dockerfile
|
9 |
+
|
10 |
+
# LLama.cpp
|
11 |
+
llama.cpp/
|
12 |
+
|
13 |
+
# Working files
|
14 |
+
downloads/
|
15 |
+
outputs/
|
.gitignore
CHANGED
@@ -1,3 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# Byte-compiled / optimized / DLL files
|
2 |
__pycache__/
|
3 |
*.py[cod]
|
@@ -11,7 +150,6 @@ __pycache__/
|
|
11 |
build/
|
12 |
develop-eggs/
|
13 |
dist/
|
14 |
-
downloads/
|
15 |
eggs/
|
16 |
.eggs/
|
17 |
lib/
|
@@ -106,10 +244,8 @@ ipython_config.py
|
|
106 |
#pdm.lock
|
107 |
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
108 |
# in version control.
|
109 |
-
# https://pdm.fming.dev
|
110 |
.pdm.toml
|
111 |
-
.pdm-python
|
112 |
-
.pdm-build/
|
113 |
|
114 |
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
115 |
__pypackages__/
|
@@ -161,7 +297,66 @@ cython_debug/
|
|
161 |
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
162 |
#.idea/
|
163 |
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Created by https://www.toptal.com/developers/gitignore/api/linux,macos,windows,python,jetbrains+all,visualstudiocode
|
2 |
+
# Edit at https://www.toptal.com/developers/gitignore?templates=linux,macos,windows,python,jetbrains+all,visualstudiocode
|
3 |
+
|
4 |
+
### JetBrains+all ###
|
5 |
+
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
|
6 |
+
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
|
7 |
+
|
8 |
+
# User-specific stuff
|
9 |
+
.idea/**/workspace.xml
|
10 |
+
.idea/**/tasks.xml
|
11 |
+
.idea/**/usage.statistics.xml
|
12 |
+
.idea/**/dictionaries
|
13 |
+
.idea/**/shelf
|
14 |
+
|
15 |
+
# AWS User-specific
|
16 |
+
.idea/**/aws.xml
|
17 |
+
|
18 |
+
# Generated files
|
19 |
+
.idea/**/contentModel.xml
|
20 |
+
|
21 |
+
# Sensitive or high-churn files
|
22 |
+
.idea/**/dataSources/
|
23 |
+
.idea/**/dataSources.ids
|
24 |
+
.idea/**/dataSources.local.xml
|
25 |
+
.idea/**/sqlDataSources.xml
|
26 |
+
.idea/**/dynamic.xml
|
27 |
+
.idea/**/uiDesigner.xml
|
28 |
+
.idea/**/dbnavigator.xml
|
29 |
+
|
30 |
+
# Gradle
|
31 |
+
.idea/**/gradle.xml
|
32 |
+
.idea/**/libraries
|
33 |
+
|
34 |
+
# Gradle and Maven with auto-import
|
35 |
+
# When using Gradle or Maven with auto-import, you should exclude module files,
|
36 |
+
# since they will be recreated, and may cause churn. Uncomment if using
|
37 |
+
# auto-import.
|
38 |
+
# .idea/artifacts
|
39 |
+
# .idea/compiler.xml
|
40 |
+
# .idea/jarRepositories.xml
|
41 |
+
# .idea/modules.xml
|
42 |
+
# .idea/*.iml
|
43 |
+
# .idea/modules
|
44 |
+
# *.iml
|
45 |
+
# *.ipr
|
46 |
+
|
47 |
+
# CMake
|
48 |
+
cmake-build-*/
|
49 |
+
|
50 |
+
# Mongo Explorer plugin
|
51 |
+
.idea/**/mongoSettings.xml
|
52 |
+
|
53 |
+
# File-based project format
|
54 |
+
*.iws
|
55 |
+
|
56 |
+
# IntelliJ
|
57 |
+
out/
|
58 |
+
|
59 |
+
# mpeltonen/sbt-idea plugin
|
60 |
+
.idea_modules/
|
61 |
+
|
62 |
+
# JIRA plugin
|
63 |
+
atlassian-ide-plugin.xml
|
64 |
+
|
65 |
+
# Cursive Clojure plugin
|
66 |
+
.idea/replstate.xml
|
67 |
+
|
68 |
+
# SonarLint plugin
|
69 |
+
.idea/sonarlint/
|
70 |
+
|
71 |
+
# Crashlytics plugin (for Android Studio and IntelliJ)
|
72 |
+
com_crashlytics_export_strings.xml
|
73 |
+
crashlytics.properties
|
74 |
+
crashlytics-build.properties
|
75 |
+
fabric.properties
|
76 |
+
|
77 |
+
# Editor-based Rest Client
|
78 |
+
.idea/httpRequests
|
79 |
+
|
80 |
+
# Android studio 3.1+ serialized cache file
|
81 |
+
.idea/caches/build_file_checksums.ser
|
82 |
+
|
83 |
+
### JetBrains+all Patch ###
|
84 |
+
# Ignore everything but code style settings and run configurations
|
85 |
+
# that are supposed to be shared within teams.
|
86 |
+
|
87 |
+
.idea/*
|
88 |
+
|
89 |
+
!.idea/codeStyles
|
90 |
+
!.idea/runConfigurations
|
91 |
+
|
92 |
+
### Linux ###
|
93 |
+
*~
|
94 |
+
|
95 |
+
# temporary files which can be created if a process still has a handle open of a deleted file
|
96 |
+
.fuse_hidden*
|
97 |
+
|
98 |
+
# KDE directory preferences
|
99 |
+
.directory
|
100 |
+
|
101 |
+
# Linux trash folder which might appear on any partition or disk
|
102 |
+
.Trash-*
|
103 |
+
|
104 |
+
# .nfs files are created when an open file is removed but is still being accessed
|
105 |
+
.nfs*
|
106 |
+
|
107 |
+
### macOS ###
|
108 |
+
# General
|
109 |
+
.DS_Store
|
110 |
+
.AppleDouble
|
111 |
+
.LSOverride
|
112 |
+
|
113 |
+
# Icon must end with two \r
|
114 |
+
Icon
|
115 |
+
|
116 |
+
# Thumbnails
|
117 |
+
._*
|
118 |
+
|
119 |
+
# Files that might appear in the root of a volume
|
120 |
+
.DocumentRevisions-V100
|
121 |
+
.fseventsd
|
122 |
+
.Spotlight-V100
|
123 |
+
.TemporaryItems
|
124 |
+
.Trashes
|
125 |
+
.VolumeIcon.icns
|
126 |
+
.com.apple.timemachine.donotpresent
|
127 |
+
|
128 |
+
# Directories potentially created on remote AFP share
|
129 |
+
.AppleDB
|
130 |
+
.AppleDesktop
|
131 |
+
Network Trash Folder
|
132 |
+
Temporary Items
|
133 |
+
.apdisk
|
134 |
+
|
135 |
+
### macOS Patch ###
|
136 |
+
# iCloud generated files
|
137 |
+
*.icloud
|
138 |
+
|
139 |
+
### Python ###
|
140 |
# Byte-compiled / optimized / DLL files
|
141 |
__pycache__/
|
142 |
*.py[cod]
|
|
|
150 |
build/
|
151 |
develop-eggs/
|
152 |
dist/
|
|
|
153 |
eggs/
|
154 |
.eggs/
|
155 |
lib/
|
|
|
244 |
#pdm.lock
|
245 |
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
246 |
# in version control.
|
247 |
+
# https://pdm.fming.dev/#use-with-ide
|
248 |
.pdm.toml
|
|
|
|
|
249 |
|
250 |
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
251 |
__pypackages__/
|
|
|
297 |
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
298 |
#.idea/
|
299 |
|
300 |
+
### Python Patch ###
|
301 |
+
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
|
302 |
+
poetry.toml
|
303 |
+
|
304 |
+
# ruff
|
305 |
+
.ruff_cache/
|
306 |
+
|
307 |
+
# LSP config files
|
308 |
+
pyrightconfig.json
|
309 |
+
|
310 |
+
### VisualStudioCode ###
|
311 |
+
.vscode/*
|
312 |
+
!.vscode/settings.json
|
313 |
+
!.vscode/tasks.json
|
314 |
+
!.vscode/launch.json
|
315 |
+
!.vscode/extensions.json
|
316 |
+
!.vscode/*.code-snippets
|
317 |
+
|
318 |
+
# Local History for Visual Studio Code
|
319 |
+
.history/
|
320 |
+
|
321 |
+
# Built Visual Studio Code Extensions
|
322 |
+
*.vsix
|
323 |
+
|
324 |
+
### VisualStudioCode Patch ###
|
325 |
+
# Ignore all local history of files
|
326 |
+
.history
|
327 |
+
.ionide
|
328 |
+
|
329 |
+
### Windows ###
|
330 |
+
# Windows thumbnail cache files
|
331 |
+
Thumbs.db
|
332 |
+
Thumbs.db:encryptable
|
333 |
+
ehthumbs.db
|
334 |
+
ehthumbs_vista.db
|
335 |
+
|
336 |
+
# Dump file
|
337 |
+
*.stackdump
|
338 |
+
|
339 |
+
# Folder config file
|
340 |
+
[Dd]esktop.ini
|
341 |
+
|
342 |
+
# Recycle Bin used on file shares
|
343 |
+
$RECYCLE.BIN/
|
344 |
+
|
345 |
+
# Windows Installer files
|
346 |
+
*.cab
|
347 |
+
*.msi
|
348 |
+
*.msix
|
349 |
+
*.msm
|
350 |
+
*.msp
|
351 |
+
|
352 |
+
# Windows shortcuts
|
353 |
+
*.lnk
|
354 |
+
|
355 |
+
# End of https://www.toptal.com/developers/gitignore/api/linux,macos,windows,python,jetbrains+all,visualstudiocode
|
356 |
+
|
357 |
+
# Working folders
|
358 |
+
downloads/
|
359 |
+
outputs/
|
360 |
+
llama.cpp/
|
361 |
+
|
362 |
+
!*/.keep
|
Dockerfile
CHANGED
@@ -1,65 +1,30 @@
|
|
1 |
-
FROM
|
2 |
-
|
3 |
-
ENV DEBIAN_FRONTEND=noninteractive
|
4 |
-
RUN apt-get update && \
|
5 |
-
apt-get upgrade -y && \
|
6 |
-
apt-get install -y --no-install-recommends --fix-missing \
|
7 |
-
git \
|
8 |
-
git-lfs \
|
9 |
-
wget \
|
10 |
-
curl \
|
11 |
-
cmake \
|
12 |
-
# python build dependencies \
|
13 |
-
build-essential \
|
14 |
-
libssl-dev \
|
15 |
-
zlib1g-dev \
|
16 |
-
libbz2-dev \
|
17 |
-
libreadline-dev \
|
18 |
-
libsqlite3-dev \
|
19 |
-
libncursesw5-dev \
|
20 |
-
xz-utils \
|
21 |
-
tk-dev \
|
22 |
-
libxml2-dev \
|
23 |
-
libxmlsec1-dev \
|
24 |
-
libffi-dev \
|
25 |
-
liblzma-dev \
|
26 |
-
ffmpeg \
|
27 |
-
nvidia-driver-570
|
28 |
|
29 |
# Check if user with UID 1000 exists, if not create it
|
30 |
RUN id -u 1000 &>/dev/null || useradd -m -u 1000 user
|
31 |
USER 1000
|
|
|
32 |
ENV HOME=/home/user \
|
33 |
-
PATH
|
34 |
-
|
|
|
35 |
|
36 |
-
|
37 |
-
ENV PATH=${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH}
|
38 |
-
ARG PYTHON_VERSION=3.11
|
39 |
-
RUN pyenv install ${PYTHON_VERSION} && \
|
40 |
-
pyenv global ${PYTHON_VERSION} && \
|
41 |
-
pyenv rehash && \
|
42 |
-
pip install --no-cache-dir -U pip setuptools wheel && \
|
43 |
-
pip install "huggingface-hub" "hf-transfer" "gradio[oauth]" "gradio_huggingfacehub_search" "APScheduler"
|
44 |
|
45 |
-
COPY --chown=1000 . ${HOME}/app
|
46 |
-
RUN git clone https://github.com/ggerganov/llama.cpp
|
47 |
-
RUN pip install -r llama.cpp/requirements/requirements-convert_hf_to_gguf.txt
|
48 |
|
49 |
-
|
50 |
|
51 |
-
ENV PYTHONPATH=${HOME}/
|
52 |
-
|
|
|
53 |
HF_HUB_ENABLE_HF_TRANSFER=1 \
|
54 |
GRADIO_ALLOW_FLAGGING=never \
|
55 |
GRADIO_NUM_PORTS=1 \
|
56 |
GRADIO_SERVER_NAME=0.0.0.0 \
|
57 |
GRADIO_THEME=huggingface \
|
58 |
-
|
59 |
-
TQDM_MININTERVAL=1 \
|
60 |
-
SYSTEM=spaces \
|
61 |
-
LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH} \
|
62 |
-
PATH=/usr/local/nvidia/bin:${PATH}
|
63 |
|
64 |
-
|
65 |
|
|
|
|
1 |
+
FROM ghcr.io/ggml-org/llama.cpp:full-cuda
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
# Check if user with UID 1000 exists, if not create it
|
4 |
RUN id -u 1000 &>/dev/null || useradd -m -u 1000 user
|
5 |
USER 1000
|
6 |
+
|
7 |
ENV HOME=/home/user \
|
8 |
+
PATH=${PATH}:/home/user/.local/bin \
|
9 |
+
PATH=${PATH}:/app \
|
10 |
+
PATH=${PATH}:/usr/local/nvidia/bin
|
11 |
|
12 |
+
WORKDIR ${HOME}/app
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
+
COPY --chown=1000 requirements.txt ${HOME}/app
|
|
|
|
|
15 |
|
16 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
17 |
|
18 |
+
ENV PYTHONPATH=${PYTHONPATH}:${HOME}/.local/bin \
|
19 |
+
LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/app \
|
20 |
+
LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64 \
|
21 |
HF_HUB_ENABLE_HF_TRANSFER=1 \
|
22 |
GRADIO_ALLOW_FLAGGING=never \
|
23 |
GRADIO_NUM_PORTS=1 \
|
24 |
GRADIO_SERVER_NAME=0.0.0.0 \
|
25 |
GRADIO_THEME=huggingface \
|
26 |
+
SYSTEM=spaces
|
|
|
|
|
|
|
|
|
27 |
|
28 |
+
COPY --chown=1000 . ${HOME}/app
|
29 |
|
30 |
+
ENTRYPOINT ["/bin/bash", "start.sh"]
|
docker-compose.yml
CHANGED
@@ -1,16 +1,16 @@
|
|
1 |
-
# Docker compose file to LOCAL development
|
2 |
-
|
3 |
services:
|
4 |
gguf-my-repo:
|
5 |
build:
|
6 |
context: .
|
7 |
dockerfile: Dockerfile
|
8 |
-
image: gguf-my-repo
|
9 |
container_name: gguf-my-repo
|
10 |
ports:
|
11 |
- "7860:7860"
|
12 |
volumes:
|
13 |
- .:/home/user/app
|
14 |
environment:
|
15 |
-
-
|
|
|
16 |
- HF_TOKEN=${HF_TOKEN}
|
|
|
|
|
|
|
|
1 |
services:
|
2 |
gguf-my-repo:
|
3 |
build:
|
4 |
context: .
|
5 |
dockerfile: Dockerfile
|
6 |
+
image: gguf-my-repo-cuda
|
7 |
container_name: gguf-my-repo
|
8 |
ports:
|
9 |
- "7860:7860"
|
10 |
volumes:
|
11 |
- .:/home/user/app
|
12 |
environment:
|
13 |
+
- RUN_CUDA=1
|
14 |
+
- RUN_LOCALLY=0
|
15 |
- HF_TOKEN=${HF_TOKEN}
|
16 |
+
- HF_HUB_CACHE=/home/user/app/downloads
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
huggingface-hub
|
2 |
+
hf-transfer
|
3 |
+
gradio[oauth]
|
4 |
+
gradio_huggingfacehub_search
|
5 |
+
APScheduler
|
start.sh
CHANGED
@@ -1,21 +1,9 @@
|
|
1 |
#!/bin/bash
|
2 |
|
3 |
-
if [ ! -d "llama.cpp" ]; then
|
4 |
-
# only run in dev env
|
5 |
-
git clone https://github.com/ggerganov/llama.cpp
|
6 |
-
fi
|
7 |
-
|
8 |
export GGML_CUDA=OFF
|
9 |
-
|
10 |
-
|
11 |
export GGML_CUDA=ON
|
12 |
fi
|
13 |
|
14 |
-
|
15 |
-
cmake -B build -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=${GGML_CUDA} -DLLAMA_CURL=OFF
|
16 |
-
cmake --build build --config Release -j --target llama-quantize llama-gguf-split llama-imatrix
|
17 |
-
cp ./build/bin/llama-* .
|
18 |
-
rm -rf build
|
19 |
-
|
20 |
-
cd ..
|
21 |
-
python app.py
|
|
|
1 |
#!/bin/bash
|
2 |
|
|
|
|
|
|
|
|
|
|
|
3 |
export GGML_CUDA=OFF
|
4 |
+
# enable CUDA
|
5 |
+
if [[ -z "${RUN_CUDA}" ]]; then
|
6 |
export GGML_CUDA=ON
|
7 |
fi
|
8 |
|
9 |
+
python3 app.py
|
|
|
|
|
|
|
|
|
|
|
|
|
|