Oleg Shulyakov committed
Commit b7bd975 · 1 Parent(s): 05d1b68

Migrate Docker to official llama.cpp CUDA image

Files changed (6)
  1. .dockerignore +15 -3
  2. .gitignore +203 -8
  3. Dockerfile +14 -49
  4. docker-compose.yml +4 -4
  5. requirements.txt +5 -0
  6. start.sh +3 -15
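In short, the build now starts from the official llama.cpp CUDA image instead of a hand-built nvidia/cuda + pyenv environment, the Python dependencies move into requirements.txt, and start.sh no longer clones or builds llama.cpp. A quick way to pre-fetch the new base image (name taken from the updated Dockerfile; GPU use assumes the NVIDIA Container Toolkit is installed):

    docker pull ghcr.io/ggml-org/llama.cpp:full-cuda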
.dockerignore CHANGED
@@ -1,3 +1,15 @@
-/downloads
-/llama.cpp
-/outputs
+# IDE
+.idea/
+.vscode/
+
+.git*
+.dockerignore
+docker-compose.yml
+Dockerfile
+
+# LLama.cpp
+llama.cpp/
+
+# Working files
+downloads/
+outputs/
.gitignore CHANGED
@@ -1,3 +1,142 @@
+# Created by https://www.toptal.com/developers/gitignore/api/linux,macos,windows,python,jetbrains+all,visualstudiocode
+# Edit at https://www.toptal.com/developers/gitignore?templates=linux,macos,windows,python,jetbrains+all,visualstudiocode
+
+### JetBrains+all ###
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
+# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
+
+# User-specific stuff
+.idea/**/workspace.xml
+.idea/**/tasks.xml
+.idea/**/usage.statistics.xml
+.idea/**/dictionaries
+.idea/**/shelf
+
+# AWS User-specific
+.idea/**/aws.xml
+
+# Generated files
+.idea/**/contentModel.xml
+
+# Sensitive or high-churn files
+.idea/**/dataSources/
+.idea/**/dataSources.ids
+.idea/**/dataSources.local.xml
+.idea/**/sqlDataSources.xml
+.idea/**/dynamic.xml
+.idea/**/uiDesigner.xml
+.idea/**/dbnavigator.xml
+
+# Gradle
+.idea/**/gradle.xml
+.idea/**/libraries
+
+# Gradle and Maven with auto-import
+# When using Gradle or Maven with auto-import, you should exclude module files,
+# since they will be recreated, and may cause churn. Uncomment if using
+# auto-import.
+# .idea/artifacts
+# .idea/compiler.xml
+# .idea/jarRepositories.xml
+# .idea/modules.xml
+# .idea/*.iml
+# .idea/modules
+# *.iml
+# *.ipr
+
+# CMake
+cmake-build-*/
+
+# Mongo Explorer plugin
+.idea/**/mongoSettings.xml
+
+# File-based project format
+*.iws
+
+# IntelliJ
+out/
+
+# mpeltonen/sbt-idea plugin
+.idea_modules/
+
+# JIRA plugin
+atlassian-ide-plugin.xml
+
+# Cursive Clojure plugin
+.idea/replstate.xml
+
+# SonarLint plugin
+.idea/sonarlint/
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+com_crashlytics_export_strings.xml
+crashlytics.properties
+crashlytics-build.properties
+fabric.properties
+
+# Editor-based Rest Client
+.idea/httpRequests
+
+# Android studio 3.1+ serialized cache file
+.idea/caches/build_file_checksums.ser
+
+### JetBrains+all Patch ###
+# Ignore everything but code style settings and run configurations
+# that are supposed to be shared within teams.
+
+.idea/*
+
+!.idea/codeStyles
+!.idea/runConfigurations
+
+### Linux ###
+*~
+
+# temporary files which can be created if a process still has a handle open of a deleted file
+.fuse_hidden*
+
+# KDE directory preferences
+.directory
+
+# Linux trash folder which might appear on any partition or disk
+.Trash-*
+
+# .nfs files are created when an open file is removed but is still being accessed
+.nfs*
+
+### macOS ###
+# General
+.DS_Store
+.AppleDouble
+.LSOverride
+
+# Icon must end with two \r
+Icon
+
+# Thumbnails
+._*
+
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+
+### macOS Patch ###
+# iCloud generated files
+*.icloud
+
+### Python ###
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
@@ -11,7 +150,6 @@ __pycache__/
 build/
 develop-eggs/
 dist/
-downloads/
 eggs/
 .eggs/
 lib/
@@ -106,10 +244,8 @@ ipython_config.py
 #pdm.lock
 # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
 # in version control.
-# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+# https://pdm.fming.dev/#use-with-ide
 .pdm.toml
-.pdm-python
-.pdm-build/

 # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
 __pypackages__/
@@ -161,7 +297,66 @@ cython_debug/
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/

-/downloads
-!/downloads/.keep
-/llama.cpp
-/outputs
+### Python Patch ###
+# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
+poetry.toml
+
+# ruff
+.ruff_cache/
+
+# LSP config files
+pyrightconfig.json
+
+### VisualStudioCode ###
+.vscode/*
+!.vscode/settings.json
+!.vscode/tasks.json
+!.vscode/launch.json
+!.vscode/extensions.json
+!.vscode/*.code-snippets
+
+# Local History for Visual Studio Code
+.history/
+
+# Built Visual Studio Code Extensions
+*.vsix
+
+### VisualStudioCode Patch ###
+# Ignore all local history of files
+.history
+.ionide
+
+### Windows ###
+# Windows thumbnail cache files
+Thumbs.db
+Thumbs.db:encryptable
+ehthumbs.db
+ehthumbs_vista.db
+
+# Dump file
+*.stackdump
+
+# Folder config file
+[Dd]esktop.ini
+
+# Recycle Bin used on file shares
+$RECYCLE.BIN/
+
+# Windows Installer files
+*.cab
+*.msi
+*.msix
+*.msm
+*.msp
+
+# Windows shortcuts
+*.lnk
+
+# End of https://www.toptal.com/developers/gitignore/api/linux,macos,windows,python,jetbrains+all,visualstudiocode
+
+# Working folders
+downloads/
+outputs/
+llama.cpp/
+
+!*/.keep
Dockerfile CHANGED
@@ -1,65 +1,30 @@
-FROM nvidia/cuda:12.8.0-cudnn-devel-ubuntu24.04
-
-ENV DEBIAN_FRONTEND=noninteractive
-RUN apt-get update && \
-    apt-get upgrade -y && \
-    apt-get install -y --no-install-recommends --fix-missing \
-    git \
-    git-lfs \
-    wget \
-    curl \
-    cmake \
-    # python build dependencies \
-    build-essential \
-    libssl-dev \
-    zlib1g-dev \
-    libbz2-dev \
-    libreadline-dev \
-    libsqlite3-dev \
-    libncursesw5-dev \
-    xz-utils \
-    tk-dev \
-    libxml2-dev \
-    libxmlsec1-dev \
-    libffi-dev \
-    liblzma-dev \
-    ffmpeg \
-    nvidia-driver-570
+FROM ghcr.io/ggml-org/llama.cpp:full-cuda

 # Check if user with UID 1000 exists, if not create it
 RUN id -u 1000 &>/dev/null || useradd -m -u 1000 user
 USER 1000
+
 ENV HOME=/home/user \
-    PATH=/home/user/.local/bin:${PATH}
-WORKDIR ${HOME}/app
+    PATH=${PATH}:/home/user/.local/bin \
+    PATH=${PATH}:/app \
+    PATH=${PATH}:/usr/local/nvidia/bin

-RUN curl https://pyenv.run | bash
-ENV PATH=${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH}
-ARG PYTHON_VERSION=3.11
-RUN pyenv install ${PYTHON_VERSION} && \
-    pyenv global ${PYTHON_VERSION} && \
-    pyenv rehash && \
-    pip install --no-cache-dir -U pip setuptools wheel && \
-    pip install "huggingface-hub" "hf-transfer" "gradio[oauth]" "gradio_huggingfacehub_search" "APScheduler"
+WORKDIR ${HOME}/app

-COPY --chown=1000 . ${HOME}/app
-RUN git clone https://github.com/ggerganov/llama.cpp
-RUN pip install -r llama.cpp/requirements/requirements-convert_hf_to_gguf.txt
+COPY --chown=1000 requirements.txt ${HOME}/app

-COPY groups_merged.txt ${HOME}/app/llama.cpp/
+RUN pip install --no-cache-dir -r requirements.txt

-ENV PYTHONPATH=${HOME}/app \
-    PYTHONUNBUFFERED=1 \
+ENV PYTHONPATH=${PYTHONPATH}:${HOME}/.local/bin \
+    LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/app \
+    LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64 \
     HF_HUB_ENABLE_HF_TRANSFER=1 \
     GRADIO_ALLOW_FLAGGING=never \
     GRADIO_NUM_PORTS=1 \
     GRADIO_SERVER_NAME=0.0.0.0 \
     GRADIO_THEME=huggingface \
-    TQDM_POSITION=-1 \
-    TQDM_MININTERVAL=1 \
-    SYSTEM=spaces \
-    LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH} \
-    PATH=/usr/local/nvidia/bin:${PATH}
+    SYSTEM=spaces

-ENTRYPOINT /bin/bash start.sh
+COPY --chown=1000 . ${HOME}/app

+ENTRYPOINT ["/bin/bash", "start.sh"]
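For reference, a minimal standalone build-and-run sketch of the new Dockerfile (the gguf-my-repo-cuda tag is borrowed from docker-compose.yml; --gpus all assumes the NVIDIA Container Toolkit, and -e HF_TOKEN forwards a token already exported on the host):

    docker build -t gguf-my-repo-cuda .
    docker run --rm --gpus all -p 7860:7860 -e HF_TOKEN gguf-my-repo-cuda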
docker-compose.yml CHANGED
@@ -1,16 +1,16 @@
-# Docker compose file to LOCAL development
-
 services:
   gguf-my-repo:
     build:
       context: .
       dockerfile: Dockerfile
-    image: gguf-my-repo
+    image: gguf-my-repo-cuda
     container_name: gguf-my-repo
     ports:
       - "7860:7860"
     volumes:
       - .:/home/user/app
     environment:
-      - RUN_LOCALLY=1
+      - RUN_CUDA=1
+      - RUN_LOCALLY=0
       - HF_TOKEN=${HF_TOKEN}
+      - HF_HUB_CACHE=/home/user/app/downloads
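For local development the compose service builds the image, mounts the working tree at /home/user/app, and forwards HF_TOKEN from the host environment; a minimal sketch of bringing it up (assumes Docker Compose v2 and a valid token; hf_xxx is a placeholder):

    export HF_TOKEN=hf_xxx   # placeholder token
    docker compose up --build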
requirements.txt ADDED
@@ -0,0 +1,5 @@
+huggingface-hub
+hf-transfer
+gradio[oauth]
+gradio_huggingfacehub_search
+APScheduler
start.sh CHANGED
@@ -1,21 +1,9 @@
 #!/bin/bash

-if [ ! -d "llama.cpp" ]; then
-    # only run in dev env
-    git clone https://github.com/ggerganov/llama.cpp
-fi
-
 export GGML_CUDA=OFF
-if [[ -z "${RUN_LOCALLY}" ]]; then
-    # enable CUDA if NOT running locally
+# enable CUDA
+if [[ -z "${RUN_CUDA}" ]]; then
     export GGML_CUDA=ON
 fi

-cd llama.cpp
-cmake -B build -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=${GGML_CUDA} -DLLAMA_CURL=OFF
-cmake --build build --config Release -j --target llama-quantize llama-gguf-split llama-imatrix
-cp ./build/bin/llama-* .
-rm -rf build
-
-cd ..
-python app.py
+python3 app.py
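As written, the new check turns CUDA on only when RUN_CUDA is unset; a small sketch of the flag behavior (commands are illustrative only):

    bash start.sh                # RUN_CUDA unset -> GGML_CUDA=ON
    RUN_CUDA=1 bash start.sh     # RUN_CUDA set   -> GGML_CUDA=OFF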