Oleg Shulyakov committed
Commit b7bd975 · 1 Parent(s): 05d1b68

Migrate Docker to official llama.cpp CUDA image

Files changed (6)
  1. .dockerignore +15 -3
  2. .gitignore +203 -8
  3. Dockerfile +14 -49
  4. docker-compose.yml +4 -4
  5. requirements.txt +5 -0
  6. start.sh +3 -15
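In short, the build now starts from the official llama.cpp CUDA image instead of a hand-built nvidia/cuda + pyenv environment, the Python dependencies move into requirements.txt, and start.sh no longer clones or builds llama.cpp. A quick way to pre-fetch the new base image (name taken from the updated Dockerfile; GPU use assumes the NVIDIA Container Toolkit is installed):

    docker pull ghcr.io/ggml-org/llama.cpp:full-cuda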
.dockerignore CHANGED
@@ -1,3 +1,15 @@
-/downloads
-/llama.cpp
-/outputs
+# IDE
+.idea/
+.vscode/
+
+.git*
+.dockerignore
+docker-compose.yml
+Dockerfile
+
+# LLama.cpp
+llama.cpp/
+
+# Working files
+downloads/
+outputs/
.gitignore CHANGED
@@ -1,3 +1,142 @@
+# Created by https://www.toptal.com/developers/gitignore/api/linux,macos,windows,python,jetbrains+all,visualstudiocode
+# Edit at https://www.toptal.com/developers/gitignore?templates=linux,macos,windows,python,jetbrains+all,visualstudiocode
+
+### JetBrains+all ###
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
+# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
+
+# User-specific stuff
+.idea/**/workspace.xml
+.idea/**/tasks.xml
+.idea/**/usage.statistics.xml
+.idea/**/dictionaries
+.idea/**/shelf
+
+# AWS User-specific
+.idea/**/aws.xml
+
+# Generated files
+.idea/**/contentModel.xml
+
+# Sensitive or high-churn files
+.idea/**/dataSources/
+.idea/**/dataSources.ids
+.idea/**/dataSources.local.xml
+.idea/**/sqlDataSources.xml
+.idea/**/dynamic.xml
+.idea/**/uiDesigner.xml
+.idea/**/dbnavigator.xml
+
+# Gradle
+.idea/**/gradle.xml
+.idea/**/libraries
+
+# Gradle and Maven with auto-import
+# When using Gradle or Maven with auto-import, you should exclude module files,
+# since they will be recreated, and may cause churn. Uncomment if using
+# auto-import.
+# .idea/artifacts
+# .idea/compiler.xml
+# .idea/jarRepositories.xml
+# .idea/modules.xml
+# .idea/*.iml
+# .idea/modules
+# *.iml
+# *.ipr
+
+# CMake
+cmake-build-*/
+
+# Mongo Explorer plugin
+.idea/**/mongoSettings.xml
+
+# File-based project format
+*.iws
+
+# IntelliJ
+out/
+
+# mpeltonen/sbt-idea plugin
+.idea_modules/
+
+# JIRA plugin
+atlassian-ide-plugin.xml
+
+# Cursive Clojure plugin
+.idea/replstate.xml
+
+# SonarLint plugin
+.idea/sonarlint/
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+com_crashlytics_export_strings.xml
+crashlytics.properties
+crashlytics-build.properties
+fabric.properties
+
+# Editor-based Rest Client
+.idea/httpRequests
+
+# Android studio 3.1+ serialized cache file
+.idea/caches/build_file_checksums.ser
+
+### JetBrains+all Patch ###
+# Ignore everything but code style settings and run configurations
+# that are supposed to be shared within teams.
+
+.idea/*
+
+!.idea/codeStyles
+!.idea/runConfigurations
+
+### Linux ###
+*~
+
+# temporary files which can be created if a process still has a handle open of a deleted file
+.fuse_hidden*
+
+# KDE directory preferences
+.directory
+
+# Linux trash folder which might appear on any partition or disk
+.Trash-*
+
+# .nfs files are created when an open file is removed but is still being accessed
+.nfs*
+
+### macOS ###
+# General
+.DS_Store
+.AppleDouble
+.LSOverride
+
+# Icon must end with two \r
+Icon
+
+# Thumbnails
+._*
+
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+
+### macOS Patch ###
+# iCloud generated files
+*.icloud
+
+### Python ###
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
@@ -11,7 +150,6 @@ __pycache__/
 build/
 develop-eggs/
 dist/
-downloads/
 eggs/
 .eggs/
 lib/
@@ -106,10 +244,8 @@ ipython_config.py
 #pdm.lock
 # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
 # in version control.
-# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+# https://pdm.fming.dev/#use-with-ide
 .pdm.toml
-.pdm-python
-.pdm-build/

 # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
 __pypackages__/
@@ -161,7 +297,66 @@ cython_debug/
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/

-/downloads
-!/downloads/.keep
-/llama.cpp
-/outputs
+### Python Patch ###
+# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
+poetry.toml
+
+# ruff
+.ruff_cache/
+
+# LSP config files
+pyrightconfig.json
+
+### VisualStudioCode ###
+.vscode/*
+!.vscode/settings.json
+!.vscode/tasks.json
+!.vscode/launch.json
+!.vscode/extensions.json
+!.vscode/*.code-snippets
+
+# Local History for Visual Studio Code
+.history/
+
+# Built Visual Studio Code Extensions
+*.vsix
+
+### VisualStudioCode Patch ###
+# Ignore all local history of files
+.history
+.ionide
+
+### Windows ###
+# Windows thumbnail cache files
+Thumbs.db
+Thumbs.db:encryptable
+ehthumbs.db
+ehthumbs_vista.db
+
+# Dump file
+*.stackdump
+
+# Folder config file
+[Dd]esktop.ini
+
+# Recycle Bin used on file shares
+$RECYCLE.BIN/
+
+# Windows Installer files
+*.cab
+*.msi
+*.msix
+*.msm
+*.msp
+
+# Windows shortcuts
+*.lnk
+
+# End of https://www.toptal.com/developers/gitignore/api/linux,macos,windows,python,jetbrains+all,visualstudiocode
+
+# Working folders
+downloads/
+outputs/
+llama.cpp/
+
+!*/.keep
Dockerfile CHANGED
@@ -1,65 +1,30 @@
-FROM nvidia/cuda:12.8.0-cudnn-devel-ubuntu24.04
-
-ENV DEBIAN_FRONTEND=noninteractive
-RUN apt-get update && \
-    apt-get upgrade -y && \
-    apt-get install -y --no-install-recommends --fix-missing \
-    git \
-    git-lfs \
-    wget \
-    curl \
-    cmake \
-    # python build dependencies \
-    build-essential \
-    libssl-dev \
-    zlib1g-dev \
-    libbz2-dev \
-    libreadline-dev \
-    libsqlite3-dev \
-    libncursesw5-dev \
-    xz-utils \
-    tk-dev \
-    libxml2-dev \
-    libxmlsec1-dev \
-    libffi-dev \
-    liblzma-dev \
-    ffmpeg \
-    nvidia-driver-570
+FROM ghcr.io/ggml-org/llama.cpp:full-cuda

 # Check if user with UID 1000 exists, if not create it
 RUN id -u 1000 &>/dev/null || useradd -m -u 1000 user
 USER 1000
+
 ENV HOME=/home/user \
-    PATH=/home/user/.local/bin:${PATH}
-WORKDIR ${HOME}/app
+    PATH=${PATH}:/home/user/.local/bin \
+    PATH=${PATH}:/app \
+    PATH=${PATH}:/usr/local/nvidia/bin

-RUN curl https://pyenv.run | bash
-ENV PATH=${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH}
-ARG PYTHON_VERSION=3.11
-RUN pyenv install ${PYTHON_VERSION} && \
-    pyenv global ${PYTHON_VERSION} && \
-    pyenv rehash && \
-    pip install --no-cache-dir -U pip setuptools wheel && \
-    pip install "huggingface-hub" "hf-transfer" "gradio[oauth]" "gradio_huggingfacehub_search" "APScheduler"
+WORKDIR ${HOME}/app

-COPY --chown=1000 . ${HOME}/app
-RUN git clone https://github.com/ggerganov/llama.cpp
-RUN pip install -r llama.cpp/requirements/requirements-convert_hf_to_gguf.txt
+COPY --chown=1000 requirements.txt ${HOME}/app

-COPY groups_merged.txt ${HOME}/app/llama.cpp/
+RUN pip install --no-cache-dir -r requirements.txt

-ENV PYTHONPATH=${HOME}/app \
-    PYTHONUNBUFFERED=1 \
+ENV PYTHONPATH=${PYTHONPATH}:${HOME}/.local/bin \
+    LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/app \
+    LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64 \
     HF_HUB_ENABLE_HF_TRANSFER=1 \
     GRADIO_ALLOW_FLAGGING=never \
     GRADIO_NUM_PORTS=1 \
     GRADIO_SERVER_NAME=0.0.0.0 \
     GRADIO_THEME=huggingface \
-    TQDM_POSITION=-1 \
-    TQDM_MININTERVAL=1 \
-    SYSTEM=spaces \
-    LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH} \
-    PATH=/usr/local/nvidia/bin:${PATH}
+    SYSTEM=spaces

-ENTRYPOINT /bin/bash start.sh
+COPY --chown=1000 . ${HOME}/app

+ENTRYPOINT ["/bin/bash", "start.sh"]
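For reference, a minimal standalone build-and-run sketch of the new Dockerfile (the gguf-my-repo-cuda tag is borrowed from docker-compose.yml; --gpus all assumes the NVIDIA Container Toolkit, and -e HF_TOKEN forwards a token already exported on the host):

    docker build -t gguf-my-repo-cuda .
    docker run --rm --gpus all -p 7860:7860 -e HF_TOKEN gguf-my-repo-cuda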
docker-compose.yml CHANGED
@@ -1,16 +1,16 @@
-# Docker compose file to LOCAL development
-
 services:
   gguf-my-repo:
     build:
       context: .
       dockerfile: Dockerfile
-    image: gguf-my-repo
+    image: gguf-my-repo-cuda
     container_name: gguf-my-repo
     ports:
       - "7860:7860"
     volumes:
       - .:/home/user/app
     environment:
-      - RUN_LOCALLY=1
+      - RUN_CUDA=1
+      - RUN_LOCALLY=0
       - HF_TOKEN=${HF_TOKEN}
+      - HF_HUB_CACHE=/home/user/app/downloads
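For local development the compose service builds the image, mounts the working tree at /home/user/app, and forwards HF_TOKEN from the host environment; a minimal sketch of bringing it up (assumes Docker Compose v2 and a valid token; hf_xxx is a placeholder):

    export HF_TOKEN=hf_xxx   # placeholder token
    docker compose up --build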
requirements.txt ADDED
@@ -0,0 +1,5 @@
+huggingface-hub
+hf-transfer
+gradio[oauth]
+gradio_huggingfacehub_search
+APScheduler
start.sh CHANGED
@@ -1,21 +1,9 @@
 #!/bin/bash

-if [ ! -d "llama.cpp" ]; then
-    # only run in dev env
-    git clone https://github.com/ggerganov/llama.cpp
-fi
-
 export GGML_CUDA=OFF
-if [[ -z "${RUN_LOCALLY}" ]]; then
-    # enable CUDA if NOT running locally
+# enable CUDA
+if [[ -z "${RUN_CUDA}" ]]; then
     export GGML_CUDA=ON
 fi

-cd llama.cpp
-cmake -B build -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=${GGML_CUDA} -DLLAMA_CURL=OFF
-cmake --build build --config Release -j --target llama-quantize llama-gguf-split llama-imatrix
-cp ./build/bin/llama-* .
-rm -rf build
-
-cd ..
-python app.py
+python3 app.py
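As written, the new check turns CUDA on only when RUN_CUDA is unset; a small sketch of the flag behavior (commands are illustrative only):

    bash start.sh                # RUN_CUDA unset -> GGML_CUDA=ON
    RUN_CUDA=1 bash start.sh     # RUN_CUDA set   -> GGML_CUDA=OFF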