Spaces:

CerealDev
/

Docling-UI

Configuration error

App Files Files Community

vishnoianil

dolfim-ibm committed on Sep 12, 2024

Commit

6237d61

unverified ·

1 Parent(s): 44657b5

Publish docling-serve images to ghcr and quay container repositories (#1)

Browse files

* Publish docling-serve images to ghcr and quay container repositories

This commit:
- Creates a Containerfile that builds both the CPU-only image
and the default "with GPU" image
- Adds workflow jobs to publish the images to ghcr.io and quay.io
- Adds a Makefile to build these images locally for the linux/amd64 platform.

Signed-off-by: Anil Vishnoi <[email protected]>

* make cpu-only version with poetry

Signed-off-by: Michele Dolfi <[email protected]>

* Rename cpu/gpu image names and also update the supported platforms

Signed-off-by: Anil Vishnoi <[email protected]>

---------

Signed-off-by: Anil Vishnoi <[email protected]>
Signed-off-by: Michele Dolfi <[email protected]>
Co-authored-by: Michele Dolfi <[email protected]>

Files changed (5) hide show

.github/workflow/images.yml +152 -0
Containerfile +32 -1
Makefile +29 -0
poetry.lock +0 -0
pyproject.toml +27 -4

.github/workflow/images.yml ADDED Viewed

	@@ -0,0 +1,152 @@

+# Publishes the docling-serve CPU-only and GPU container images to ghcr.io
+# and quay.io whenever a commit lands on main.
+# NOTE(review): this file is listed under `.github/workflow/`; GitHub Actions
+# only discovers workflows under `.github/workflows/` -- confirm the directory.
+name: 'Publish docling-serve images'
+on:
+  push:
+    branches: [main]
+env:
+  # GHCR image names are prefixed with the owner/repo slug of this repository.
+  GHCR_REGISTRY: 'ghcr.io'
+  GHCR_DOCLING_SERVE_CPU_IMAGE_NAME: '${{ github.repository }}/ds4sd/docling-serve-cpu'
+  GHCR_DOCLING_SERVE_GPU_IMAGE_NAME: '${{ github.repository }}/ds4sd/docling-serve'
+  # Quay image names are fixed organisation/repository paths.
+  QUAY_REGISTRY: 'quay.io'
+  QUAY_DOCLING_SERVE_CPU_IMAGE_NAME: 'ds4sd/docling-serve-cpu'
+  QUAY_DOCLING_SERVE_GPU_IMAGE_NAME: 'ds4sd/docling-serve'
+jobs:
+  build_and_publish_ui_image:
+    name: Push docling-serve container images to GHCR and QUAY
+    runs-on: ubuntu-latest
+    # Quay credentials (QUAY_USERNAME / QUAY_TOKEN) are read from this environment.
+    environment: registry-creds
+    permissions:
+      # packages: push images to ghcr.io with GITHUB_TOKEN.
+      packages: write
+      contents: read
+      # attestations + id-token: required by actions/attest-build-provenance.
+      attestations: write
+      id-token: write
+    steps:
+      - name: Check out the repo
+        uses: actions/checkout@v4
+      - name: Log in to the GHCR container image registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.GHCR_REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Log in to the Quay container image registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.QUAY_REGISTRY }}
+          username: ${{ secrets.QUAY_USERNAME }}
+          password: ${{ secrets.QUAY_TOKEN }}
+      # The GPU images are built for linux/arm64 as well as linux/amd64; on an
+      # amd64 runner the arm64 RUN steps need QEMU emulation registered first.
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      # Layer caching is handled by the `type=gha` cache-from/cache-to settings
+      # of the build steps. The former actions/cache step for /tmp/.buildx-cache
+      # was removed because no build ever wrote to that directory.
+      # One docker/metadata-action step per target image; each step's `tags`
+      # and `labels` outputs feed the matching build-and-push step below.
+      - name: Extract metadata (tags, labels) for docling-serve/cpu ghcr image
+        id: ghcr_serve_cpu_meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.GHCR_REGISTRY }}/${{ env.GHCR_DOCLING_SERVE_CPU_IMAGE_NAME }}
+      - name: Extract metadata (tags, labels) for docling-serve/gpu ghcr image
+        id: ghcr_serve_gpu_meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.GHCR_REGISTRY }}/${{ env.GHCR_DOCLING_SERVE_GPU_IMAGE_NAME }}
+      - name: Extract metadata (tags, labels) for docling-serve/cpu quay image
+        id: quay_serve_cpu_meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.QUAY_REGISTRY }}/${{ env.QUAY_DOCLING_SERVE_CPU_IMAGE_NAME }}
+      - name: Extract metadata (tags, labels) for docling-serve/gpu quay image
+        id: quay_serve_gpu_meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.QUAY_REGISTRY }}/${{ env.QUAY_DOCLING_SERVE_GPU_IMAGE_NAME }}
+      # Build the CPU-only image (amd64 only) and push it to ghcr.io.
+      - name: Build and push docling-serve/cpu image to ghcr.io
+        id: push-serve-cpu-ghcr
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          push: true
+          tags: ${{ steps.ghcr_serve_cpu_meta.outputs.tags }}
+          labels: ${{ steps.ghcr_serve_cpu_meta.outputs.labels }}
+          platforms: linux/amd64
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+          file: Containerfile
+          # One NAME=VALUE pair per line. A literal `--build-arg` prefix would
+          # become part of the argument name and leave CPU_ONLY at its
+          # Containerfile default (false).
+          build-args: |
+            CPU_ONLY=true
+      # Attest the provenance of the digest pushed by the step above.
+      - name: Generate artifact attestation
+        uses: actions/attest-build-provenance@v1
+        with:
+          subject-name: ${{ env.GHCR_REGISTRY }}/${{ env.GHCR_DOCLING_SERVE_CPU_IMAGE_NAME }}
+          subject-digest: ${{ steps.push-serve-cpu-ghcr.outputs.digest }}
+          push-to-registry: true
+      # Build the default (GPU-capable) image for amd64 and arm64 and push it
+      # to ghcr.io.
+      - name: Build and push docling-serve/gpu image to ghcr.io
+        id: push-serve-gpu-ghcr
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          push: true
+          tags: ${{ steps.ghcr_serve_gpu_meta.outputs.tags }}
+          labels: ${{ steps.ghcr_serve_gpu_meta.outputs.labels }}
+          platforms: linux/amd64,linux/arm64
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+          file: Containerfile
+          # One NAME=VALUE pair per line; no `--build-arg` prefix.
+          build-args: |
+            CPU_ONLY=false
+      # Attest the provenance of the digest pushed by the step above.
+      - name: Generate artifact attestation
+        uses: actions/attest-build-provenance@v1
+        with:
+          subject-name: ${{ env.GHCR_REGISTRY }}/${{ env.GHCR_DOCLING_SERVE_GPU_IMAGE_NAME }}
+          subject-digest: ${{ steps.push-serve-gpu-ghcr.outputs.digest }}
+          push-to-registry: true
+      # Build the CPU-only image (amd64 only) and push it to quay.io.
+      - name: Build and push docling-serve/cpu image to quay.io
+        id: push-serve-cpu-quay
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          push: true
+          tags: ${{ steps.quay_serve_cpu_meta.outputs.tags }}
+          labels: ${{ steps.quay_serve_cpu_meta.outputs.labels }}
+          platforms: linux/amd64
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+          file: Containerfile
+          # One NAME=VALUE pair per line. A literal `--build-arg` prefix would
+          # become part of the argument name and leave CPU_ONLY at its
+          # Containerfile default (false).
+          build-args: |
+            CPU_ONLY=true
+      # Build the default (GPU-capable) image for amd64 and arm64 and push it
+      # to quay.io.
+      - name: Build and push docling-serve/gpu image to quay.io
+        id: push-serve-gpu-quay
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          push: true
+          tags: ${{ steps.quay_serve_gpu_meta.outputs.tags }}
+          labels: ${{ steps.quay_serve_gpu_meta.outputs.labels }}
+          platforms: linux/amd64,linux/arm64
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+          file: Containerfile
+          # One NAME=VALUE pair per line; no `--build-arg` prefix.
+          build-args: |
+            CPU_ONLY=false

Containerfile CHANGED Viewed

	@@ -1 +1,32 @@
1	- # ~~TODO~~

+# Image for the docling-serve API, served by uvicorn on port 5000.
+FROM python:3.11-slim-bookworm
+# Build-time switch: "true" additionally installs the optional poetry `cpu`
+# dependency group; any other value installs the default dependency set.
+ARG CPU_ONLY=false
+WORKDIR /docling-serve
+# Remove the apt package lists in the same layer so they do not bloat the image.
+RUN apt-get update \
+    && apt-get install -y libgl1 libglib2.0-0 curl wget git \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+RUN pip install --no-cache-dir poetry
+# Copy only the dependency manifests first so the install layer below is
+# reused when just the application code changes.
+COPY pyproject.toml poetry.lock README.md /docling-serve/
+RUN if [ "$CPU_ONLY" = "true" ]; then \
+        poetry install --no-root --with cpu; \
+    else \
+        poetry install --no-root; \
+    fi
+ENV HF_HOME=/tmp/
+ENV TORCH_HOME=/tmp/
+# Download the docling models at build time so they are baked into the image.
+RUN poetry run python -c 'from docling.document_converter import DocumentConverter; artifacts_path = DocumentConverter.download_models_hf(force=True);'
+# On container environments, always set a thread budget to avoid undesired thread congestion.
+ENV OMP_NUM_THREADS=4
+COPY ./docling_serve /docling-serve/docling_serve
+EXPOSE 5000
+# Bind to all interfaces: uvicorn's default host is loopback-only, which makes
+# the exposed port unreachable from outside the container.
+CMD ["poetry", "run", "uvicorn", "--host", "0.0.0.0", "--port", "5000", "docling_serve.app:app"]

Makefile ADDED Viewed

	@@ -0,0 +1,29 @@

+# `make help` lists every target that carries a trailing `## description`.
+.PHONY: help
+help:
+	@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n  make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf "  \033[36m%-18s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
+#
+# If you want to see the full commands, run:
+#   NOISY_BUILD=y make
+#
+# Quiet mode (default): the printf banner is shown and commands are hidden.
+# Noisy mode: the banner line is turned into a shell comment and make echoes
+# the real commands instead.
+ifeq ($(NOISY_BUILD),)
+    ECHO_PREFIX=@
+    CMD_PREFIX=@
+else
+    ECHO_PREFIX=@\#
+    # These were fused on one line, which made CMD_PREFIX expand to the text
+    # `PIPE_DEV_NULL=` in front of every command.
+    CMD_PREFIX=
+    PIPE_DEV_NULL=
+endif
+# Images are tagged with the full SHA of the current commit.
+TAG=$(shell git rev-parse HEAD)
+# Builds the CPU-only image for linux/amd64, tags it with the commit SHA and
+# adds `main` tags for both ghcr.io and quay.io. Nothing is pushed.
+.PHONY: docling-serve-cpu-image
+docling-serve-cpu-image: Containerfile ## Build docling-serve "cpu only" container image
+	$(ECHO_PREFIX) printf "  %-12s Containerfile\n" "[docling-serve CPU ONLY]"
+	$(CMD_PREFIX) docker build --build-arg CPU_ONLY=true -f Containerfile --platform linux/amd64 -t ghcr.io/ds4sd/docling-serve-cpu:$(TAG) .
+	$(CMD_PREFIX) docker tag ghcr.io/ds4sd/docling-serve-cpu:$(TAG) ghcr.io/ds4sd/docling-serve-cpu:main
+	$(CMD_PREFIX) docker tag ghcr.io/ds4sd/docling-serve-cpu:$(TAG) quay.io/ds4sd/docling-serve-cpu:main
+# Builds the default (GPU-capable) image for linux/amd64, tags it with the
+# commit SHA and adds `main` tags for both ghcr.io and quay.io. Nothing is pushed.
+.PHONY: docling-serve-gpu-image
+docling-serve-gpu-image: Containerfile ## Build docling-serve container image with GPU support
+	$(ECHO_PREFIX) printf "  %-12s Containerfile\n" "[docling-serve with GPU]"
+	$(CMD_PREFIX) docker build --build-arg CPU_ONLY=false -f Containerfile --platform linux/amd64 -t ghcr.io/ds4sd/docling-serve:$(TAG) .
+	$(CMD_PREFIX) docker tag ghcr.io/ds4sd/docling-serve:$(TAG) ghcr.io/ds4sd/docling-serve:main
+	$(CMD_PREFIX) docker tag ghcr.io/ds4sd/docling-serve:$(TAG) quay.io/ds4sd/docling-serve:main

poetry.lock CHANGED Viewed

The diff for this file is too large to render. See raw diff

pyproject.toml CHANGED Viewed

@@ -15,7 +15,7 @@ maintainers = [
     "Christoph Auer <[email protected]>",
     "Michele Dolfi <[email protected]>",
     "Cesar Berrospi Ramis <[email protected]>",
-    "Panos Vagenas <[email protected]>",
 ]
 readme = "README.md"
 repository = "https://github.com/DS4SD/docling-serve"
@@ -29,15 +29,39 @@ classifiers = [
     "Programming Language :: Python :: 3"
 ]
 [tool.poetry.dependencies]
 python = "^3.10"
-docling = "^1.9.0"
 fastapi = {version = "^0.110.2", extras = ["standard"]}
 uvicorn = "^0.30.6"
 pydantic-settings = "^2.4.0"
 httpx = "^0.27.2"
 [tool.poetry.group.dev.dependencies]
 black = "^24.8.0"
@@ -52,7 +76,6 @@ mypy = "^1.11.2"
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
 [tool.black]
 line-length = 88
 target-version = ["py310"]

     "Christoph Auer <[email protected]>",
     "Michele Dolfi <[email protected]>",
     "Cesar Berrospi Ramis <[email protected]>",
+    "Panos Vagenas <[email protected]>",
 ]
 readme = "README.md"
 repository = "https://github.com/DS4SD/docling-serve"
     "Programming Language :: Python :: 3"
 ]
 [tool.poetry.dependencies]
 python = "^3.10"
+docling = "^1.11.0"
 fastapi = {version = "^0.110.2", extras = ["standard"]}
 uvicorn = "^0.30.6"
 pydantic-settings = "^2.4.0"
 httpx = "^0.27.2"
+# Non-optional group that constrains torch/torchvision to anything except the
+# exact `+cpu` local builds pinned by the optional `cpu` group.
+# NOTE(review): presumably this keeps the resolver able to lock both the
+# regular and the CPU-only variants side by side -- confirm against poetry docs.
+[tool.poetry.group.pypi-torch]
+optional = false
+[tool.poetry.group.pypi-torch.dependencies]
+torch = [
+  {version = "!=2.4.1+cpu" },
+]
+torchvision = [
+  {version = "!=0.19.1+cpu" },
+]
+# Optional group selected with `poetry install --with cpu` (the Containerfile
+# does this when CPU_ONLY=true). It pins torch 2.4.1+cpu and torchvision
+# 0.19.1+cpu to direct wheel URLs on download.pytorch.org.
+# Each entry is limited by its marker to Linux x86_64 and one Python minor
+# version (3.10, 3.11 or 3.12); no entry covers any other platform.
+[tool.poetry.group.cpu]
+optional = true
+[tool.poetry.group.cpu.dependencies]
+torch = [
+    {markers = 'platform_machine=="x86_64" and sys_platform=="linux" and python_version == "3.10"', url="https://download.pytorch.org/whl/cpu/torch-2.4.1%2Bcpu-cp310-cp310-linux_x86_64.whl"},
+    {markers = 'platform_machine=="x86_64" and sys_platform=="linux" and python_version == "3.11"', url="https://download.pytorch.org/whl/cpu/torch-2.4.1%2Bcpu-cp311-cp311-linux_x86_64.whl"},
+    {markers = 'platform_machine=="x86_64" and sys_platform=="linux" and python_version == "3.12"', url="https://download.pytorch.org/whl/cpu/torch-2.4.1%2Bcpu-cp312-cp312-linux_x86_64.whl"},
+]
+torchvision = [
+    {markers = 'platform_machine=="x86_64" and sys_platform=="linux" and python_version == "3.10"', url="https://download.pytorch.org/whl/cpu/torchvision-0.19.1%2Bcpu-cp310-cp310-linux_x86_64.whl"},
+    {markers = 'platform_machine=="x86_64" and sys_platform=="linux" and python_version == "3.11"', url="https://download.pytorch.org/whl/cpu/torchvision-0.19.1%2Bcpu-cp311-cp311-linux_x86_64.whl"},
+    {markers = 'platform_machine=="x86_64" and sys_platform=="linux" and python_version == "3.12"', url="https://download.pytorch.org/whl/cpu/torchvision-0.19.1%2Bcpu-cp312-cp312-linux_x86_64.whl"},
+]
 [tool.poetry.group.dev.dependencies]
 black = "^24.8.0"
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
 [tool.black]
 line-length = 88
 target-version = ["py310"]