# Spaces status: Paused (page banner text, not a Dockerfile instruction)
# Lite version: Python 3.10 slim base with the kotaemon and ktem packages
# installed in editable mode from libs/.
FROM python:3.10-slim AS lite

# Common OS dependencies (sorted alphabetically for easier diffs).
# The apt caches are removed in this same layer: deleting them in a later
# RUN would not reduce the image size, because the files would already be
# stored in this layer.
RUN apt-get update -qqy \
    && apt-get install -y --no-install-recommends \
        cargo \
        curl \
        g++ \
        gcc \
        git \
        libpoppler-dev \
        poppler-utils \
        ssh \
        unzip \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Build arguments describing the target platform of the build.
ARG TARGETPLATFORM
ARG TARGETARCH

# Environment variables. TARGETARCH is re-exported from the build argument
# so that it remains visible to later RUN steps and in the running container.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONIOENCODING=UTF-8 \
    TARGETARCH=${TARGETARCH}

# Create working directory
WORKDIR /app

# Download pdfjs. Only the download script is copied at this point, so this
# layer stays cached when other source files change.
COPY scripts/download_pdfjs.sh /app/scripts/download_pdfjs.sh
RUN chmod +x /app/scripts/download_pdfjs.sh
ENV PDFJS_PREBUILT_DIR="/app/libs/ktem/ktem/assets/prebuilt/pdfjs-dist"
# Quoted so the path is always passed to the script as a single argument.
RUN bash scripts/download_pdfjs.sh "$PDFJS_PREBUILT_DIR"

# Copy the application sources and use the example env file as the default .env.
COPY . /app
COPY .env.example /app/.env

# Install pip packages. The pip download cache lives in a BuildKit cache
# mount, so it is not written into the image and needs no later clean-up step.
RUN --mount=type=ssh \
    --mount=type=cache,target=/root/.cache/pip \
    pip install -e "libs/kotaemon" \
    && pip install -e "libs/ktem" \
    && pip install "pdfservices-sdk@git+https://github.com/niallcm/pdfservices-python-sdk.git@bump-and-unfreeze-requirements"

# graphrag is only installed when building for amd64.
RUN --mount=type=ssh \
    --mount=type=cache,target=/root/.cache/pip \
    if [ "$TARGETARCH" = "amd64" ]; then pip install "graphrag<=0.3.6" future; fi

CMD ["python", "app.py"]
# Full version: extends the lite stage with additional system packages and
# Python packages (torch, unstructured, lightRAG).
FROM lite AS full

# Additional OS dependencies for the full version (sorted alphabetically).
# The apt caches are removed in this same layer: deleting them in a later
# RUN would not reduce the image size.
RUN apt-get update -qqy \
    && apt-get install -y --no-install-recommends \
        ffmpeg \
        libmagic-dev \
        libreoffice \
        libsm6 \
        libxext6 \
        tesseract-ocr \
        tesseract-ocr-jpn \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install torch and torchvision for unstructured, using the CPU wheel index.
RUN --mount=type=ssh \
    --mount=type=cache,target=/root/.cache/pip \
    pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu

# Install additional pip packages. The extras specifier is quoted so the
# shell cannot treat the square brackets as a glob pattern.
RUN --mount=type=ssh \
    --mount=type=cache,target=/root/.cache/pip \
    pip install -e "libs/kotaemon[adv]" \
    && pip install "unstructured[all-docs]"

# Install lightRAG
ENV USE_LIGHTRAG=true
RUN --mount=type=ssh \
    --mount=type=cache,target=/root/.cache/pip \
    pip install aioboto3 nano-vectordb ollama xxhash lightrag-hku

# Download nltk packages as required for unstructured
RUN python -c "from unstructured.nlp.tokenize import _download_nltk_packages_if_not_present; _download_nltk_packages_if_not_present()"

CMD ["python", "app.py"]