Updated Dockerfile to use cache (#2703)
Browse files
### What problem does this PR solve?
Updated Dockerfile to use cache
### Type of change
- [ ] Bug Fix (non-breaking change which fixes an issue)
- [ ] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [x] Other (please describe): CI
- .github/workflows/tests.yml +1 -1
- Dockerfile +21 -14
- Dockerfile.slim +21 -14
- README.md +2 -2
- README_ja.md +2 -2
- README_ko.md +2 -2
- README_zh.md +2 -2
- docs/guides/develop/build_docker_image.md +2 -2
- download_deps.py +7 -2
    	
        .github/workflows/tests.yml
    CHANGED
    
    | @@ -48,7 +48,7 @@ jobs: | |
| 48 | 
             
                  - name: Build ragflow:dev-slim
         | 
| 49 | 
             
                    run: |
         | 
| 50 | 
             
                      RUNNER_WORKSPACE_PREFIX=${RUNNER_WORKSPACE_PREFIX:-$HOME}
         | 
| 51 | 
            -
                      cp -r ${RUNNER_WORKSPACE_PREFIX}/huggingface.co .
         | 
| 52 | 
             
                      sudo docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
         | 
| 53 |  | 
| 54 | 
             
                  - name: Build ragflow:dev
         | 
|  | |
| 48 | 
             
                  - name: Build ragflow:dev-slim
         | 
| 49 | 
             
                    run: |
         | 
| 50 | 
             
                      RUNNER_WORKSPACE_PREFIX=${RUNNER_WORKSPACE_PREFIX:-$HOME}
         | 
| 51 | 
            +
                      cp -r ${RUNNER_WORKSPACE_PREFIX}/huggingface.co ${RUNNER_WORKSPACE_PREFIX}/nltk_data .
         | 
| 52 | 
             
                      sudo docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
         | 
| 53 |  | 
| 54 | 
             
                  - name: Build ragflow:dev
         | 
    	
        Dockerfile
    CHANGED
    
    | @@ -6,13 +6,18 @@ ENV LIGHTEN=0 | |
| 6 |  | 
| 7 | 
             
            WORKDIR /ragflow
         | 
| 8 |  | 
| 9 | 
            -
            RUN  | 
|  | |
|  | |
|  | |
|  | |
| 10 |  | 
| 11 | 
             
            # if you located in China, you can use tsinghua mirror to speed up apt
         | 
| 12 | 
             
            RUN  sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list.d/ubuntu.sources
         | 
| 13 |  | 
| 14 | 
            -
            RUN apt  | 
| 15 | 
            -
                && apt  | 
|  | |
| 16 |  | 
| 17 | 
             
            RUN curl -o libssl1.deb http://archive.ubuntu.com/ubuntu/pool/main/o/openssl1.0/libssl1.0.0_1.0.2n-1ubuntu5_amd64.deb && dpkg -i libssl1.deb && rm -f libssl1.deb
         | 
| 18 |  | 
| @@ -22,7 +27,6 @@ ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1 | |
| 22 | 
             
            ENV POETRY_NO_INTERACTION=1
         | 
| 23 | 
             
            ENV POETRY_VIRTUALENVS_IN_PROJECT=true
         | 
| 24 | 
             
            ENV POETRY_VIRTUALENVS_CREATE=true
         | 
| 25 | 
            -
            ENV POETRY_KEYRING_ENABLED=false
         | 
| 26 | 
             
            ENV POETRY_REQUESTS_TIMEOUT=15
         | 
| 27 |  | 
| 28 | 
             
            # builder stage
         | 
| @@ -31,16 +35,18 @@ USER root | |
| 31 |  | 
| 32 | 
             
            WORKDIR /ragflow
         | 
| 33 |  | 
| 34 | 
            -
            RUN apt  | 
| 35 | 
            -
                 | 
|  | |
| 36 |  | 
| 37 | 
             
            COPY web web
         | 
| 38 | 
            -
            RUN  | 
|  | |
| 39 |  | 
| 40 | 
             
            # install dependencies from poetry.lock file
         | 
| 41 | 
             
            COPY pyproject.toml poetry.toml poetry.lock ./
         | 
| 42 |  | 
| 43 | 
            -
            RUN --mount=type=cache,target=/root/.cache/pypoetry,sharing=locked \
         | 
| 44 | 
             
                if [ "$LIGHTEN" -eq 0 ]; then \
         | 
| 45 | 
             
                    poetry install --sync --no-cache --no-root --with=full; \
         | 
| 46 | 
             
                else \
         | 
| @@ -55,8 +61,9 @@ WORKDIR /ragflow | |
| 55 |  | 
| 56 | 
             
            # Install python packages' dependencies
         | 
| 57 | 
             
            # cv2 requires libGL.so.1
         | 
| 58 | 
            -
            RUN apt  | 
| 59 | 
            -
                 | 
|  | |
| 60 |  | 
| 61 | 
             
            COPY web web
         | 
| 62 | 
             
            COPY api api
         | 
| @@ -82,16 +89,16 @@ RUN --mount=type=bind,source=huggingface.co,target=/huggingface.co \ | |
| 82 | 
             
                    /huggingface.co/maidalun1020/bce-reranker-base_v1 \
         | 
| 83 | 
             
                    | tar -xf - --strip-components=2 -C /root/.ragflow
         | 
| 84 |  | 
|  | |
|  | |
|  | |
| 85 | 
             
            # Copy compiled web pages
         | 
| 86 | 
             
            COPY --from=builder /ragflow/web/dist /ragflow/web/dist
         | 
| 87 |  | 
| 88 | 
             
            # Copy Python environment and packages
         | 
| 89 | 
             
            ENV VIRTUAL_ENV=/ragflow/.venv
         | 
| 90 | 
             
            COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
         | 
| 91 | 
            -
            ENV PATH="${VIRTUAL_ENV}/bin | 
| 92 | 
            -
             | 
| 93 | 
            -
            # Download nltk data
         | 
| 94 | 
            -
            RUN python3 -m nltk.downloader wordnet punkt punkt_tab
         | 
| 95 |  | 
| 96 | 
             
            ENV PYTHONPATH=/ragflow/
         | 
| 97 |  | 
|  | |
| 6 |  | 
| 7 | 
             
            WORKDIR /ragflow
         | 
| 8 |  | 
| 9 | 
            +
            RUN rm -f /etc/apt/apt.conf.d/docker-clean \
         | 
| 10 | 
            +
                && echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \
         | 
| 13 | 
            +
                apt update && apt-get --no-install-recommends install -y ca-certificates
         | 
| 14 |  | 
| 15 | 
             
            # if you located in China, you can use tsinghua mirror to speed up apt
         | 
| 16 | 
             
            RUN  sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list.d/ubuntu.sources
         | 
| 17 |  | 
| 18 | 
            +
            RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \
         | 
| 19 | 
            +
                apt update && apt install -y curl libpython3-dev nginx libglib2.0-0 libglx-mesa0 pkg-config libicu-dev libgdiplus python3-poetry \
         | 
| 20 | 
            +
                && rm -rf /var/lib/apt/lists/*
         | 
| 21 |  | 
| 22 | 
             
            RUN curl -o libssl1.deb http://archive.ubuntu.com/ubuntu/pool/main/o/openssl1.0/libssl1.0.0_1.0.2n-1ubuntu5_amd64.deb && dpkg -i libssl1.deb && rm -f libssl1.deb
         | 
| 23 |  | 
|  | |
| 27 | 
             
            ENV POETRY_NO_INTERACTION=1
         | 
| 28 | 
             
            ENV POETRY_VIRTUALENVS_IN_PROJECT=true
         | 
| 29 | 
             
            ENV POETRY_VIRTUALENVS_CREATE=true
         | 
|  | |
| 30 | 
             
            ENV POETRY_REQUESTS_TIMEOUT=15
         | 
| 31 |  | 
| 32 | 
             
            # builder stage
         | 
|  | |
| 35 |  | 
| 36 | 
             
            WORKDIR /ragflow
         | 
| 37 |  | 
| 38 | 
            +
            RUN --mount=type=cache,id=ragflow_builder_apt,target=/var/cache/apt,sharing=locked \
         | 
| 39 | 
            +
                apt update && apt install -y nodejs npm cargo && \
         | 
| 40 | 
            +
                rm -rf /var/lib/apt/lists/*
         | 
| 41 |  | 
| 42 | 
             
            COPY web web
         | 
| 43 | 
            +
            RUN --mount=type=cache,id=ragflow_builder_npm,target=/root/.npm,sharing=locked \
         | 
| 44 | 
            +
                cd web && npm i --force && npm run build
         | 
| 45 |  | 
| 46 | 
             
            # install dependencies from poetry.lock file
         | 
| 47 | 
             
            COPY pyproject.toml poetry.toml poetry.lock ./
         | 
| 48 |  | 
| 49 | 
            +
            RUN --mount=type=cache,id=ragflow_builder_poetry,target=/root/.cache/pypoetry,sharing=locked \
         | 
| 50 | 
             
                if [ "$LIGHTEN" -eq 0 ]; then \
         | 
| 51 | 
             
                    poetry install --sync --no-cache --no-root --with=full; \
         | 
| 52 | 
             
                else \
         | 
|  | |
| 61 |  | 
| 62 | 
             
            # Install python packages' dependencies
         | 
| 63 | 
             
            # cv2 requires libGL.so.1
         | 
| 64 | 
            +
            RUN --mount=type=cache,id=ragflow_production_apt,target=/var/cache/apt,sharing=locked \
         | 
| 65 | 
            +
                apt update && apt install -y --no-install-recommends nginx libgl1 vim less && \
         | 
| 66 | 
            +
                rm -rf /var/lib/apt/lists/*
         | 
| 67 |  | 
| 68 | 
             
            COPY web web
         | 
| 69 | 
             
            COPY api api
         | 
|  | |
| 89 | 
             
                    /huggingface.co/maidalun1020/bce-reranker-base_v1 \
         | 
| 90 | 
             
                    | tar -xf - --strip-components=2 -C /root/.ragflow
         | 
| 91 |  | 
| 92 | 
            +
            # Copy nltk data downloaded via download_deps.py
         | 
| 93 | 
            +
            COPY nltk_data /root/nltk_data
         | 
| 94 | 
            +
             | 
| 95 | 
             
            # Copy compiled web pages
         | 
| 96 | 
             
            COPY --from=builder /ragflow/web/dist /ragflow/web/dist
         | 
| 97 |  | 
| 98 | 
             
            # Copy Python environment and packages
         | 
| 99 | 
             
            ENV VIRTUAL_ENV=/ragflow/.venv
         | 
| 100 | 
             
            COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
         | 
| 101 | 
            +
            ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
         | 
|  | |
|  | |
|  | |
| 102 |  | 
| 103 | 
             
            ENV PYTHONPATH=/ragflow/
         | 
| 104 |  | 
    	
        Dockerfile.slim
    CHANGED
    
    | @@ -6,13 +6,18 @@ ENV LIGHTEN=1 | |
| 6 |  | 
| 7 | 
             
            WORKDIR /ragflow
         | 
| 8 |  | 
| 9 | 
            -
            RUN  | 
|  | |
|  | |
|  | |
|  | |
| 10 |  | 
| 11 | 
             
            # if you located in China, you can use tsinghua mirror to speed up apt
         | 
| 12 | 
             
            RUN  sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list.d/ubuntu.sources
         | 
| 13 |  | 
| 14 | 
            -
            RUN apt  | 
| 15 | 
            -
                && apt  | 
|  | |
| 16 |  | 
| 17 | 
             
            RUN curl -o libssl1.deb http://archive.ubuntu.com/ubuntu/pool/main/o/openssl1.0/libssl1.0.0_1.0.2n-1ubuntu5_amd64.deb && dpkg -i libssl1.deb && rm -f libssl1.deb
         | 
| 18 |  | 
| @@ -22,7 +27,6 @@ ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1 | |
| 22 | 
             
            ENV POETRY_NO_INTERACTION=1
         | 
| 23 | 
             
            ENV POETRY_VIRTUALENVS_IN_PROJECT=true
         | 
| 24 | 
             
            ENV POETRY_VIRTUALENVS_CREATE=true
         | 
| 25 | 
            -
            ENV POETRY_KEYRING_ENABLED=false
         | 
| 26 | 
             
            ENV POETRY_REQUESTS_TIMEOUT=15
         | 
| 27 |  | 
| 28 | 
             
            # builder stage
         | 
| @@ -31,16 +35,18 @@ USER root | |
| 31 |  | 
| 32 | 
             
            WORKDIR /ragflow
         | 
| 33 |  | 
| 34 | 
            -
            RUN apt  | 
| 35 | 
            -
                 | 
|  | |
| 36 |  | 
| 37 | 
             
            COPY web web
         | 
| 38 | 
            -
            RUN  | 
|  | |
| 39 |  | 
| 40 | 
             
            # install dependencies from poetry.lock file
         | 
| 41 | 
             
            COPY pyproject.toml poetry.toml poetry.lock ./
         | 
| 42 |  | 
| 43 | 
            -
            RUN --mount=type=cache,target=/root/.cache/pypoetry,sharing=locked \
         | 
| 44 | 
             
                if [ "$LIGHTEN" -eq 0 ]; then \
         | 
| 45 | 
             
                    poetry install --sync --no-cache --no-root --with=full; \
         | 
| 46 | 
             
                else \
         | 
| @@ -55,8 +61,9 @@ WORKDIR /ragflow | |
| 55 |  | 
| 56 | 
             
            # Install python packages' dependencies
         | 
| 57 | 
             
            # cv2 requires libGL.so.1
         | 
| 58 | 
            -
            RUN apt  | 
| 59 | 
            -
                 | 
|  | |
| 60 |  | 
| 61 | 
             
            COPY web web
         | 
| 62 | 
             
            COPY api api
         | 
| @@ -75,16 +82,16 @@ RUN --mount=type=bind,source=huggingface.co,target=/huggingface.co \ | |
| 75 | 
             
                    /huggingface.co/InfiniFlow/deepdoc \
         | 
| 76 | 
             
                    | tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc
         | 
| 77 |  | 
|  | |
|  | |
|  | |
| 78 | 
             
            # Copy compiled web pages
         | 
| 79 | 
             
            COPY --from=builder /ragflow/web/dist /ragflow/web/dist
         | 
| 80 |  | 
| 81 | 
             
            # Copy Python environment and packages
         | 
| 82 | 
             
            ENV VIRTUAL_ENV=/ragflow/.venv
         | 
| 83 | 
             
            COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
         | 
| 84 | 
            -
            ENV PATH="${VIRTUAL_ENV}/bin | 
| 85 | 
            -
             | 
| 86 | 
            -
            # Download nltk data
         | 
| 87 | 
            -
            RUN python3 -m nltk.downloader wordnet punkt punkt_tab
         | 
| 88 |  | 
| 89 | 
             
            ENV PYTHONPATH=/ragflow/
         | 
| 90 |  | 
|  | |
| 6 |  | 
| 7 | 
             
            WORKDIR /ragflow
         | 
| 8 |  | 
| 9 | 
            +
            RUN rm -f /etc/apt/apt.conf.d/docker-clean \
         | 
| 10 | 
            +
                && echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \
         | 
| 13 | 
            +
                apt update && apt-get --no-install-recommends install -y ca-certificates
         | 
| 14 |  | 
| 15 | 
             
            # if you located in China, you can use tsinghua mirror to speed up apt
         | 
| 16 | 
             
            RUN  sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list.d/ubuntu.sources
         | 
| 17 |  | 
| 18 | 
            +
            RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \
         | 
| 19 | 
            +
                apt update && apt install -y curl libpython3-dev nginx libglib2.0-0 libglx-mesa0 pkg-config libicu-dev libgdiplus python3-poetry \
         | 
| 20 | 
            +
                && rm -rf /var/lib/apt/lists/*
         | 
| 21 |  | 
| 22 | 
             
            RUN curl -o libssl1.deb http://archive.ubuntu.com/ubuntu/pool/main/o/openssl1.0/libssl1.0.0_1.0.2n-1ubuntu5_amd64.deb && dpkg -i libssl1.deb && rm -f libssl1.deb
         | 
| 23 |  | 
|  | |
| 27 | 
             
            ENV POETRY_NO_INTERACTION=1
         | 
| 28 | 
             
            ENV POETRY_VIRTUALENVS_IN_PROJECT=true
         | 
| 29 | 
             
            ENV POETRY_VIRTUALENVS_CREATE=true
         | 
|  | |
| 30 | 
             
            ENV POETRY_REQUESTS_TIMEOUT=15
         | 
| 31 |  | 
| 32 | 
             
            # builder stage
         | 
|  | |
| 35 |  | 
| 36 | 
             
            WORKDIR /ragflow
         | 
| 37 |  | 
| 38 | 
            +
            RUN --mount=type=cache,id=ragflow_builder_apt,target=/var/cache/apt,sharing=locked \
         | 
| 39 | 
            +
                apt update && apt install -y nodejs npm cargo && \
         | 
| 40 | 
            +
                rm -rf /var/lib/apt/lists/*
         | 
| 41 |  | 
| 42 | 
             
            COPY web web
         | 
| 43 | 
            +
            RUN --mount=type=cache,id=ragflow_builder_npm,target=/root/.npm,sharing=locked \
         | 
| 44 | 
            +
                cd web && npm i --force && npm run build
         | 
| 45 |  | 
| 46 | 
             
            # install dependencies from poetry.lock file
         | 
| 47 | 
             
            COPY pyproject.toml poetry.toml poetry.lock ./
         | 
| 48 |  | 
| 49 | 
            +
            RUN --mount=type=cache,id=ragflow_builder_poetry,target=/root/.cache/pypoetry,sharing=locked \
         | 
| 50 | 
             
                if [ "$LIGHTEN" -eq 0 ]; then \
         | 
| 51 | 
             
                    poetry install --sync --no-cache --no-root --with=full; \
         | 
| 52 | 
             
                else \
         | 
|  | |
| 61 |  | 
| 62 | 
             
            # Install python packages' dependencies
         | 
| 63 | 
             
            # cv2 requires libGL.so.1
         | 
| 64 | 
            +
            RUN --mount=type=cache,id=ragflow_production_apt,target=/var/cache/apt,sharing=locked \
         | 
| 65 | 
            +
                apt update && apt install -y --no-install-recommends nginx libgl1 vim less && \
         | 
| 66 | 
            +
                rm -rf /var/lib/apt/lists/*
         | 
| 67 |  | 
| 68 | 
             
            COPY web web
         | 
| 69 | 
             
            COPY api api
         | 
|  | |
| 82 | 
             
                    /huggingface.co/InfiniFlow/deepdoc \
         | 
| 83 | 
             
                    | tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc
         | 
| 84 |  | 
| 85 | 
            +
            # Copy nltk data downloaded via download_deps.py
         | 
| 86 | 
            +
            COPY nltk_data /root/nltk_data
         | 
| 87 | 
            +
             | 
| 88 | 
             
            # Copy compiled web pages
         | 
| 89 | 
             
            COPY --from=builder /ragflow/web/dist /ragflow/web/dist
         | 
| 90 |  | 
| 91 | 
             
            # Copy Python environment and packages
         | 
| 92 | 
             
            ENV VIRTUAL_ENV=/ragflow/.venv
         | 
| 93 | 
             
            COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
         | 
| 94 | 
            +
            ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
         | 
|  | |
|  | |
|  | |
| 95 |  | 
| 96 | 
             
            ENV PYTHONPATH=/ragflow/
         | 
| 97 |  | 
    	
        README.md
    CHANGED
    
    | @@ -220,7 +220,7 @@ This image is approximately 1 GB in size and relies on external LLM and embeddin | |
| 220 | 
             
            ```bash
         | 
| 221 | 
             
            git clone https://github.com/infiniflow/ragflow.git
         | 
| 222 | 
             
            cd ragflow/
         | 
| 223 | 
            -
            pip3 install huggingface-hub
         | 
| 224 | 
             
            python3 download_deps.py
         | 
| 225 | 
             
            docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
         | 
| 226 | 
             
            ```
         | 
| @@ -232,7 +232,7 @@ This image is approximately 9 GB in size. As it includes embedding models, it re | |
| 232 | 
             
            ```bash
         | 
| 233 | 
             
            git clone https://github.com/infiniflow/ragflow.git
         | 
| 234 | 
             
            cd ragflow/
         | 
| 235 | 
            -
            pip3 install huggingface-hub
         | 
| 236 | 
             
            python3 download_deps.py
         | 
| 237 | 
             
            docker build -f Dockerfile -t infiniflow/ragflow:dev .
         | 
| 238 | 
             
            ```
         | 
|  | |
| 220 | 
             
            ```bash
         | 
| 221 | 
             
            git clone https://github.com/infiniflow/ragflow.git
         | 
| 222 | 
             
            cd ragflow/
         | 
| 223 | 
            +
            pip3 install huggingface-hub nltk
         | 
| 224 | 
             
            python3 download_deps.py
         | 
| 225 | 
             
            docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
         | 
| 226 | 
             
            ```
         | 
|  | |
| 232 | 
             
            ```bash
         | 
| 233 | 
             
            git clone https://github.com/infiniflow/ragflow.git
         | 
| 234 | 
             
            cd ragflow/
         | 
| 235 | 
            +
            pip3 install huggingface-hub nltk
         | 
| 236 | 
             
            python3 download_deps.py
         | 
| 237 | 
             
            docker build -f Dockerfile -t infiniflow/ragflow:dev .
         | 
| 238 | 
             
            ```
         | 
    	
        README_ja.md
    CHANGED
    
    | @@ -202,7 +202,7 @@ | |
| 202 | 
             
            ```bash
         | 
| 203 | 
             
            git clone https://github.com/infiniflow/ragflow.git
         | 
| 204 | 
             
            cd ragflow/
         | 
| 205 | 
            -
            pip3 install huggingface-hub
         | 
| 206 | 
             
            python3 download_deps.py
         | 
| 207 | 
             
            docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
         | 
| 208 | 
             
            ```
         | 
| @@ -214,7 +214,7 @@ docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim . | |
| 214 | 
             
            ```bash
         | 
| 215 | 
             
            git clone https://github.com/infiniflow/ragflow.git
         | 
| 216 | 
             
            cd ragflow/
         | 
| 217 | 
            -
            pip3 install huggingface-hub
         | 
| 218 | 
             
            python3 download_deps.py
         | 
| 219 | 
             
            docker build -f Dockerfile -t infiniflow/ragflow:dev .
         | 
| 220 | 
             
            ```
         | 
|  | |
| 202 | 
             
            ```bash
         | 
| 203 | 
             
            git clone https://github.com/infiniflow/ragflow.git
         | 
| 204 | 
             
            cd ragflow/
         | 
| 205 | 
            +
            pip3 install huggingface-hub nltk
         | 
| 206 | 
             
            python3 download_deps.py
         | 
| 207 | 
             
            docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
         | 
| 208 | 
             
            ```
         | 
|  | |
| 214 | 
             
            ```bash
         | 
| 215 | 
             
            git clone https://github.com/infiniflow/ragflow.git
         | 
| 216 | 
             
            cd ragflow/
         | 
| 217 | 
            +
            pip3 install huggingface-hub nltk
         | 
| 218 | 
             
            python3 download_deps.py
         | 
| 219 | 
             
            docker build -f Dockerfile -t infiniflow/ragflow:dev .
         | 
| 220 | 
             
            ```
         | 
    	
        README_ko.md
    CHANGED
    
    | @@ -204,7 +204,7 @@ | |
| 204 | 
             
            ```bash
         | 
| 205 | 
             
            git clone https://github.com/infiniflow/ragflow.git
         | 
| 206 | 
             
            cd ragflow/
         | 
| 207 | 
            -
            pip3 install huggingface-hub
         | 
| 208 | 
             
            python3 download_deps.py
         | 
| 209 | 
             
            docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
         | 
| 210 | 
             
            ```
         | 
| @@ -216,7 +216,7 @@ docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim . | |
| 216 | 
             
            ```bash
         | 
| 217 | 
             
            git clone https://github.com/infiniflow/ragflow.git
         | 
| 218 | 
             
            cd ragflow/
         | 
| 219 | 
            -
            pip3 install huggingface-hub
         | 
| 220 | 
             
            python3 download_deps.py
         | 
| 221 | 
             
            docker build -f Dockerfile -t infiniflow/ragflow:dev .
         | 
| 222 | 
             
            ```
         | 
|  | |
| 204 | 
             
            ```bash
         | 
| 205 | 
             
            git clone https://github.com/infiniflow/ragflow.git
         | 
| 206 | 
             
            cd ragflow/
         | 
| 207 | 
            +
            pip3 install huggingface-hub nltk
         | 
| 208 | 
             
            python3 download_deps.py
         | 
| 209 | 
             
            docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
         | 
| 210 | 
             
            ```
         | 
|  | |
| 216 | 
             
            ```bash
         | 
| 217 | 
             
            git clone https://github.com/infiniflow/ragflow.git
         | 
| 218 | 
             
            cd ragflow/
         | 
| 219 | 
            +
            pip3 install huggingface-hub nltk
         | 
| 220 | 
             
            python3 download_deps.py
         | 
| 221 | 
             
            docker build -f Dockerfile -t infiniflow/ragflow:dev .
         | 
| 222 | 
             
            ```
         | 
    	
        README_zh.md
    CHANGED
    
    | @@ -204,7 +204,7 @@ | |
| 204 | 
             
            ```bash
         | 
| 205 | 
             
            git clone https://github.com/infiniflow/ragflow.git
         | 
| 206 | 
             
            cd ragflow/
         | 
| 207 | 
            -
            pip3 install huggingface-hub
         | 
| 208 | 
             
            python3 download_deps.py
         | 
| 209 | 
             
            docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
         | 
| 210 | 
             
            ```
         | 
| @@ -216,7 +216,7 @@ docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim . | |
| 216 | 
             
            ```bash
         | 
| 217 | 
             
            git clone https://github.com/infiniflow/ragflow.git
         | 
| 218 | 
             
            cd ragflow/
         | 
| 219 | 
            -
            pip3 install huggingface-hub
         | 
| 220 | 
             
            python3 download_deps.py
         | 
| 221 | 
             
            docker build -f Dockerfile -t infiniflow/ragflow:dev .
         | 
| 222 | 
             
            ```
         | 
|  | |
| 204 | 
             
            ```bash
         | 
| 205 | 
             
            git clone https://github.com/infiniflow/ragflow.git
         | 
| 206 | 
             
            cd ragflow/
         | 
| 207 | 
            +
            pip3 install huggingface-hub nltk
         | 
| 208 | 
             
            python3 download_deps.py
         | 
| 209 | 
             
            docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
         | 
| 210 | 
             
            ```
         | 
|  | |
| 216 | 
             
            ```bash
         | 
| 217 | 
             
            git clone https://github.com/infiniflow/ragflow.git
         | 
| 218 | 
             
            cd ragflow/
         | 
| 219 | 
            +
            pip3 install huggingface-hub nltk
         | 
| 220 | 
             
            python3 download_deps.py
         | 
| 221 | 
             
            docker build -f Dockerfile -t infiniflow/ragflow:dev .
         | 
| 222 | 
             
            ```
         | 
    	
        docs/guides/develop/build_docker_image.md
    CHANGED
    
    | @@ -64,7 +64,7 @@ This image is approximately 1 GB in size and relies on external LLM services, as | |
| 64 |  | 
| 65 | 
             
            On a `linux/amd64` host:
         | 
| 66 | 
             
            ```bash
         | 
| 67 | 
            -
            pip3 install huggingface-hub
         | 
| 68 | 
             
            python3 download_deps.py
         | 
| 69 | 
             
            docker build -f Dockerfile -t infiniflow/ragflow:dev-amd64 .
         | 
| 70 | 
             
            docker push infiniflow/ragflow:dev-amd64
         | 
| @@ -72,7 +72,7 @@ docker push infiniflow/ragflow:dev-amd64 | |
| 72 |  | 
| 73 | 
             
            On a `linux/arm64` host:
         | 
| 74 | 
             
            ```bash
         | 
| 75 | 
            -
            pip3 install huggingface-hub
         | 
| 76 | 
             
            python3 download_deps.py
         | 
| 77 | 
             
            docker build -f Dockerfile -t infiniflow/ragflow:dev-arm64 .
         | 
| 78 | 
             
            docker push infiniflow/ragflow:dev-arm64
         | 
|  | |
| 64 |  | 
| 65 | 
             
            On a `linux/amd64` host:
         | 
| 66 | 
             
            ```bash
         | 
| 67 | 
            +
            pip3 install huggingface-hub nltk
         | 
| 68 | 
             
            python3 download_deps.py
         | 
| 69 | 
             
            docker build -f Dockerfile -t infiniflow/ragflow:dev-amd64 .
         | 
| 70 | 
             
            docker push infiniflow/ragflow:dev-amd64
         | 
|  | |
| 72 |  | 
| 73 | 
             
            On a `linux/arm64` host:
         | 
| 74 | 
             
            ```bash
         | 
| 75 | 
            +
            pip3 install huggingface-hub nltk
         | 
| 76 | 
             
            python3 download_deps.py
         | 
| 77 | 
             
            docker build -f Dockerfile -t infiniflow/ragflow:dev-arm64 .
         | 
| 78 | 
             
            docker push infiniflow/ragflow:dev-arm64
         | 
    	
        download_deps.py
    CHANGED
    
    | @@ -1,6 +1,7 @@ | |
| 1 | 
             
            #!/usr/bin/env python3
         | 
| 2 |  | 
| 3 | 
             
            from huggingface_hub import snapshot_download
         | 
|  | |
| 4 | 
             
            import os
         | 
| 5 |  | 
| 6 | 
             
            repos = [
         | 
| @@ -12,13 +13,17 @@ repos = [ | |
| 12 | 
             
                "maidalun1020/bce-reranker-base_v1",
         | 
| 13 | 
             
            ]
         | 
| 14 |  | 
| 15 | 
            -
             | 
| 16 | 
             
            def download_model(repo_id):
         | 
| 17 | 
            -
                local_dir = os.path.join("huggingface.co", repo_id)
         | 
| 18 | 
             
                os.makedirs(local_dir, exist_ok=True)
         | 
| 19 | 
             
                snapshot_download(repo_id=repo_id, local_dir=local_dir)
         | 
| 20 |  | 
| 21 |  | 
| 22 | 
             
            if __name__ == "__main__":
         | 
|  | |
|  | |
|  | |
|  | |
| 23 | 
             
                for repo_id in repos:
         | 
|  | |
| 24 | 
             
                    download_model(repo_id)
         | 
|  | |
| 1 | 
             
            #!/usr/bin/env python3
         | 
| 2 |  | 
| 3 | 
             
            from huggingface_hub import snapshot_download
         | 
| 4 | 
            +
            import nltk
         | 
| 5 | 
             
            import os
         | 
| 6 |  | 
| 7 | 
             
            repos = [
         | 
|  | |
| 13 | 
             
                "maidalun1020/bce-reranker-base_v1",
         | 
| 14 | 
             
            ]
         | 
| 15 |  | 
|  | |
| 16 | 
             
            def download_model(repo_id):
         | 
| 17 | 
            +
                local_dir = os.path.abspath(os.path.join("huggingface.co", repo_id))
         | 
| 18 | 
             
                os.makedirs(local_dir, exist_ok=True)
         | 
| 19 | 
             
                snapshot_download(repo_id=repo_id, local_dir=local_dir)
         | 
| 20 |  | 
| 21 |  | 
| 22 | 
             
            if __name__ == "__main__":
         | 
| 23 | 
            +
                local_dir = os.path.abspath('nltk_data')
         | 
| 24 | 
            +
                for data in ['wordnet', 'punkt', 'wordnet']:
         | 
| 25 | 
            +
                    print(f"Downloading nltk {data}...")
         | 
| 26 | 
            +
                    nltk.download(data, download_dir=local_dir)
         | 
| 27 | 
             
                for repo_id in repos:
         | 
| 28 | 
            +
                    print(f"Downloading huggingface repo {repo_id}...")
         | 
| 29 | 
             
                    download_model(repo_id)
         |