Spaces:
Sleeping
Sleeping
Commit
·
f11b1de
1
Parent(s):
38cf18d
Create Dockerfile
Browse files- Dockerfile +51 -0
Dockerfile
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# syntax=docker/dockerfile:1
# Stable Dockerfile frontend channel; no instruction in this file needs experimental features.
|
2 |
+
# Stage "base": base image pinned by both tag and sha256 digest.
FROM quay.io/unstructured-io/base-images:rocky9.2-5@sha256:1721c3b0711e4e90587e3b4917f1b616e4603ddf5b4986bfaa68d02d82a13aba AS base
|
3 |
+
|
4 |
+
# NOTE(crag): NB_USER ARG for mybinder.org compat:
|
5 |
+
# https://mybinder.readthedocs.io/en/latest/tutorials/dockerfile.html
|
6 |
+
ARG NB_USER=notebook-user
|
7 |
+
ARG NB_UID=1000
|
8 |
+
ARG PIP_VERSION
|
9 |
+
ARG PIPELINE_PACKAGE
|
10 |
+
|
11 |
+
# Set up environment
|
12 |
+
# Login name exported to the container environment; follows the NB_USER build arg.
ENV USER=${NB_USER}
|
13 |
+
# Home directory of the NB_USER account; later instructions reference it as ${HOME}.
ENV HOME=/home/${NB_USER}
|
14 |
+
|
15 |
+
RUN groupadd --gid ${NB_UID} ${NB_USER}
|
16 |
+
RUN useradd --uid ${NB_UID} --gid ${NB_UID} ${NB_USER}
|
17 |
+
WORKDIR ${HOME}
|
18 |
+
RUN mkdir ${HOME}/.ssh && chmod go-rwx ${HOME}/.ssh \
|
19 |
+
&& ssh-keyscan -t rsa github.com >> /home/${NB_USER}/.ssh/known_hosts
|
20 |
+
|
21 |
+
# Append HOME to any PYTHONPATH inherited from the base image. The ${VAR:+...} form
# emits the "existing:" prefix only when PYTHONPATH is already set, so an unset
# variable does not produce a leading ':' (an empty search-path entry).
ENV PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${HOME}"
|
22 |
+
ENV PATH="/home/${NB_USER}/.local/bin:${PATH}"
|
23 |
+
|
24 |
+
# Stage "python-deps": builds on "base" and installs the Python requirements.
FROM base AS python-deps
|
25 |
+
# COPY requirements/dev.txt requirements-dev.txt
|
26 |
+
COPY requirements/base.txt requirements-base.txt
|
27 |
+
RUN python3.10 -m pip install pip==${PIP_VERSION} \
|
28 |
+
&& dnf -y groupinstall "Development Tools" \
|
29 |
+
&& su -l ${NB_USER} -c 'pip3.10 install --no-cache -r requirements-base.txt' \
|
30 |
+
&& dnf -y groupremove "Development Tools" \
|
31 |
+
&& dnf clean all \
|
32 |
+
# Grouped so "|| true" only tolerates a failed symlink; ungrouped, it would mask a failure of any earlier command in this && chain.
&& (ln -s /home/${NB_USER}/.local/bin/pip3.10 /usr/local/bin/pip3.10 || true)
|
33 |
+
|
34 |
+
USER ${NB_USER}
|
35 |
+
|
36 |
+
# Stage "model-deps": builds on "python-deps"; downloads NLTK data and runs the unstructured initializer.
FROM python-deps AS model-deps
|
37 |
+
RUN python3.10 -c "import nltk; nltk.download('punkt')" && \
|
38 |
+
python3.10 -c "import nltk; nltk.download('averaged_perceptron_tagger')" && \
|
39 |
+
python3.10 -c "from unstructured.ingest.doc_processor.generalized import initialize; initialize()"
|
40 |
+
|
41 |
+
# Stage "code": builds on "model-deps"; copies in application files and sets the entrypoint.
FROM model-deps AS code
|
42 |
+
COPY --chown=${NB_USER}:${NB_USER} CHANGELOG.md CHANGELOG.md
|
43 |
+
COPY --chown=${NB_USER}:${NB_USER} logger_config.yaml logger_config.yaml
|
44 |
+
COPY --chown=${NB_USER}:${NB_USER} prepline_${PIPELINE_PACKAGE}/ prepline_${PIPELINE_PACKAGE}/
|
45 |
+
COPY --chown=${NB_USER}:${NB_USER} exploration-notebooks exploration-notebooks
|
46 |
+
COPY --chown=${NB_USER}:${NB_USER} scripts/app-start.sh scripts/app-start.sh
|
47 |
+
|
48 |
+
ENTRYPOINT ["scripts/app-start.sh"]
|
49 |
+
# Expose a default port of 8000. Note: The EXPOSE instruction does not actually publish the port,
|
50 |
+
# but some tooling will inspect containers and perform work contingent on networking support declared.
|
51 |
+
EXPOSE 8000
|