davanstrien HF Staff committed on
Commit
f11b1de
·
1 Parent(s): 38cf18d

Create Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +51 -0
Dockerfile ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # syntax=docker/dockerfile:1
2
+ # Stable frontend channel above; the base image is pinned by digest so rebuilds start from the same image.
3
+ FROM quay.io/unstructured-io/base-images:rocky9.2-5@sha256:1721c3b0711e4e90587e3b4917f1b616e4603ddf5b4986bfaa68d02d82a13aba AS base
4
+ # NOTE(crag): NB_USER ARG for mybinder.org compat:
5
+ # https://mybinder.readthedocs.io/en/latest/tutorials/dockerfile.html
6
+ ARG NB_USER=notebook-user
7
+ ARG NB_UID=1000
8
+ # PIP_VERSION and PIPELINE_PACKAGE have no defaults; supply them with --build-arg.
9
+ ARG PIP_VERSION
10
+ ARG PIPELINE_PACKAGE
11
+
12
+ # Set up environment (key=value form; the space-separated ENV form is legacy syntax)
13
+ ENV USER=${NB_USER} HOME=/home/${NB_USER}
14
+
15
+ # Create the notebook group/user and a go-rwx ~/.ssh holding GitHub's RSA host key; chown it so ${NB_USER} (not the build user) can read it.
16
+ RUN groupadd --gid ${NB_UID} ${NB_USER} && useradd --uid ${NB_UID} --gid ${NB_UID} ${NB_USER}
17
+ WORKDIR ${HOME}
18
+ RUN mkdir ${HOME}/.ssh && chmod go-rwx ${HOME}/.ssh \
19
+   && ssh-keyscan -t rsa github.com >> ${HOME}/.ssh/known_hosts && chown -R ${NB_USER}:${NB_USER} ${HOME}/.ssh
20
+
21
+ # ${PYTHONPATH:+...} avoids a leading ':' (an empty path entry) when PYTHONPATH is not already set.
22
+ ENV PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${HOME}" PATH="/home/${NB_USER}/.local/bin:${PATH}"
23
+
24
+ # python-deps: install requirements as ${NB_USER}; "Development Tools" is installed before and removed after the pip install, within one layer.
25
+ FROM base AS python-deps
26
+ # COPY requirements/dev.txt requirements-dev.txt
27
+ COPY requirements/base.txt requirements-base.txt
28
+ RUN python3.10 -m pip install pip==${PIP_VERSION} && dnf -y groupinstall "Development Tools" \
29
+   && su -l ${NB_USER} -c 'pip3.10 install --no-cache-dir -r requirements-base.txt' \
30
+   && dnf -y groupremove "Development Tools" && dnf clean all \
31
+   # Only the symlink is best-effort: the braces stop `|| true` from masking a failure of any earlier step in this chain.
32
+   && { ln -s /home/${NB_USER}/.local/bin/pip3.10 /usr/local/bin/pip3.10 || true; }
33
+
34
+ USER ${NB_USER}
35
+
36
+ # model-deps: fetch the NLTK 'punkt' and 'averaged_perceptron_tagger' data and call unstructured's initialize() at build time.
37
+ FROM python-deps AS model-deps
38
+ RUN python3.10 -c "import nltk; nltk.download('punkt'); nltk.download('averaged_perceptron_tagger')" \
39
+   && python3.10 -c "from unstructured.ingest.doc_processor.generalized import initialize; initialize()"
40
+
41
+ # code: final stage; copies the application files into the working directory, owned by ${NB_USER}.
42
+ FROM model-deps AS code
43
+ COPY --chown=${NB_USER}:${NB_USER} CHANGELOG.md logger_config.yaml ./
44
+ COPY --chown=${NB_USER}:${NB_USER} prepline_${PIPELINE_PACKAGE}/ prepline_${PIPELINE_PACKAGE}/
45
+ COPY --chown=${NB_USER}:${NB_USER} exploration-notebooks exploration-notebooks
46
+ COPY --chown=${NB_USER}:${NB_USER} scripts/app-start.sh scripts/app-start.sh
47
+
48
+ ENTRYPOINT ["scripts/app-start.sh"]
49
+ # Declare 8000 as the default port. EXPOSE is metadata only and does not publish the port,
50
+ # but some tooling inspects containers and acts on the networking support they declare.
51
+ EXPOSE 8000