File size: 7,461 Bytes
73d57ae
 
 
 
98aae70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73d57ae
 
 
 
 
 
 
479ced5
98aae70
479ced5
cbcf1e4
1dc37e8
 
 
ca54b04
 
 
 
5a08ed8
 
 
98aae70
 
 
 
479ced5
73d57ae
04fd216
73d57ae
442515d
04fd216
9626485
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73d57ae
 
 
 
 
 
 
98aae70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
442515d
 
98aae70
442515d
98aae70
 
442515d
98aae70
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# Official Python 3.9 image, slim variant.
FROM python:3.9-slim

# Relative paths in later instructions (COPY, the start script) resolve here.
WORKDIR /app

# Home and temp-directory variables all point at one scratch directory under
# /tmp instead of their usual defaults.
ENV TEMP_DIR=/tmp/docling_temp \
    HOME=/tmp/docling_temp \
    USERPROFILE=/tmp/docling_temp \
    TMPDIR=/tmp/docling_temp \
    TEMP=/tmp/docling_temp \
    TMP=/tmp/docling_temp

# Hugging Face cache locations, each redirected to a subdirectory of
# /tmp/docling_temp so nothing is written to a cache directory under "/".
ENV HF_HOME=/tmp/docling_temp/huggingface \
    HF_CACHE_HOME=/tmp/docling_temp/huggingface_cache \
    HF_HUB_CACHE=/tmp/docling_temp/huggingface_cache \
    TRANSFORMERS_CACHE=/tmp/docling_temp/transformers_cache \
    HF_DATASETS_CACHE=/tmp/docling_temp/datasets_cache \
    DIFFUSERS_CACHE=/tmp/docling_temp/diffusers_cache \
    ACCELERATE_CACHE=/tmp/docling_temp/accelerate_cache

# Hugging Face Hub switches: telemetry and implicit-token flags set to 1,
# offline flag set to 0.
ENV HF_HUB_DISABLE_TELEMETRY=1 \
    HF_HUB_DISABLE_IMPLICIT_TOKEN=1 \
    HF_HUB_OFFLINE=0

# Remaining cache/config/data locations, all kept under /tmp/docling_temp:
#   - home directories for other ML libraries (Torch, TensorFlow, Keras)
#   - XDG base directories
#   - EasyOCR model path
#   - generic cache/model/data/config variables
#   - PYTHONPATH
ENV TORCH_HOME=/tmp/docling_temp/torch \
    TENSORFLOW_HOME=/tmp/docling_temp/tensorflow \
    KERAS_HOME=/tmp/docling_temp/keras \
    XDG_CACHE_HOME=/tmp/docling_temp/cache \
    XDG_CONFIG_HOME=/tmp/docling_temp/config \
    XDG_DATA_HOME=/tmp/docling_temp/data \
    EASYOCR_MODULE_PATH=/tmp/docling_temp/easyocr_models \
    CACHE_DIR=/tmp/docling_temp/cache \
    MODEL_CACHE_DIR=/tmp/docling_temp/models \
    CACHE=/tmp/docling_temp/cache \
    MODELS=/tmp/docling_temp/models \
    DATA=/tmp/docling_temp/data \
    CONFIG=/tmp/docling_temp/config \
    PYTHONPATH=/tmp/docling_temp

# OS packages: compiler toolchain, curl (used by the HEALTHCHECK below) and git.
#
# --no-install-recommends keeps optional "recommended" packages out of the
# image; ca-certificates is therefore listed explicitly so HTTPS access from
# curl/git does not depend on it being pulled in as a recommendation.
#
# software-properties-common was removed from the list: nothing in this
# Dockerfile calls add-apt-repository, and the package is reportedly not
# available in newer Debian releases, where requesting it aborts the build.
# NOTE(review): re-add it only if application code really needs it.
#
# The apt package index is deleted in the same layer so it never reaches
# the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

# Create the Streamlit config directory and every cache/config/data directory
# the ENV instructions above point at.
#
# The ENV values are all of the form /tmp/docling_temp/<name>, so those are the
# paths that must exist. The same names directly under /tmp are also created
# (as this step did originally) in case application code refers to them;
# no ENV variable in this file does.
#
# Mode 777 is kept from the original: the directories are world-writable so the
# process can write to them whatever UID it runs as.
# NOTE(review): presumably the runtime user is not root - confirm, and tighten
# the mode if a fixed USER is introduced.
RUN set -eu; \
    mkdir -p /app/.streamlit /tmp/docling_temp; \
    chmod 755 /app/.streamlit; \
    chmod 777 /tmp/docling_temp; \
    for name in \
        accelerate_cache cache config data datasets_cache diffusers_cache \
        easyocr_models huggingface huggingface_cache keras models \
        tensorflow torch transformers_cache; \
    do \
        mkdir -p "/tmp/docling_temp/${name}" "/tmp/${name}"; \
        chmod 777 "/tmp/docling_temp/${name}" "/tmp/${name}"; \
    done

# Top-level project files land in the working directory (/app):
# dependency list, project metadata, the permission self-test run by the
# start script, and the README.
COPY requirements.txt pyproject.toml test_permissions.py README.md ./

# Application sources go to /app/src.
COPY src/ ./src/

# Write /app/.streamlit/config.toml at build time rather than copying it from
# the build context. Each printf argument becomes one line of the file; an
# empty argument yields a blank separator line between TOML sections.
RUN printf '%s\n' \
        '[global]' \
        'developmentMode = false' \
        '' \
        '[server]' \
        'fileWatcherType = "none"' \
        'headless = true' \
        'enableCORS = false' \
        'enableXsrfProtection = false' \
        '' \
        '[browser]' \
        'gatherUsageStats = false' \
        'serverAddress = "0.0.0.0"' \
        'serverPort = 8501' \
        '' \
        '[theme]' \
        'primaryColor = "#1f77b4"' \
        'backgroundColor = "#ffffff"' \
        'secondaryBackgroundColor = "#f0f2f6"' \
        'textColor = "#262730"' \
        > /app/.streamlit/config.toml

# Install Python dependencies.
# --no-cache-dir stops pip from keeping its download/wheel cache. Without it
# the cache would be written under XDG_CACHE_HOME (/tmp/docling_temp/cache per
# the ENV above), which both bloats this layer and pre-creates that directory
# owned by the build user.
RUN pip3 install --no-cache-dir -r requirements.txt

# Port Streamlit is started on (see --server.port in the start script).
EXPOSE 8501

# Probe Streamlit's health endpoint. curl --fail exits with its own non-zero
# codes on HTTP or connection errors; "|| exit 1" maps every failure to 1,
# the exit status Docker documents for an unhealthy container.
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health || exit 1

# Generate /app/start.sh, the container entry point. Each printf argument is
# one line of the script. The script:
#   1. re-exports the same variables the ENV instructions set,
#   2. prints a few of them,
#   3. runs test_permissions.py,
#   4. replaces itself (exec) with Streamlit if the test succeeded, otherwise
#      exits with status 1.
# Arguments are single-quoted so "$HF_HUB_CACHE", "$?" etc. are written
# literally and only expanded when the script runs.
RUN printf '%s\n' \
        '#!/bin/bash' \
        'export TEMP_DIR=/tmp/docling_temp' \
        'export HOME=/tmp/docling_temp' \
        'export USERPROFILE=/tmp/docling_temp' \
        'export TMPDIR=/tmp/docling_temp' \
        'export TEMP=/tmp/docling_temp' \
        'export TMP=/tmp/docling_temp' \
        'export HF_HOME=/tmp/docling_temp/huggingface' \
        'export HF_CACHE_HOME=/tmp/docling_temp/huggingface_cache' \
        'export HF_HUB_CACHE=/tmp/docling_temp/huggingface_cache' \
        'export TRANSFORMERS_CACHE=/tmp/docling_temp/transformers_cache' \
        'export HF_DATASETS_CACHE=/tmp/docling_temp/datasets_cache' \
        'export DIFFUSERS_CACHE=/tmp/docling_temp/diffusers_cache' \
        'export ACCELERATE_CACHE=/tmp/docling_temp/accelerate_cache' \
        'export HF_HUB_DISABLE_TELEMETRY=1' \
        'export HF_HUB_DISABLE_IMPLICIT_TOKEN=1' \
        'export HF_HUB_OFFLINE=0' \
        'export TORCH_HOME=/tmp/docling_temp/torch' \
        'export TENSORFLOW_HOME=/tmp/docling_temp/tensorflow' \
        'export KERAS_HOME=/tmp/docling_temp/keras' \
        'export XDG_CACHE_HOME=/tmp/docling_temp/cache' \
        'export XDG_CONFIG_HOME=/tmp/docling_temp/config' \
        'export XDG_DATA_HOME=/tmp/docling_temp/data' \
        'export EASYOCR_MODULE_PATH=/tmp/docling_temp/easyocr_models' \
        'export CACHE_DIR=/tmp/docling_temp/cache' \
        'export MODEL_CACHE_DIR=/tmp/docling_temp/models' \
        'export CACHE=/tmp/docling_temp/cache' \
        'export MODELS=/tmp/docling_temp/models' \
        'export DATA=/tmp/docling_temp/data' \
        'export CONFIG=/tmp/docling_temp/config' \
        'export PYTHONPATH=/tmp/docling_temp' \
        'echo "Environment variables set for Hugging Face Hub cache directories"' \
        'echo "HF_HUB_CACHE: $HF_HUB_CACHE"' \
        'echo "HF_CACHE_HOME: $HF_CACHE_HOME"' \
        'echo "TEMP_DIR: $TEMP_DIR"' \
        'echo "Running environment test..."' \
        'python test_permissions.py' \
        'if [ $? -eq 0 ]; then' \
        '    echo "Environment test passed, starting Streamlit app..."' \
        '    exec streamlit run src/streamlit_app.py --server.port=8501 --server.address=0.0.0.0' \
        'else' \
        '    echo "Environment test failed, exiting..."' \
        '    exit 1' \
        'fi' \
        > /app/start.sh \
    && chmod +x /app/start.sh

# Exec form: the script is run directly, not through an intermediate shell.
ENTRYPOINT ["/app/start.sh"]