Spaces:
Sleeping
Sleeping
File size: 7,461 Bytes
73d57ae 98aae70 73d57ae 479ced5 98aae70 479ced5 cbcf1e4 1dc37e8 ca54b04 5a08ed8 98aae70 479ced5 73d57ae 04fd216 73d57ae 442515d 04fd216 9626485 73d57ae 98aae70 442515d 98aae70 442515d 98aae70 442515d 98aae70 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
# Base image: the official python image, 3.9-slim tag.
FROM python:3.9-slim
# Every relative path below (COPY destinations, the start script's
# `python test_permissions.py` and `src/streamlit_app.py`) resolves against /app.
WORKDIR /app
# Home/temp-style variables: all redirected to a single scratch directory so
# nothing resolves to the root filesystem.
ENV TEMP_DIR=/tmp/docling_temp \
    HOME=/tmp/docling_temp \
    USERPROFILE=/tmp/docling_temp \
    TMPDIR=/tmp/docling_temp \
    TEMP=/tmp/docling_temp \
    TMP=/tmp/docling_temp

# Hugging Face home/cache locations (flagged by the original author as
# critical for keeping the libraries away from /.cache).
ENV HF_HOME=/tmp/docling_temp/huggingface \
    HF_CACHE_HOME=/tmp/docling_temp/huggingface_cache \
    HF_HUB_CACHE=/tmp/docling_temp/huggingface_cache \
    TRANSFORMERS_CACHE=/tmp/docling_temp/transformers_cache \
    HF_DATASETS_CACHE=/tmp/docling_temp/datasets_cache \
    DIFFUSERS_CACHE=/tmp/docling_temp/diffusers_cache \
    ACCELERATE_CACHE=/tmp/docling_temp/accelerate_cache

# Hugging Face Hub behaviour switches.
ENV HF_HUB_DISABLE_TELEMETRY=1 \
    HF_HUB_DISABLE_IMPLICIT_TOKEN=1 \
    HF_HUB_OFFLINE=0

# Home directories for the other ML frameworks.
ENV TORCH_HOME=/tmp/docling_temp/torch \
    TENSORFLOW_HOME=/tmp/docling_temp/tensorflow \
    KERAS_HOME=/tmp/docling_temp/keras

# XDG base directories.
ENV XDG_CACHE_HOME=/tmp/docling_temp/cache \
    XDG_CONFIG_HOME=/tmp/docling_temp/config \
    XDG_DATA_HOME=/tmp/docling_temp/data

# EasyOCR model storage.
ENV EASYOCR_MODULE_PATH=/tmp/docling_temp/easyocr_models

# Generic cache/model/data/config locations.
ENV CACHE_DIR=/tmp/docling_temp/cache \
    MODEL_CACHE_DIR=/tmp/docling_temp/models \
    CACHE=/tmp/docling_temp/cache \
    MODELS=/tmp/docling_temp/models \
    DATA=/tmp/docling_temp/data \
    CONFIG=/tmp/docling_temp/config

# Extra module search path for Python.
ENV PYTHONPATH=/tmp/docling_temp
# OS-level build/runtime tools, installed and cleaned up in a single layer.
#  * --no-install-recommends keeps optional packages out of the image.
#  * ca-certificates is listed explicitly because it is otherwise only pulled
#    in as a recommendation, and HTTPS in curl/git needs it.
#  * curl is required by the HEALTHCHECK in this file.
#  * software-properties-common was dropped: nothing in this file uses it, and
#    it is reportedly not installable on newer Debian releases that the floating
#    3.9-slim tag can resolve to.
#    NOTE(review): confirm nothing in requirements.txt relies on it.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*
# Create the Streamlit config directory and every writable cache directory.
#
# The ENV values in this file all point at /tmp/docling_temp/<name>, so those
# are the directories that must exist and be world-writable. The sibling
# /tmp/<name> directories that were created previously are still created, in
# case application code outside this file references them directly
# (NOTE(review): drop them once that is confirmed not to be the case).
RUN set -eu; \
    mkdir -p /app/.streamlit; \
    chmod 755 /app/.streamlit; \
    for name in \
        accelerate_cache \
        cache \
        config \
        data \
        datasets_cache \
        diffusers_cache \
        easyocr_models \
        huggingface \
        huggingface_cache \
        keras \
        models \
        tensorflow \
        torch \
        transformers_cache \
    ; do \
        mkdir -p "/tmp/docling_temp/${name}" "/tmp/${name}"; \
        chmod 777 "/tmp/docling_temp/${name}" "/tmp/${name}"; \
    done; \
    chmod 777 /tmp/docling_temp
# Project metadata, the permission self-test and the README go to the working
# directory; the application package keeps its own src/ subdirectory.
COPY requirements.txt pyproject.toml test_permissions.py README.md ./
COPY src/ ./src/
# Generate /app/.streamlit/config.toml at build time instead of copying it
# (the original author notes this avoids copy issues). printf emits one
# argument per line, so the file content is spelled out line by line.
RUN printf '%s\n' \
        '[global]' \
        'developmentMode = false' \
        '' \
        '[server]' \
        'fileWatcherType = "none"' \
        'headless = true' \
        'enableCORS = false' \
        'enableXsrfProtection = false' \
        '' \
        '[browser]' \
        'gatherUsageStats = false' \
        'serverAddress = "0.0.0.0"' \
        'serverPort = 8501' \
        '' \
        '[theme]' \
        'primaryColor = "#1f77b4"' \
        'backgroundColor = "#ffffff"' \
        'secondaryBackgroundColor = "#f0f2f6"' \
        'textColor = "#262730"' \
        > /app/.streamlit/config.toml
# Install Python dependencies. --no-cache-dir stops pip from writing its
# download/wheel cache during the build, which would otherwise be baked into
# this layer (and, given XDG_CACHE_HOME in this file, would land inside the
# runtime cache directory).
RUN pip3 install --no-cache-dir -r requirements.txt
# Port that the Streamlit server is started on by the start script.
EXPOSE 8501
# Probe Streamlit's health endpoint. HEALTHCHECK only defines exit status 0
# (healthy) and 1 (unhealthy), while curl --fail exits with other codes
# (e.g. 22 for an HTTP error), so any failure is normalised to 1.
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health || exit 1
# Generate /app/start.sh, the container entry point. The script:
#   1. re-exports every cache/temp variable, so the values below win even if
#      the container is started with different ones;
#   2. prints a few of them for diagnostics;
#   3. runs test_permissions.py and only starts Streamlit when it succeeds,
#      otherwise exits with status 1.
# `exec` replaces the shell with Streamlit so it receives container signals
# directly. The test command is checked directly in the `if` rather than via
# a separate `$?` comparison. All arguments are single-quoted, so the `$VAR`
# references are written literally and expanded when the script runs.
RUN printf '%s\n' \
        '#!/bin/bash' \
        'export TEMP_DIR=/tmp/docling_temp' \
        'export HOME=/tmp/docling_temp' \
        'export USERPROFILE=/tmp/docling_temp' \
        'export TMPDIR=/tmp/docling_temp' \
        'export TEMP=/tmp/docling_temp' \
        'export TMP=/tmp/docling_temp' \
        'export HF_HOME=/tmp/docling_temp/huggingface' \
        'export HF_CACHE_HOME=/tmp/docling_temp/huggingface_cache' \
        'export HF_HUB_CACHE=/tmp/docling_temp/huggingface_cache' \
        'export TRANSFORMERS_CACHE=/tmp/docling_temp/transformers_cache' \
        'export HF_DATASETS_CACHE=/tmp/docling_temp/datasets_cache' \
        'export DIFFUSERS_CACHE=/tmp/docling_temp/diffusers_cache' \
        'export ACCELERATE_CACHE=/tmp/docling_temp/accelerate_cache' \
        'export HF_HUB_DISABLE_TELEMETRY=1' \
        'export HF_HUB_DISABLE_IMPLICIT_TOKEN=1' \
        'export HF_HUB_OFFLINE=0' \
        'export TORCH_HOME=/tmp/docling_temp/torch' \
        'export TENSORFLOW_HOME=/tmp/docling_temp/tensorflow' \
        'export KERAS_HOME=/tmp/docling_temp/keras' \
        'export XDG_CACHE_HOME=/tmp/docling_temp/cache' \
        'export XDG_CONFIG_HOME=/tmp/docling_temp/config' \
        'export XDG_DATA_HOME=/tmp/docling_temp/data' \
        'export EASYOCR_MODULE_PATH=/tmp/docling_temp/easyocr_models' \
        'export CACHE_DIR=/tmp/docling_temp/cache' \
        'export MODEL_CACHE_DIR=/tmp/docling_temp/models' \
        'export CACHE=/tmp/docling_temp/cache' \
        'export MODELS=/tmp/docling_temp/models' \
        'export DATA=/tmp/docling_temp/data' \
        'export CONFIG=/tmp/docling_temp/config' \
        'export PYTHONPATH=/tmp/docling_temp' \
        'echo "Environment variables set for Hugging Face Hub cache directories"' \
        'echo "HF_HUB_CACHE: $HF_HUB_CACHE"' \
        'echo "HF_CACHE_HOME: $HF_CACHE_HOME"' \
        'echo "TEMP_DIR: $TEMP_DIR"' \
        'echo "Running environment test..."' \
        'if python test_permissions.py; then' \
        '  echo "Environment test passed, starting Streamlit app..."' \
        '  exec streamlit run src/streamlit_app.py --server.port=8501 --server.address=0.0.0.0' \
        'else' \
        '  echo "Environment test failed, exiting..."' \
        '  exit 1' \
        'fi' \
        > /app/start.sh \
    && chmod +x /app/start.sh
# Exec (JSON array) form: the script is run directly, without a wrapping shell.
ENTRYPOINT ["/app/start.sh"]