# Image for a Python 3.10 application (app.py) that relies on Tesseract OCR,
# Playwright-driven Chromium and the indobenchmark/indobert-base-p1 tokenizer.
# The container runs as the unprivileged account "user" (UID 1000) and
# documents port 7860 as its listening port.

# Pin the Debian release so the apt package names below keep resolving when
# the floating "3.10-slim" tag moves on to a newer Debian.
FROM python:3.10-slim-bookworm

# Fail a RUN step when any stage of a pipeline fails (needed for `curl | bash`).
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Install essential OS packages.
# `libgl1` replaces the transitional `libgl1-mesa-glx` package, which newer
# Debian releases no longer ship; it provides the same libGL.so.1.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        git \
        libgl1 \
        libglib2.0-0 \
        software-properties-common \
        tesseract-ocr \
        tesseract-ocr-ind \
    && rm -rf /var/lib/apt/lists/*

# Install Node.js 18 from NodeSource (the original file lists it as a
# Playwright requirement). `-f` makes curl fail on HTTP errors instead of
# piping an error page into bash.
RUN curl -fsSL https://deb.nodesource.com/setup_18.x | bash - \
    && apt-get install -y --no-install-recommends nodejs \
    && rm -rf /var/lib/apt/lists/*

# Set up working directory (created automatically if missing).
WORKDIR /app

# Create the non-root user and the cache directories it needs, in one layer.
# NOTE(review): /.cache stays world-writable as in the original, presumably so
# the image also works when started with an arbitrary UID whose HOME is "/";
# tighten this if that scenario does not apply.
RUN useradd -m -u 1000 user \
    && mkdir -p /.cache /home/user/.cache \
    && chown -R user:user /app /.cache /home/user/.cache \
    && chmod -R 777 /.cache

# Install Python dependencies first so this layer is reused until
# requirements.txt itself changes.
COPY --chown=user:user requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt

# Install Playwright and Chromium.
# The browser is downloaded while building as root; without an explicit
# location it would land in root's private cache and be invisible to "user"
# at runtime. PLAYWRIGHT_BROWSERS_PATH points both the build-time download and
# the runtime lookup at one shared, world-readable directory.
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
RUN pip install --no-cache-dir playwright \
    && playwright install chromium \
    && playwright install-deps chromium \
    && chmod -R a+rX /ms-playwright \
    && rm -rf /var/lib/apt/lists/*

# Download and cache the tokenizer at build time, then hand it to "user".
RUN mkdir -p /app/tokenizers/indobert-base-p1 \
    && python -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('indobenchmark/indobert-base-p1', cache_dir='/app/tokenizers')" \
    && chown -R user:user /app/tokenizers

# Copy application code, owned by "user" from the start (avoids a second
# layer that only changes ownership).
COPY --chown=user:user . /app/

# Create directories for screenshots and models (relative to /app).
RUN mkdir -p screenshots models \
    && chown -R user:user screenshots models

# The app is expected to listen on port 7860 (Gradio default).
EXPOSE 7860

# Switch to non-root user for runtime.
USER user

# Start the application (exec form so the Python process receives signals).
CMD ["python", "app.py"]