# 1. Base image
FROM python:3.10-slim

# 2. Install system dependencies required for compiling llama-cpp-python,
#    plus curl for the HEALTHCHECK below
RUN apt-get update && apt-get install -y \
    build-essential \
    cmake \
    curl \
    && rm -rf /var/lib/apt/lists/*

# 3. Set up a non-root user
RUN useradd -m -u 1000 user
USER user

# 4. Set environment variables & working directory
ENV HOME=/home/user
ENV PATH=$HOME/.local/bin:$PATH
WORKDIR $HOME/app

# 5. Copy requirements first for better Docker layer caching
COPY --chown=user requirements.txt .

# 6. Set CMake flags (via an environment variable picked up by the pip build)
#    to speed up the llama-cpp-python installation. This is the key fix for
#    the timeout error: it disables the CUDA (cuBLAS) and Metal backends so
#    the build doesn't probe for GPU toolchains.
ENV CMAKE_ARGS="-DLLAMA_CUBLAS=OFF -DLLAMA_METAL=OFF"

# 7. Install Python dependencies for the non-root user
RUN pip install --no-cache-dir --user -r requirements.txt

# 8. Download the model during the build process
#    (requires huggingface-cli, e.g. huggingface_hub[cli] in requirements.txt)
RUN huggingface-cli download Dnfs/gema-4b-indra10k-model1-Q4_K_M-GGUF \
    --local-dir ./model \
    --local-dir-use-symlinks False

# 9. Copy the rest of the application code
COPY --chown=user app.py .

# 10. Expose the port the app runs on
EXPOSE 8000

# 11. Health check to ensure the app is running
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1

# 12. Command to run the application
CMD ["python", "app.py"]
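
# Usage (a minimal sketch; the image tag "llama-app" is an assumption,
# not part of the original setup):
#
#   docker build -t llama-app .
#   docker run -p 8000:8000 llama-app
#
#   # once the container is up, the health endpoint should respond:
#   curl http://localhost:8000/health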