# Image for a FastAPI application served by uvicorn on Hugging Face Spaces.
# Base: PyTorch with CUDA 12.1 for L4 GPU compatibility.
FROM pytorch/pytorch:2.2.2-cuda12.1-cudnn8-runtime

# Install system dependencies (alphabetical for easy diffing). The apt lists
# are removed in the same layer so they do not end up in the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        ffmpeg \
        git \
        git-lfs \
        wget && \
    rm -rf /var/lib/apt/lists/*

# Create the non-root user for Hugging Face Spaces (UID 1000) and, while still
# root, create the application and cache directories and hand them to that
# user, so later writes to /app and HF_HOME do not depend on how WORKDIR
# assigns ownership.
RUN useradd -m -u 1000 user && \
    mkdir -p /app/.cache && \
    chown -R user /app

USER user
WORKDIR /app

# Build- and run-time environment.
# - PATH: pip run as a non-root user places console scripts in ~/.local/bin.
# - HF_HOME: cache location, inside the user-owned /app.
# - PYTHONPATH: prepend /app; the ":+" form appends the previous value only
#   when one exists, avoiding a trailing ":" (an empty entry, which Python
#   treats as the current directory).
# - UVICORN_HOST / UVICORN_PORT mirror the values passed explicitly in CMD.
ENV PATH="/home/user/.local/bin:$PATH" \
    HF_HOME=/app/.cache \
    PYTHONPATH="/app${PYTHONPATH:+:${PYTHONPATH}}" \
    PYTHONUNBUFFERED=1 \
    UVICORN_HOST=0.0.0.0 \
    UVICORN_PORT=7860

# Upgrade pip (no download cache kept in the layer).
RUN pip install --no-cache-dir --upgrade pip

# Install Python dependencies BEFORE copying the application source so this
# layer stays cached until requirements.txt itself changes.
COPY --chown=user requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Install additional optimizations for L4 GPU. Best effort by design: if the
# install fails, the build prints a notice and continues without it.
RUN pip install --no-cache-dir flash-attn --no-build-isolation || echo "Flash attention not available, continuing..."

# Copy application files last; source edits only invalidate layers from here on.
COPY --chown=user . /app

# Expose port for Hugging Face Spaces (documentation only; does not publish).
EXPOSE 7860

# Health check using the FastAPI health endpoint.
HEALTHCHECK --interval=30s --timeout=30s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# Start the FastAPI application with uvicorn (exec form, so uvicorn is PID 1
# and receives stop signals directly).
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]