---
# Docker Compose service definition for the Gema model API.
# Quoted to force a string — an unquoted 3.8 is parsed as a float by YAML.
version: "3.8"

services:
  gema-model-api:
    build: .
    ports:
      # Host:container — quoted to avoid the YAML sexagesimal-number trap.
      - "8000:8000"
    environment:
      # Unbuffered stdout/stderr so container logs appear immediately.
      - PYTHONUNBUFFERED=1
    volumes:
      # Persist the Hugging Face cache so model downloads survive restarts.
      - ./cache:/root/.cache/huggingface
    restart: unless-stopped
    healthcheck:
      # Exec-form probe against the API's health endpoint.
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
    deploy:
      resources:
        limits:
          # Adjust based on your system's available RAM.
          memory: 8G
        reservations:
          memory: 4G