# RamMAC / docker-compose.pc1-gpu.yml
# Aaryan17's picture
# feat: upload full MAC source (mac/, frontend/, alembic/, tests/)
# 9c0b225 verified
# ═══════════════════════════════════════════════════════════
# PC 1 — GPU Inference Node (vLLM only)
# ═══════════════════════════════════════════════════════════
# Run on the PC with the GPU. Exposes vLLM on port 8001.
# PC 2 connects to this PC's IP:8001 for AI inference.
#
# Usage:
# docker compose -f docker-compose.pc1-gpu.yml up -d
# ═══════════════════════════════════════════════════════════
services:
  # GPU inference service: runs the vllm/vllm-openai image serving
  # Qwen/Qwen2.5-7B-Instruct-AWQ and publishes it on host port 8001.
  vllm-speed:
    # NOTE(review): `latest` is a floating tag; consider pinning a release
    # so redeploys are reproducible.
    image: vllm/vllm-openai:latest
    container_name: mac-vllm-speed
    ports:
      # host:container — must match the --port value passed in `command`.
      - "8001:8001"
    environment:
      # Points the Hugging Face cache at the path backed by the hf-cache volume.
      - HF_HOME=/root/.cache/huggingface
    volumes:
      # Named volume, so cached files outlive container re-creation.
      - hf-cache:/root/.cache/huggingface
    # Flags handed to the image's entrypoint, one token per list item
    # (exec form). Numeric values are quoted so they are passed through
    # verbatim as strings (e.g. "0.90" is not re-typed to 0.9).
    command:
      - "--model"
      - "Qwen/Qwen2.5-7B-Instruct-AWQ"
      - "--port"
      - "8001"
      - "--gpu-memory-utilization"
      - "0.90"
      - "--max-model-len"
      - "8192"
      # NOTE(review): permits code shipped in the model repo to run;
      # confirm this model actually needs it.
      - "--trust-remote-code"
      - "--enforce-eager"
    deploy:
      resources:
        reservations:
          devices:
            # Reserve one NVIDIA GPU for this container.
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
volumes:
  # Named volume mounted by vllm-speed at /root/.cache/huggingface.
  # The empty mapping means "use Compose's default volume settings".
  hf-cache: {}