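# Docker Compose stack for Cyber-LLM: the API service runs by default, while training,
# MLflow, Prometheus/Grafana, Redis, and the Nginx proxy are gated behind Compose
# profiles (requires docker-compose 1.28+ or Compose V2), e.g.:
#   docker compose up -d                         # API only
#   docker compose --profile training up -d      # API + training + MLflow
#   docker compose --profile monitoring up -d    # API + MLflow + Prometheus + Grafana
#   docker compose --profile cache --profile production up -d
# Note: CUDA_VISIBLE_DEVICES below only selects a GPU inside the container; actually
# exposing a GPU to the containers is assumed to be handled by the host runtime
# (e.g. NVIDIA Container Toolkit, or a gpus/deploy.resources entry added as needed).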
version: '3.8'

services:
  # Main Cyber-LLM API service
  cyber-llm-api:
    build:
      context: ../../../
      dockerfile: src/deployment/docker/Dockerfile
      target: production
    container_name: cyber-llm-api
    ports:
      - "8000:8000"
    environment:
      - PYTHONPATH=/home/cyberllm
      - CUDA_VISIBLE_DEVICES=0
      - TRANSFORMERS_CACHE=/home/cyberllm/models/cache
    volumes:
      - ./data:/home/cyberllm/data
      - ./models:/home/cyberllm/models
      - ./logs:/home/cyberllm/logs
      - ./configs:/home/cyberllm/configs
    networks:
      - cyber-llm-network
    restart: unless-stopped
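    # The healthcheck below assumes curl is installed in the production image and that
    # the API serves GET /health on port 8000.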
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s

  # Training service (optional)
  cyber-llm-training:
    build:
      context: ../../../
      dockerfile: src/deployment/docker/Dockerfile
      target: training
    container_name: cyber-llm-training
    environment:
      - PYTHONPATH=/home/cyberllm
      - CUDA_VISIBLE_DEVICES=0
      - WANDB_API_KEY=${WANDB_API_KEY}
      - MLFLOW_TRACKING_URI=http://mlflow:5000
    volumes:
      - ./data:/home/cyberllm/data
      - ./models:/home/cyberllm/models
      - ./logs:/home/cyberllm/logs
      - ./configs:/home/cyberllm/configs
    networks:
      - cyber-llm-network
    profiles:
      - training
    depends_on:
      - mlflow

  # MLflow tracking server
  mlflow:
    image: python:3.10-slim
    container_name: cyber-llm-mlflow
    ports:
      - "5000:5000"
    environment:
      - MLFLOW_BACKEND_STORE_URI=sqlite:///mlflow/mlflow.db
      - MLFLOW_DEFAULT_ARTIFACT_ROOT=/mlflow/artifacts
    volumes:
      - ./mlflow:/mlflow
    networks:
      - cyber-llm-network
    command: >
      bash -c "
      pip install mlflow &&
      mlflow server
      --backend-store-uri sqlite:///mlflow/mlflow.db
      --default-artifact-root /mlflow/artifacts
      --host 0.0.0.0
      --port 5000
      "
    profiles:
      - training
      - monitoring

  # Prometheus monitoring
  prometheus:
    image: prom/prometheus:latest
    container_name: cyber-llm-prometheus
    ports:
      - "9090:9090"
    volumes:
      - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml
      - ./monitoring/prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
      - '--web.enable-lifecycle'
    networks:
      - cyber-llm-network
    profiles:
      - monitoring

  # Grafana dashboard
  grafana:
    image: grafana/grafana:latest
    container_name: cyber-llm-grafana
    ports:
      - "3000:3000"
    environment:
      - GF_SECURITY_ADMIN_PASSWORD=admin123
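      # Hard-coded default password, suitable only for local use; for anything shared,
      # supplying it via an environment variable or secret is safer, e.g.
      # GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD} (variable name illustrative).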
    volumes:
      - ./monitoring/grafana_data:/var/lib/grafana
      - ./monitoring/grafana/dashboards:/etc/grafana/provisioning/dashboards
      - ./monitoring/grafana/datasources:/etc/grafana/provisioning/datasources
    networks:
      - cyber-llm-network
    profiles:
      - monitoring

  # Redis for caching (optional)
  redis:
    image: redis:7-alpine
    container_name: cyber-llm-redis
    ports:
      - "6379:6379"
    volumes:
      - ./redis_data:/data
    networks:
      - cyber-llm-network
    profiles:
      - cache

  # Nginx reverse proxy
  nginx:
    image: nginx:alpine
    container_name: cyber-llm-nginx
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./nginx/nginx.conf:/etc/nginx/nginx.conf
      - ./nginx/ssl:/etc/nginx/ssl
    networks:
      - cyber-llm-network
    depends_on:
      - cyber-llm-api
    profiles:
      - production

networks:
  cyber-llm-network:
    driver: bridge
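
# Named volumes declared for completeness; the services above currently use host bind
# mounts (./data, ./models, ...), so these are only picked up if the service volume
# entries are switched to the named-volume form (e.g. "models:/home/cyberllm/models").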
volumes:
  data:
  models:
  logs:
  mlflow:
  prometheus_data:
  grafana_data:
  redis_data: