# File: cyber_llm/src/deployment/docker/docker-compose.yml
# Commit 23804b3 (verified), uploaded by unit731:
#   "Upload core Cyber-LLM platform components"
# Size: 3.91 kB
# Compose file format declaration.
# NOTE(review): recent Docker Compose releases treat `version` as
# informational only — confirm against the Compose version deployed here.
version: '3.8'
# Service definitions for the Cyber-LLM stack.
services:
# Main Cyber-LLM API service: built from the `production` stage of the
# project Dockerfile and published on host port 8000.
  cyber-llm-api:
    container_name: cyber-llm-api
    build:
      # Relative paths resolve from the directory holding this compose file,
      # so the build context is three levels up from it.
      context: ../../../
      dockerfile: src/deployment/docker/Dockerfile
      target: production
    restart: unless-stopped
    networks:
      - cyber-llm-network
    ports:
      - "8000:8000"
    environment:
      PYTHONPATH: /home/cyberllm
      # NOTE(review): this only sets the variable; no GPU device is requested
      # in this service definition — confirm how the GPU is exposed.
      CUDA_VISIBLE_DEVICES: "0"
      TRANSFORMERS_CACHE: /home/cyberllm/models/cache
    volumes:
      # Host-side paths are bind mounts relative to this compose file.
      - ./data:/home/cyberllm/data
      - ./models:/home/cyberllm/models
      - ./logs:/home/cyberllm/logs
      - ./configs:/home/cyberllm/configs
    healthcheck:
      # Probes the API from inside the container.
      # NOTE(review): presumes `curl` is installed in the image and that the
      # app serves /health — confirm against the Dockerfile and API code.
      test:
        - CMD
        - curl
        - "-f"
        - http://localhost:8000/health
      start_period: 40s
      interval: 30s
      timeout: 10s
      retries: 3
# Training service (optional): only started when the `training` profile
# is enabled. Built from the `training` stage of the project Dockerfile.
  cyber-llm-training:
    container_name: cyber-llm-training
    profiles:
      - training
    build:
      context: ../../../
      dockerfile: src/deployment/docker/Dockerfile
      target: training
    depends_on:
      # Start order only; this does not wait for MLflow to accept requests.
      - mlflow
    networks:
      - cyber-llm-network
    environment:
      PYTHONPATH: /home/cyberllm
      CUDA_VISIBLE_DEVICES: "0"
      # Taken from the invoking shell / .env file at `docker compose` time.
      WANDB_API_KEY: "${WANDB_API_KEY}"
      # `mlflow` is the service name of the tracking server on the shared network.
      MLFLOW_TRACKING_URI: http://mlflow:5000
    volumes:
      # Same bind mounts as the API service, relative to this compose file.
      - ./data:/home/cyberllm/data
      - ./models:/home/cyberllm/models
      - ./logs:/home/cyberllm/logs
      - ./configs:/home/cyberllm/configs
# MLflow tracking server: started with the `training` or `monitoring`
# profile; stores run metadata in SQLite and artifacts under ./mlflow.
  mlflow:
    image: python:3.10-slim
    container_name: cyber-llm-mlflow
    ports:
      - "5000:5000"
    environment:
      # Four slashes = absolute path (/mlflow/mlflow.db). The three-slash
      # form is relative to the process working directory and only reached
      # the mounted volume while that directory happened to be `/`.
      - MLFLOW_BACKEND_STORE_URI=sqlite:////mlflow/mlflow.db
      - MLFLOW_DEFAULT_ARTIFACT_ROOT=/mlflow/artifacts
    volumes:
      # Holds both the SQLite database and the artifact directory.
      - ./mlflow:/mlflow
    networks:
      - cyber-llm-network
    # Explicit argv list so the shell script is passed to `bash -c` as one
    # argument regardless of how the scalar is folded.
    # NOTE(review): mlflow is installed unpinned on every container start;
    # consider pinning a version or baking it into an image.
    command:
      - bash
      - "-c"
      # `exec` replaces the shell so the server receives stop signals directly.
      - >-
        pip install mlflow &&
        exec mlflow server
        --backend-store-uri sqlite:////mlflow/mlflow.db
        --default-artifact-root /mlflow/artifacts
        --host 0.0.0.0
        --port 5000
    profiles:
      - training
      - monitoring
# Prometheus monitoring: started only with the `monitoring` profile and
# published on host port 9090.
  prometheus:
    container_name: cyber-llm-prometheus
    image: prom/prometheus:latest
    profiles:
      - monitoring
    networks:
      - cyber-llm-network
    ports:
      - "9090:9090"
    volumes:
      # Scrape configuration supplied from the host.
      - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml
      # Time-series data kept on the host (matches --storage.tsdb.path below).
      # NOTE(review): the host directory must be writable by the container's
      # user — confirm ownership on the deployment host.
      - ./monitoring/prometheus_data:/prometheus
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.path=/prometheus"
      - "--web.console.libraries=/etc/prometheus/console_libraries"
      - "--web.console.templates=/etc/prometheus/consoles"
      # Turns on the HTTP lifecycle endpoints.
      - "--web.enable-lifecycle"
# Grafana dashboard: started only with the `monitoring` profile and
# published on host port 3000.
  grafana:
    image: grafana/grafana:latest
    container_name: cyber-llm-grafana
    ports:
      - "3000:3000"
    environment:
      # Admin password comes from GRAFANA_ADMIN_PASSWORD (shell or .env).
      # The fallback keeps the previous hard-coded value so existing setups
      # still start; set the variable to a strong secret for any real use.
      # NOTE(review): Grafana applies this on first initialisation only —
      # an existing ./monitoring/grafana_data keeps its stored password.
      GF_SECURITY_ADMIN_PASSWORD: "${GRAFANA_ADMIN_PASSWORD:-admin123}"
    volumes:
      # Grafana's own state (database, plugins).
      - ./monitoring/grafana_data:/var/lib/grafana
      # Provisioning files read by Grafana at startup.
      - ./monitoring/grafana/dashboards:/etc/grafana/provisioning/dashboards
      - ./monitoring/grafana/datasources:/etc/grafana/provisioning/datasources
    networks:
      - cyber-llm-network
    profiles:
      - monitoring
# Redis for caching (optional): started only with the `cache` profile.
  redis:
    container_name: cyber-llm-redis
    image: redis:7-alpine
    profiles:
      - cache
    networks:
      - cyber-llm-network
    ports:
      # NOTE(review): published on all host interfaces and no password is
      # configured in this file — confirm the host is not publicly reachable.
      - "6379:6379"
    volumes:
      # Redis data directory kept on the host.
      - ./redis_data:/data
# Nginx reverse proxy: started only with the `production` profile and
# published on host ports 80 and 443.
  nginx:
    image: nginx:alpine
    container_name: cyber-llm-nginx
    ports:
      - "80:80"
      - "443:443"
    volumes:
      # Configuration and TLS material are mounted read-only so the container
      # cannot modify the host copies.
      - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
      - ./nginx/ssl:/etc/nginx/ssl:ro
    networks:
      - cyber-llm-network
    depends_on:
      # Start order only; does not wait for the API healthcheck to pass.
      - cyber-llm-api
    profiles:
      - production
# Single user-defined bridge network joined by every service in this file;
# services reach each other on it by service name.
networks:
  cyber-llm-network:
    driver: bridge
# Named volumes with default driver settings.
# NOTE(review): every service visible in this file mounts host paths
# (`./...`) rather than these names, so they look unused here — confirm
# whether an override file references them before removing.
volumes:
  data: {}
  models: {}
  logs: {}
  mlflow: {}
  prometheus_data: {}
  grafana_data: {}
  redis_data: {}