# cyber_llm/src/deployment/k8s/autoscaling.yaml
# Horizontal Pod Autoscaler for Cyber-LLM API
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: cyber-llm-api-hpa
  namespace: cyber-llm
  labels:
    app.kubernetes.io/name: cyber-llm
    app.kubernetes.io/component: autoscaling
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: cyber-llm-api
  minReplicas: 2
  maxReplicas: 10
  metrics:
  # CPU utilization
  - type: Resource
    resource:
      name: cpu
      target:
        type: Utilization
        averageUtilization: 70
  # Memory utilization
  - type: Resource
    resource:
      name: memory
      target:
        type: Utilization
        averageUtilization: 80
  # GPU utilization (if GPU metrics are available)
  - type: Pods
    pods:
      metric:
        name: gpu_utilization
      target:
        type: AverageValue
        averageValue: "0.8"
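  # NOTE (assumption): a Pods metric called "gpu_utilization" is not built in;
  # it has to be served through the custom metrics API (custom.metrics.k8s.io),
  # e.g. by Prometheus Adapter reading the NVIDIA DCGM exporter. The metric name
  # and the 0-1 scale implied by averageValue "0.8" depend on how that pipeline
  # is configured. A minimal Prometheus Adapter rule sketch (names are
  # illustrative; verify against your adapter version):
  #
  #   rules:
  #   - seriesQuery: 'DCGM_FI_DEV_GPU_UTIL{namespace!="",pod!=""}'
  #     resources:
  #       overrides:
  #         namespace: {resource: "namespace"}
  #         pod: {resource: "pod"}
  #     name:
  #       matches: "DCGM_FI_DEV_GPU_UTIL"
  #       as: "gpu_utilization"
  #     metricsQuery: avg(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>) / 100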
  behavior:
    scaleDown:
      stabilizationWindowSeconds: 300
      policies:
      - type: Percent
        value: 10
        periodSeconds: 60
    scaleUp:
      stabilizationWindowSeconds: 0
      policies:
      - type: Percent
        value: 50
        periodSeconds: 60
      - type: Pods
        value: 2
        periodSeconds: 60
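    # Note: with two scale-up policies, the default selectPolicy (Max) applies
    # whichever policy permits the larger increase in a given period; scale-down
    # is damped by the 300s stabilization window above.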
---
# Vertical Pod Autoscaler (if VPA is installed)
apiVersion: autoscaling.k8s.io/v1
kind: VerticalPodAutoscaler
metadata:
  name: cyber-llm-api-vpa
  namespace: cyber-llm
  labels:
    app.kubernetes.io/name: cyber-llm
    app.kubernetes.io/component: autoscaling
spec:
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: cyber-llm-api
  updatePolicy:
    updateMode: "Auto"  # or "Off" for recommendation only
  resourcePolicy:
    containerPolicies:
    - containerName: cyber-llm-api
      maxAllowed:
        memory: 16Gi
        cpu: 8
      minAllowed:
        memory: 2Gi
        cpu: 1
      controlledResources: ["cpu", "memory"]
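
# NOTE (assumption): upstream VPA guidance advises against combining VPA in
# "Auto" mode with an HPA that scales on the same CPU/memory metrics for one
# workload, since the two controllers can fight; consider updateMode "Off"
# (recommendation only) or driving the HPA from custom metrics alone.
#
# Quick checks (resource names as defined in this file):
#   kubectl get hpa cyber-llm-api-hpa -n cyber-llm
#   kubectl describe vpa cyber-llm-api-vpa -n cyber-llm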