---
# Horizontal Pod Autoscaler for the Cyber-LLM API Deployment.
# Scales between 2 and 10 replicas based on CPU, memory, and a custom
# per-pod GPU-utilization metric (requires a metrics adapter exposing it).
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: cyber-llm-api-hpa
  namespace: cyber-llm
  labels:
    app.kubernetes.io/name: cyber-llm
    app.kubernetes.io/component: autoscaling
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: cyber-llm-api
  minReplicas: 2
  maxReplicas: 10
  metrics:
    # CPU utilization
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    # Memory utilization
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
    # GPU utilization (only acted on if a custom-metrics adapter
    # exposes `gpu_utilization` per pod)
    - type: Pods
      pods:
        metric:
          name: gpu_utilization
        target:
          type: AverageValue
          averageValue: "0.8"
  behavior:
    scaleDown:
      # Wait 5 minutes of sustained low load before scaling down,
      # then remove at most 10% of replicas per minute.
      stabilizationWindowSeconds: 300
      policies:
        - type: Percent
          value: 10
          periodSeconds: 60
    scaleUp:
      # Scale up immediately; per minute, add up to 50% more replicas
      # or 2 pods, whichever is greater (default selectPolicy: Max).
      stabilizationWindowSeconds: 0
      policies:
        - type: Percent
          value: 50
          periodSeconds: 60
        - type: Pods
          value: 2
          periodSeconds: 60
---
# Vertical Pod Autoscaler for the Cyber-LLM API Deployment
# (requires the VPA operator/CRDs to be installed in the cluster).
# The leading `---` is required: without it this resource's keys would be
# duplicate keys inside the preceding document instead of a second document.
apiVersion: autoscaling.k8s.io/v1
kind: VerticalPodAutoscaler
metadata:
  name: cyber-llm-api-vpa
  namespace: cyber-llm
  labels:
    app.kubernetes.io/name: cyber-llm
    app.kubernetes.io/component: autoscaling
spec:
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: cyber-llm-api
  updatePolicy:
    updateMode: "Auto"  # or "Off" for recommendation-only mode
  resourcePolicy:
    containerPolicies:
      - containerName: cyber-llm-api
        # Resource quantities are quoted strings so the YAML parser
        # never retypes them (e.g. a bare `8` parses as an integer).
        minAllowed:
          cpu: "1"
          memory: 2Gi
        maxAllowed:
          cpu: "8"
          memory: 16Gi
        controlledResources: ["cpu", "memory"]