File size: 1,867 Bytes
23804b3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
# HorizontalPodAutoscaler for the Cyber-LLM API.
# Scales the cyber-llm-api Deployment between 2 and 10 replicas using three
# metrics: CPU utilization, memory utilization and a per-pod GPU metric.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: cyber-llm-api-hpa
  namespace: cyber-llm
  labels:
    app.kubernetes.io/name: cyber-llm
    app.kubernetes.io/component: autoscaling
spec:
  # Workload whose replica count this autoscaler manages.
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: cyber-llm-api

  # Replica bounds.
  minReplicas: 2
  maxReplicas: 10

  metrics:
    # CPU: target an average utilization of 70%.
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70

    # Memory: target an average utilization of 80%.
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80

    # GPU: target an average of 0.8 per pod for the `gpu_utilization` metric.
    # Only meaningful if GPU metrics are available in the cluster.
    # NOTE(review): presumably served by a custom-metrics adapter, and
    # presumably a 0-1 fraction - confirm both against the metric exporter.
    - type: Pods
      pods:
        metric:
          name: gpu_utilization
        target:
          type: AverageValue
          averageValue: "0.8"

  behavior:
    # Scale up immediately (no stabilization window). Two policies are
    # listed: +50% of current replicas, or +2 pods, each per 60 seconds.
    scaleUp:
      stabilizationWindowSeconds: 0
      policies:
        - type: Percent
          value: 50
          periodSeconds: 60
        - type: Pods
          value: 2
          periodSeconds: 60

    # Scale down conservatively: 300-second stabilization window, then at
    # most 10% of current replicas removed per 60 seconds.
    scaleDown:
      stabilizationWindowSeconds: 300
      policies:
        - type: Percent
          value: 10
          periodSeconds: 60
---
# VerticalPodAutoscaler for the Cyber-LLM API (only applies if the VPA
# CRDs/controllers are installed in the cluster).
#
# Runs in recommendation-only mode ("Off"): the cyber-llm-api Deployment is
# also scaled horizontally by cyber-llm-api-hpa on CPU and memory
# utilization. Letting VPA actively rewrite CPU/memory requests ("Auto") on
# the same workload is an unsupported combination - the two controllers act
# on the same signal and fight each other. Read the recommendations with
# `kubectl describe vpa cyber-llm-api-vpa -n cyber-llm` and apply them to the
# Deployment's resource requests manually.
apiVersion: autoscaling.k8s.io/v1
kind: VerticalPodAutoscaler
metadata:
  name: cyber-llm-api-vpa
  namespace: cyber-llm
  labels:
    app.kubernetes.io/name: cyber-llm
    app.kubernetes.io/component: autoscaling
spec:
  # Workload whose containers are analysed.
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: cyber-llm-api
  updatePolicy:
    # "Off" = compute recommendations only, never evict or resize pods.
    # Switch to "Auto" only after the HPA stops using cpu/memory metrics.
    updateMode: "Off"
  resourcePolicy:
    containerPolicies:
      - containerName: cyber-llm-api
        # Bounds for the recommended requests.
        minAllowed:
          cpu: 1
          memory: 2Gi
        maxAllowed:
          cpu: 8
          memory: 16Gi
        # Resources VPA computes recommendations for.
        controlledResources: ["cpu", "memory"]
|