File size: 1,867 Bytes
23804b3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# Horizontal Pod Autoscaler for Cyber-LLM API
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: cyber-llm-api-hpa
  namespace: cyber-llm
  labels:
    app.kubernetes.io/name: cyber-llm
    app.kubernetes.io/component: autoscaling
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: cyber-llm-api
  minReplicas: 2
  maxReplicas: 10
  metrics:
  # CPU utilization
  - type: Resource
    resource:
      name: cpu
      target:
        type: Utilization
        averageUtilization: 70
  # Memory utilization
  - type: Resource
    resource:
      name: memory
      target:
        type: Utilization
        averageUtilization: 80
  # GPU utilization (if GPU metrics are available)
  - type: Pods
    pods:
      metric:
        name: gpu_utilization
      target:
        type: AverageValue
        averageValue: "0.8"
  behavior:
    scaleDown:
      stabilizationWindowSeconds: 300
      policies:
      - type: Percent
        value: 10
        periodSeconds: 60
    scaleUp:
      stabilizationWindowSeconds: 0
      policies:
      - type: Percent
        value: 50
        periodSeconds: 60
      - type: Pods
        value: 2
        periodSeconds: 60

---
# Vertical Pod Autoscaler (if VPA is installed)
apiVersion: autoscaling.k8s.io/v1
kind: VerticalPodAutoscaler
metadata:
  name: cyber-llm-api-vpa
  namespace: cyber-llm
  labels:
    app.kubernetes.io/name: cyber-llm
    app.kubernetes.io/component: autoscaling
spec:
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: cyber-llm-api
  updatePolicy:
    updateMode: "Auto"  # or "Off" for recommendation only
  resourcePolicy:
    containerPolicies:
    - containerName: cyber-llm-api
      maxAllowed:
        memory: 16Gi
        cpu: 8
      minAllowed:
        memory: 2Gi
        cpu: 1
      controlledResources: ["cpu", "memory"]