# Page header captured with this file (not part of the manifest):
#   Spaces: unit731 / cyber_llm | Running | File size: 1,867 Bytes | 23804b3

---
# HorizontalPodAutoscaler for the Cyber-LLM API.
#
# Scales the `cyber-llm-api` Deployment between 2 and 10 replicas based on
# three metrics (CPU, memory, and a per-pod GPU metric), with separate rate
# limits for scaling up and scaling down.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: cyber-llm-api-hpa
  namespace: cyber-llm
  labels:
    app.kubernetes.io/name: cyber-llm
    app.kubernetes.io/component: autoscaling
spec:
  # Workload whose replica count this autoscaler manages.
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: cyber-llm-api
  minReplicas: 2
  maxReplicas: 10
  metrics:
    # CPU: target an average utilization of 70 across pods.
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    # Memory: target an average utilization of 80 across pods.
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
    # GPU: per-pod metric `gpu_utilization`, target average value 0.8.
    # NOTE(review): a `Pods` metric has to be served by a custom metrics
    # adapter. The Kubernetes HPA docs state that when one declared metric
    # cannot be fetched and the remaining ones suggest a scale-down, scaling
    # is skipped, so this entry is not optional once declared. Confirm the
    # adapter exists in the cluster.
    # NOTE(review): 0.8 presumes the metric is reported as a 0-1 fraction,
    # not a 0-100 percentage. TODO confirm against the exporter.
    - type: Pods
      pods:
        metric:
          name: gpu_utilization
        target:
          type: AverageValue
          averageValue: '0.8'
  behavior:
    scaleUp:
      # Window of 0 s: scale-up recommendations are not held back.
      stabilizationWindowSeconds: 0
      # Two limits per 60 s period: 50 percent of replicas, or 2 pods.
      # No selectPolicy is set here, so the API default decides which of
      # the two applies (documented as `Max` in autoscaling/v2).
      policies:
        - type: Percent
          value: 50
          periodSeconds: 60
        - type: Pods
          value: 2
          periodSeconds: 60
    scaleDown:
      # 300 s stabilization window before replicas are removed.
      stabilizationWindowSeconds: 300
      # Remove at most 10 percent of replicas per 60 s period.
      policies:
        - type: Percent
          value: 10
          periodSeconds: 60

---
# VerticalPodAutoscaler for the Cyber-LLM API (only usable when the VPA
# CRDs and controllers are installed in the cluster).
#
# Targets the same `cyber-llm-api` Deployment as the `cyber-llm-api-hpa`
# HorizontalPodAutoscaler, which scales on CPU and memory utilization.
# The VPA project documents that VPA must not actively manage CPU/memory
# on a workload whose HPA uses those same resource metrics: changing the
# requests shifts the utilization percentages the HPA reacts to, and the
# two controllers work against each other. This object therefore runs in
# recommendation-only mode.
apiVersion: autoscaling.k8s.io/v1
kind: VerticalPodAutoscaler
metadata:
  name: cyber-llm-api-vpa
  namespace: cyber-llm
  labels:
    app.kubernetes.io/name: cyber-llm
    app.kubernetes.io/component: autoscaling
spec:
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: cyber-llm-api
  updatePolicy:
    # "Off": recommendations are computed and published in the object's
    # status, but pods are never evicted or resized. Quoted because a bare
    # Off is read as a boolean by YAML 1.1 parsers.
    # Switch to "Auto" only after the HPA stops using cpu/memory metrics
    # (for example, scaling on custom metrics only).
    updateMode: "Off"
  resourcePolicy:
    containerPolicies:
      # Bounds for recommendations on the `cyber-llm-api` container.
      - containerName: cyber-llm-api
        maxAllowed:
          memory: 16Gi
          cpu: 8
        minAllowed:
          memory: 2Gi
          cpu: 1
        controlledResources: ["cpu", "memory"]