---
# Horizontal Pod Autoscaler for the Cyber-LLM API Deployment.
# Scales between 2 and 10 replicas based on CPU, memory, and a custom
# per-pod GPU-utilization metric (requires a metrics adapter exposing it).
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: cyber-llm-api-hpa
  namespace: cyber-llm
  labels:
    app.kubernetes.io/name: cyber-llm
    app.kubernetes.io/component: autoscaling
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: cyber-llm-api
  minReplicas: 2
  maxReplicas: 10
  metrics:
    # CPU utilization
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    # Memory utilization
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
    # GPU utilization (only acted on if a custom-metrics adapter
    # exposes `gpu_utilization` per pod)
    - type: Pods
      pods:
        metric:
          name: gpu_utilization
        target:
          type: AverageValue
          averageValue: "0.8"
  behavior:
    scaleDown:
      # Wait 5 minutes of sustained low load before scaling down,
      # then remove at most 10% of replicas per minute.
      stabilizationWindowSeconds: 300
      policies:
        - type: Percent
          value: 10
          periodSeconds: 60
    scaleUp:
      # Scale up immediately; per minute, add up to 50% more replicas
      # or 2 pods, whichever is greater (default selectPolicy: Max).
      stabilizationWindowSeconds: 0
      policies:
        - type: Percent
          value: 50
          periodSeconds: 60
        - type: Pods
          value: 2
          periodSeconds: 60
---
# Vertical Pod Autoscaler for the Cyber-LLM API Deployment
# (requires the VPA operator/CRDs to be installed in the cluster).
# The leading `---` is required: without it this resource's keys would be
# duplicate keys inside the preceding document instead of a second document.
apiVersion: autoscaling.k8s.io/v1
kind: VerticalPodAutoscaler
metadata:
  name: cyber-llm-api-vpa
  namespace: cyber-llm
  labels:
    app.kubernetes.io/name: cyber-llm
    app.kubernetes.io/component: autoscaling
spec:
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: cyber-llm-api
  updatePolicy:
    updateMode: "Auto"  # or "Off" for recommendation-only mode
  resourcePolicy:
    containerPolicies:
      - containerName: cyber-llm-api
        # Resource quantities are quoted strings so the YAML parser
        # never retypes them (e.g. a bare `8` parses as an integer).
        minAllowed:
          cpu: "1"
          memory: 2Gi
        maxAllowed:
          cpu: "8"
          memory: 16Gi
        controlledResources: ["cpu", "memory"]