# cyber_llm/src/deployment/k8s/deployment.yaml
# unit731: Upload core Cyber-LLM platform components
# Commit 23804b3 (verified)
---
# Cyber-LLM API Deployment
#
# Runs the `cyber-llm-api` container as a 3-replica Deployment in the
# `cyber-llm` namespace. Each pod asks for one NVIDIA GPU, serves HTTP on
# port 8000, and takes its settings from the `cyber-llm-config` ConfigMap
# and the `cyber-llm-secrets` Secret.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: cyber-llm-api
  namespace: cyber-llm
  labels:
    app.kubernetes.io/name: cyber-llm
    app.kubernetes.io/component: api
    app.kubernetes.io/version: "0.4.0"
spec:
  replicas: 3
  # Roll pods one at a time: at most one replica down and at most one
  # extra replica up. The surge pod also requests a GPU, so a rollout
  # may need capacity for a 4th GPU pod.
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxUnavailable: 1
      maxSurge: 1
  # Must stay a subset of template.metadata.labels below.
  selector:
    matchLabels:
      app.kubernetes.io/name: cyber-llm
      app.kubernetes.io/component: api
  template:
    metadata:
      labels:
        app.kubernetes.io/name: cyber-llm
        app.kubernetes.io/component: api
        app.kubernetes.io/version: "0.4.0"
      # Annotation values must be strings, hence the quoted port.
      # NOTE(review): these only take effect if the cluster's Prometheus
      # scrape config honours prometheus.io/* annotations; confirm.
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8000"
        prometheus.io/path: "/metrics"
    spec:
      serviceAccountName: cyber-llm-service-account
      # Pod-level identity: run as non-root UID/GID 1000; fsGroup 1000 is
      # applied to mounted volumes that support ownership management.
      securityContext:
        runAsUser: 1000
        runAsGroup: 1000
        fsGroup: 1000
        runAsNonRoot: true
      containers:
        - name: cyber-llm-api
          # NOTE(review): `latest` is a mutable tag and is re-pulled on
          # every pod start (imagePullPolicy: Always), so replicas can end
          # up on different builds. Consider pinning to the release that
          # matches app.kubernetes.io/version.
          image: cyber-llm:latest
          imagePullPolicy: Always
          ports:
            # Named port; the probes below refer to it as `http`.
            - containerPort: 8000
              name: http
              protocol: TCP
          env:
            # Downward API: expose the pod's own name and namespace.
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
          # Every key of the ConfigMap and the Secret becomes an env var.
          envFrom:
            - configMapRef:
                name: cyber-llm-config
            - secretRef:
                name: cyber-llm-secrets
          resources:
            requests:
              memory: "4Gi"
              cpu: "2000m"
              nvidia.com/gpu: "1"
            limits:
              memory: "8Gi"
              cpu: "4000m"
              # Extended resources cannot be overcommitted: when both are
              # set, the GPU request and limit must be equal.
              nvidia.com/gpu: "1"
          # Liveness: GET /health every 30s after a 30s delay; the
          # container is restarted after 3 consecutive failures.
          # NOTE(review): confirm startup (model load) fits in this window.
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 30
            periodSeconds: 30
            timeoutSeconds: 10
            failureThreshold: 3
          # Readiness: GET /ready every 10s after a 15s delay; the pod is
          # marked unready after 3 consecutive failures.
          readinessProbe:
            httpGet:
              path: /ready
              port: http
            initialDelaySeconds: 15
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          volumeMounts:
            - name: models-volume
              mountPath: /app/models
            - name: data-volume
              mountPath: /app/data
            - name: logs-volume
              mountPath: /app/logs
            - name: config-volume
              mountPath: /app/configs
              readOnly: true
          # Container hardening: no privilege escalation, all Linux
          # capabilities dropped, read-only root filesystem. The only
          # writable paths are the models, data and logs mounts above.
          # NOTE(review): no writable /tmp is mounted; confirm the app
          # does not need one.
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
            readOnlyRootFilesystem: true
            runAsNonRoot: true
      volumes:
        # NOTE(review): all replicas mount the same two claims. If pods
        # land on different nodes the PVCs need a multi-node access mode
        # (ReadWriteMany / ReadOnlyMany); confirm against the PVC specs.
        - name: models-volume
          persistentVolumeClaim:
            claimName: cyber-llm-models-pvc
        - name: data-volume
          persistentVolumeClaim:
            claimName: cyber-llm-data-pvc
        # Per-pod scratch space; its contents are lost when the pod is
        # removed from the node.
        - name: logs-volume
          emptyDir: {}
        # The same ConfigMap that feeds envFrom, also projected as files.
        - name: config-volume
          configMap:
            name: cyber-llm-config
      # Soft preference (not a hard rule) to avoid nodes that already run
      # a pod labelled app.kubernetes.io/name=cyber-llm.
      # NOTE(review): the selector matches every cyber-llm component, not
      # only `api`; confirm that is intended.
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchExpressions:
                    - key: app.kubernetes.io/name
                      operator: In
                      values:
                        - cyber-llm
                topologyKey: kubernetes.io/hostname
      # Allow scheduling onto nodes tainted nvidia.com/gpu:NoSchedule.
      tolerations:
        - key: "nvidia.com/gpu"
          operator: "Exists"
          effect: "NoSchedule"