# Viewer metadata: file size 3,494 bytes, revision 23804b3.
---
# Cyber-LLM API Deployment
#
# Runs three replicas of the cyber-llm API container, each requesting one
# NVIDIA GPU, exposing HTTP on port 8000 and advertising a Prometheus
# scrape endpoint on the same port.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: cyber-llm-api
  namespace: cyber-llm
  labels:
    app.kubernetes.io/name: cyber-llm
    app.kubernetes.io/component: api
    app.kubernetes.io/version: "0.4.0"
spec:
  replicas: 3
  # Roll one pod at a time: at most one replica down and one extra replica
  # scheduled during an update.
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxUnavailable: 1
      maxSurge: 1
  # Must stay a subset of template.metadata.labels below.
  selector:
    matchLabels:
      app.kubernetes.io/name: cyber-llm
      app.kubernetes.io/component: api
  template:
    metadata:
      labels:
        app.kubernetes.io/name: cyber-llm
        app.kubernetes.io/component: api
        app.kubernetes.io/version: "0.4.0"
      # Annotation values are strings, hence the quoted "true" / "8000".
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8000"
        prometheus.io/path: "/metrics"
    spec:
      serviceAccountName: cyber-llm-service-account
      # Pod-level identity: every container runs as UID/GID 1000 and mounted
      # volumes are group-owned by GID 1000.
      securityContext:
        runAsUser: 1000
        runAsGroup: 1000
        fsGroup: 1000
        runAsNonRoot: true
      containers:
        - name: cyber-llm-api
          # NOTE(review): the image tag is the mutable `latest` while the
          # version label above says "0.4.0" - confirm whether a pinned tag
          # should be used here.
          image: cyber-llm:latest
          imagePullPolicy: Always
          ports:
            # Named port; the probes below refer to it as `http`.
            - containerPort: 8000
              name: http
              protocol: TCP
          env:
            # Downward API: expose the pod's own name and namespace.
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
          # Import every key of the ConfigMap and the Secret as env vars.
          envFrom:
            - configMapRef:
                name: cyber-llm-config
            - secretRef:
                name: cyber-llm-secrets
          resources:
            requests:
              memory: "4Gi"
              cpu: "2000m"
              nvidia.com/gpu: "1"
            limits:
              memory: "8Gi"
              cpu: "4000m"
              # GPU request and limit are kept equal.
              nvidia.com/gpu: "1"
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 30
            periodSeconds: 30
            timeoutSeconds: 10
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /ready
              port: http
            initialDelaySeconds: 15
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          volumeMounts:
            - name: models-volume
              mountPath: /app/models
            - name: data-volume
              mountPath: /app/data
            - name: logs-volume
              mountPath: /app/logs
            - name: config-volume
              mountPath: /app/configs
              readOnly: true
          # Container hardening: no privilege escalation, all capabilities
          # dropped, root filesystem read-only. The only writable paths
          # declared in this manifest are the volume mounts above that are
          # not marked readOnly.
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
            readOnlyRootFilesystem: true
            runAsNonRoot: true
      volumes:
        - name: models-volume
          persistentVolumeClaim:
            claimName: cyber-llm-models-pvc
        - name: data-volume
          persistentVolumeClaim:
            claimName: cyber-llm-data-pvc
        # Logs live on an emptyDir rather than a PVC.
        - name: logs-volume
          emptyDir: {}
        # Same ConfigMap as envFrom above, additionally mounted as files.
        - name: config-volume
          configMap:
            name: cyber-llm-config
      # Soft preference: avoid co-locating with other pods labelled
      # app.kubernetes.io/name=cyber-llm on the same node. The selector
      # matches on name only, not on component.
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchExpressions:
                    - key: app.kubernetes.io/name
                      operator: In
                      values:
                        - cyber-llm
                topologyKey: kubernetes.io/hostname
      # Allow scheduling onto nodes tainted nvidia.com/gpu:NoSchedule.
      tolerations:
        - key: "nvidia.com/gpu"
          operator: "Exists"
          effect: "NoSchedule"