---
# Prometheus server configuration.

# Server-wide defaults; an individual scrape job may override scrape_interval.
global:
  scrape_interval: 15s
  evaluation_interval: 15s
  # Labels identifying this server's cluster and environment.
  external_labels:
    cluster: 'cyber-llm-production'
    environment: 'production'

# Rule files to load; the entry is a glob matching every .yml file in the
# rules directory.
rule_files:
- "/etc/prometheus/rules/*.yml"

# Alertmanager endpoints, configured statically.
alerting:
  alertmanagers:
  - static_configs:
    - targets:
      - 'alertmanager:9093'

scrape_configs:
# Application API metrics, scraped every 30s instead of the global 15s.
- job_name: 'cyber-llm-api'
  metrics_path: '/metrics'
  scrape_interval: 30s
  static_configs:
  - targets: ['cyber-llm-api-service:8000']
  relabel_configs:
  # 1. Copy the target address into the `target` URL query parameter.
  - source_labels: [__address__]
    target_label: __param_target
  # 2. Use that same value as the `instance` label.
  - source_labels: [__param_target]
    target_label: instance
  # 3. Point the actual scrape at a fixed address.
  # NOTE(review): this three-step chain is the pattern normally used to
  # scrape through a proxy-style exporter. Here the fixed address equals the
  # only static target, so the chain appears to be redundant and only adds a
  # `?target=` parameter to the request -- confirm whether it is still needed.
  - target_label: __address__
    replacement: cyber-llm-api-service:8000

# Kubernetes API servers, discovered through the endpoints role and scraped
# over HTTPS with the pod's service-account CA and token.
- job_name: 'kubernetes-apiservers'
  kubernetes_sd_configs:
  - role: endpoints
  scheme: https
  tls_config:
    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
  relabel_configs:
  # Keep only the `https` port of the `kubernetes` service in the `default`
  # namespace; every other discovered endpoint is dropped.
  - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
    action: keep
    regex: default;kubernetes;https

# Per-node metrics, fetched through the API server's node proxy rather than
# by contacting each node directly.
- job_name: 'kubernetes-nodes'
  scheme: https
  tls_config:
    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
  kubernetes_sd_configs:
  - role: node
  relabel_configs:
  # Copy every Kubernetes node label onto the target as a Prometheus label.
  - action: labelmap
    regex: __meta_kubernetes_node_label_(.+)
  # Send every scrape to the in-cluster API server address.
  - target_label: __address__
    replacement: kubernetes.default.svc:443
  # Build the proxy path for this node from its name.
  - source_labels: [__meta_kubernetes_node_name]
    regex: (.+)
    target_label: __metrics_path__
    replacement: /api/v1/nodes/${1}/proxy/metrics

# Annotation-driven pod scraping: pods opt in and choose their own path and
# port via prometheus.io/* annotations.
- job_name: 'kubernetes-pods'
  kubernetes_sd_configs:
  - role: pod
  relabel_configs:
  # Scrape only pods whose scrape annotation is exactly "true".
  # Quoted so the value is unambiguously a string, not a YAML boolean.
  - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
    action: keep
    regex: 'true'
  # Override the metrics path when the path annotation is non-empty.
  - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
    action: replace
    target_label: __metrics_path__
    regex: (.+)
  # Replace the port in the address with the one from the port annotation.
  # The two source labels are joined with ';' before matching, so the rule
  # is a no-op when the annotation is absent.
  - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
    action: replace
    regex: ([^:]+)(?::\d+)?;(\d+)
    replacement: $1:$2
    target_label: __address__
  # Copy every pod label onto the target as a Prometheus label.
  - action: labelmap
    regex: __meta_kubernetes_pod_label_(.+)
  # Expose the pod's namespace and name as regular labels.
  - source_labels: [__meta_kubernetes_namespace]
    action: replace
    target_label: kubernetes_namespace
  - source_labels: [__meta_kubernetes_pod_name]
    action: replace
    target_label: kubernetes_pod_name

# GPU exporter, single static target scraped at the global interval.
- job_name: 'gpu-metrics'
  static_configs:
  - targets: ['nvidia-gpu-exporter:9835']

# Business metrics served by the same API service on a separate path,
# scraped every 60s instead of the global 15s.
- job_name: 'cyber-llm-business-metrics'
  metrics_path: '/business-metrics'
  scrape_interval: 60s
  static_configs:
  - targets: ['cyber-llm-api-service:8000']