k8s.io/test-infra@v0.0.0-20240520184403-27c6b4c223d8/config/prow/cluster/monitoring/mixins/prometheus/prow_alerts.libsonnet (about)

     1  {
     2    prometheusAlerts+:: {
     3      local componentName = $._config.components.monitoring,
     4      groups+: [
     5        {
     6          name: 'prow',
     7          rules: [
     8            {
     9              alert: 'prow-pod-crashlooping',
    10              expr: 'rate(kube_pod_container_status_restarts_total{namespace=~"default|prow-monitoring",job="kube-state-metrics"}[5m]) * 60 * 5 > 0',
    11              'for': '1m',
    12              labels: {
    13                severity: 'critical',
    14              },
    15              annotations: {
    16                message: 'Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container}}) is restarting {{ printf "%.2f" $value }} times / 5 minutes.'
    17              },
    18            }
    19          ],
    20        },
    21      ],
    22    },
    23  }