k8s.io/test-infra@v0.0.0-20240520184403-27c6b4c223d8/config/prow/cluster/monitoring/mixins/prometheus/prow_monitoring_absent_alerts.libsonnet (about)

     1  {
     2    // Alert rules for the monitoring stack itself, added to the rule groups
     3    // collected in the hidden prometheusAlerts field.
     4    prometheusAlerts+:: {
     5      // Value of the `slo` label carried by every rule in this group.
     6      local sloComponent = $._config.components.monitoring,
     7      // Labels shared by all rules below.
     8      local criticalLabels = {
     9        slo: sloComponent,
    10        severity: 'critical',
    11      },
    12      // Fires when the per-job sum of `up` for prometheus or alertmanager
    13      // has stayed at or below 1 for 5 minutes.
    14      local lostHaRule = {
    15        alert: 'ServiceLostHA',
    16        expr: |||
    17          sum(up{job=~"prometheus|alertmanager"}) by (job) <= 1
    18        |||,
    19        'for': '5m',
    20        labels: criticalLabels,
    21        annotations: {
    22          message: 'The service {{ $labels.job }} has at most 1 instance for 5 minutes.',
    23        },
    24      },
    25      // Builds the '<job>Down' rule: fires when no `up` series with value 1
    26      // has existed for the given job for 5 minutes.
    27      local downRule(job) = {
    28        alert: std.format('%sDown', [job]),
    29        expr: std.format(|||
    30          absent(up{job="%s"} == 1)
    31        |||, [job]),
    32        'for': '5m',
    33        labels: criticalLabels,
    34        annotations: {
    35          message: std.format('The service %s has been down for 5 minutes.', [job]),
    36        },
    37      },
    38      // Jobs that each get a Down rule, in output order.
    39      local watchedJobs = ['alertmanager', 'prometheus', 'grafana'],
    40      groups+: [
    41        {
    42          name: 'prow-monitoring-absent',
    43          rules: [lostHaRule] + std.map(downRule, watchedJobs),
    44        },
    45      ],
    46    },
    47  }