k8s.io/test-infra@v0.0.0-20240520184403-27c6b4c223d8/config/prow/cluster/monitoring/mixins/prometheus/stale_alerts.libsonnet (about)

     # Mixin fragment: appends the 'prow-stale' alert group to prometheusAlerts.
     {
       prometheusAlerts+:: {
         groups+: [
           {
             name: 'prow-stale',
             rules: [
               {
                 # Staleness settings come from the shared mixin config; this file
                 # reads the daysStale and eventDuration fields from it.
                 local staleCfg = $._config.prowImageStaleByDays,
                 # Threshold (in days) applied on day_of_week() 3..5.
                 local midWeekDays = staleCfg.daysStale,
                 # Threshold (in days) applied on day_of_week() 1 and 2: two extra
                 # days so that a weekend without deployments does not trip the alert.
                 local afterWeekendDays = staleCfg.daysStale + 2,

                 alert: 'Prow images are stale',
                 # The threshold depends on the day of the week so that it can be
                 # tighter than a flat 7 days now that prow is deployed automatically.
                 # A day without any prow update is rare but possible, so the base
                 # threshold should be at least 2 work days. No branch matches
                 # day_of_week() 0 or 6, so the expression yields nothing on those days.
                 expr: std.format(
                   |||
                     ((time()-max(prow_version) > %d * 24 * 3600) and (day_of_week()<6) and (day_of_week()>2))
                     or ((time()-max(prow_version) > %d * 24 * 3600) and (day_of_week()==1))
                     or ((time()-max(prow_version) > %d * 24 * 3600) and (day_of_week()==2))
                   |||,
                   [midWeekDays, afterWeekendDays, afterWeekendDays]
                 ),
                 # 'for' is a Jsonnet keyword, so the field name has to be quoted.
                 'for': staleCfg.eventDuration,
                 labels: { severity: 'critical' },
                 annotations: {
                   # Named placeholders are filled from the config object's fields.
                   message: std.format(
                     'The prow images are older than %(daysStale)d days for %(eventDuration)s.',
                     staleCfg
                   ),
                 },
               },
             ],
           },
         ],
       },
     }