// config/prow/cluster/monitoring/mixins/prometheus/stale_alerts.libsonnet
//
// Mixin fragment that appends the 'prow-stale' rule group to prometheusAlerts.
{
  // Stale-image settings; this file reads `daysStale` and `eventDuration` from it.
  local staleCfg = $._config.prowImageStaleByDays,
  // Threshold (in days) applied when day_of_week() is 3, 4 or 5.
  local midweekDays = staleCfg.daysStale,
  // Threshold (in days) applied when day_of_week() is 1 or 2 (Monday/Tuesday):
  // two extra days so that a weekend without deployments does not count
  // against the limit.
  local afterWeekendDays = staleCfg.daysStale + 2,

  prometheusAlerts+:: {
    groups+: [
      {
        name: 'prow-stale',
        rules: [
          {
            alert: 'Prow images are stale',
            // The staleness limit depends on the day of the week so it can be
            // tighter than a flat 7 days now that prow is deployed automatically.
            // A day without any prow update is rare but possible, so the limit
            // should be at least 2 working days.
            // No clause matches day_of_week() values 0 or 6, so the expression
            // returns nothing on those days.
            expr: |||
              ((time()-max(prow_version) > %d * 24 * 3600) and (day_of_week()<6) and (day_of_week()>2))
              or ((time()-max(prow_version) > %d * 24 * 3600) and (day_of_week()==1))
              or ((time()-max(prow_version) > %d * 24 * 3600) and (day_of_week()==2))
            ||| % [midweekDays, afterWeekendDays, afterWeekendDays],
            'for': staleCfg.eventDuration,
            labels: {
              severity: 'critical',
            },
            annotations: {
              // Named format fields are filled from the config object itself.
              message: 'The prow images are older than %(daysStale)d days for %(eventDuration)s.' % staleCfg,
            },
          },
        ],
      },
    ],
  },
}