github.com/verrazzano/verrazzano@v1.7.0/platform-operator/thirdparty/charts/prometheus-community/kube-prometheus-stack/templates/prometheus/rules-1.14/prometheus.yaml

{{- /*
Generated from 'prometheus' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/prometheus-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.prometheus }}
{{- $prometheusJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus" }}
{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus" | trunc 63 | trimSuffix "-" }}
  namespace: {{ template "kube-prometheus-stack.namespace" . }}
  labels:
    app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
  annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
  groups:
  - name: prometheus
    rules:
{{- if not (.Values.defaultRules.disabled.PrometheusBadConfig | default false) }}
    - alert: PrometheusBadConfig
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has failed to reload its configuration.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusbadconfig
        summary: Failed Prometheus configuration reload.
      expr: |-
        # Without max_over_time, failed scrapes could create false negatives, see
        # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
        max_over_time(prometheus_config_last_reload_successful{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) == 0
      for: 10m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if not (.Values.defaultRules.disabled.PrometheusNotificationQueueRunningFull | default false) }}
    - alert: PrometheusNotificationQueueRunningFull
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: Alert notification queue of Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is running full.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusnotificationqueuerunningfull
        summary: Prometheus alert notification queue predicted to run full in less than 30m.
      expr: |-
        # Without min_over_time, failed scrapes could create false negatives, see
        # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
        (
          predict_linear(prometheus_notifications_queue_length{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m], 60 * 30)
        >
          min_over_time(prometheus_notifications_queue_capacity{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
        )
      for: 15m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if not (.Values.defaultRules.disabled.PrometheusErrorSendingAlertsToSomeAlertmanagers | default false) }}
    - alert: PrometheusErrorSendingAlertsToSomeAlertmanagers
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: '{{`{{`}} printf "%.1f" $value {{`}}`}}% errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} to Alertmanager {{`{{`}}$labels.alertmanager{{`}}`}}.'
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheuserrorsendingalertstosomealertmanagers
        summary: Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager.
      expr: |-
        (
          rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
        /
          rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
        )
        * 100
        > 1
      for: 15m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if not (.Values.defaultRules.disabled.PrometheusNotConnectedToAlertmanagers | default false) }}
    - alert: PrometheusNotConnectedToAlertmanagers
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is not connected to any Alertmanagers.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusnotconnectedtoalertmanagers
        summary: Prometheus is not connected to any Alertmanagers.
      expr: |-
        # Without max_over_time, failed scrapes could create false negatives, see
        # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
        max_over_time(prometheus_notifications_alertmanagers_discovered{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) < 1
      for: 10m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if not (.Values.defaultRules.disabled.PrometheusTSDBReloadsFailing | default false) }}
    - alert: PrometheusTSDBReloadsFailing
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has detected {{`{{`}}$value | humanize{{`}}`}} reload failures over the last 3h.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheustsdbreloadsfailing
        summary: Prometheus has issues reloading blocks from disk.
      expr: increase(prometheus_tsdb_reloads_failures_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[3h]) > 0
      for: 4h
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if not (.Values.defaultRules.disabled.PrometheusTSDBCompactionsFailing | default false) }}
    - alert: PrometheusTSDBCompactionsFailing
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has detected {{`{{`}}$value | humanize{{`}}`}} compaction failures over the last 3h.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheustsdbcompactionsfailing
        summary: Prometheus has issues compacting blocks.
      expr: increase(prometheus_tsdb_compactions_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[3h]) > 0
      for: 4h
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if not (.Values.defaultRules.disabled.PrometheusNotIngestingSamples | default false) }}
    - alert: PrometheusNotIngestingSamples
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is not ingesting samples.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusnotingestingsamples
        summary: Prometheus is not ingesting samples.
      expr: |-
        (
          rate(prometheus_tsdb_head_samples_appended_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) <= 0
        and
          (
            sum without(scrape_job) (prometheus_target_metadata_cache_entries{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}) > 0
          or
            sum without(rule_group) (prometheus_rule_group_rules{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}) > 0
          )
        )
      for: 10m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if not (.Values.defaultRules.disabled.PrometheusDuplicateTimestamps | default false) }}
    - alert: PrometheusDuplicateTimestamps
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is dropping {{`{{`}} printf "%.4g" $value {{`}}`}} samples/s with different values but duplicated timestamp.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusduplicatetimestamps
        summary: Prometheus is dropping samples with duplicate timestamps.
      expr: rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
      for: 10m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if not (.Values.defaultRules.disabled.PrometheusOutOfOrderTimestamps | default false) }}
    - alert: PrometheusOutOfOrderTimestamps
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is dropping {{`{{`}} printf "%.4g" $value {{`}}`}} samples/s with timestamps arriving out of order.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusoutofordertimestamps
        summary: Prometheus drops samples with out-of-order timestamps.
      expr: rate(prometheus_target_scrapes_sample_out_of_order_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
      for: 10m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if not (.Values.defaultRules.disabled.PrometheusRemoteStorageFailures | default false) }}
    - alert: PrometheusRemoteStorageFailures
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} failed to send {{`{{`}} printf "%.1f" $value {{`}}`}}% of the samples to {{`{{`}} $labels.remote_name{{`}}`}}:{{`{{`}} $labels.url {{`}}`}}
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusremotestoragefailures
        summary: Prometheus fails to send samples to remote storage.
      expr: |-
        (
          (rate(prometheus_remote_storage_failed_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]))
        /
          (
            (rate(prometheus_remote_storage_failed_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]))
          +
            (rate(prometheus_remote_storage_succeeded_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) or rate(prometheus_remote_storage_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]))
          )
        )
        * 100
        > 1
      for: 15m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if not (.Values.defaultRules.disabled.PrometheusRemoteWriteBehind | default false) }}
    - alert: PrometheusRemoteWriteBehind
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} remote write is {{`{{`}} printf "%.1f" $value {{`}}`}}s behind for {{`{{`}} $labels.remote_name{{`}}`}}:{{`{{`}} $labels.url {{`}}`}}.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusremotewritebehind
        summary: Prometheus remote write is behind.
      expr: |-
        # Without max_over_time, failed scrapes could create false negatives, see
        # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
        (
          max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
        - ignoring(remote_name, url) group_right
          max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
        )
        > 120
      for: 15m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if not (.Values.defaultRules.disabled.PrometheusRemoteWriteDesiredShards | default false) }}
    - alert: PrometheusRemoteWriteDesiredShards
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} remote write desired shards calculation wants to run {{`{{`}} $value {{`}}`}} shards for queue {{`{{`}} $labels.remote_name{{`}}`}}:{{`{{`}} $labels.url {{`}}`}}, which is more than the max of {{`{{`}} printf `prometheus_remote_storage_shards_max{instance="%s",job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}` $labels.instance | query | first | value {{`}}`}}.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusremotewritedesiredshards
        summary: Prometheus remote write desired shards calculation wants to run more than configured max shards.
      expr: |-
        # Without max_over_time, failed scrapes could create false negatives, see
        # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
        (
          max_over_time(prometheus_remote_storage_shards_desired{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
        >
          max_over_time(prometheus_remote_storage_shards_max{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
        )
      for: 15m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if not (.Values.defaultRules.disabled.PrometheusRuleFailures | default false) }}
    - alert: PrometheusRuleFailures
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has failed to evaluate {{`{{`}} printf "%.0f" $value {{`}}`}} rules in the last 5m.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusrulefailures
        summary: Prometheus is failing rule evaluations.
      expr: increase(prometheus_rule_evaluation_failures_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
      for: 15m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if not (.Values.defaultRules.disabled.PrometheusMissingRuleEvaluations | default false) }}
    - alert: PrometheusMissingRuleEvaluations
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has missed {{`{{`}} printf "%.0f" $value {{`}}`}} rule group evaluations in the last 5m.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusmissingruleevaluations
        summary: Prometheus is missing rule evaluations due to slow rule group evaluation.
      expr: increase(prometheus_rule_group_iterations_missed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
      for: 15m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if not (.Values.defaultRules.disabled.PrometheusTargetLimitHit | default false) }}
    - alert: PrometheusTargetLimitHit
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has dropped {{`{{`}} printf "%.0f" $value {{`}}`}} targets because the number of targets exceeded the configured target_limit.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheustargetlimithit
        summary: Prometheus has dropped targets because some scrape configs have exceeded the targets limit.
      expr: increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
      for: 15m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if not (.Values.defaultRules.disabled.PrometheusLabelLimitHit | default false) }}
    - alert: PrometheusLabelLimitHit
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has dropped {{`{{`}} printf "%.0f" $value {{`}}`}} targets because some samples exceeded the configured label_limit, label_name_length_limit or label_value_length_limit.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheuslabellimithit
        summary: Prometheus has dropped targets because some scrape configs have exceeded the labels limit.
      expr: increase(prometheus_target_scrape_pool_exceeded_label_limits_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
      for: 15m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if not (.Values.defaultRules.disabled.PrometheusScrapeBodySizeLimitHit | default false) }}
    - alert: PrometheusScrapeBodySizeLimitHit
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has failed {{`{{`}} printf "%.0f" $value {{`}}`}} scrapes in the last 5m because some targets exceeded the configured body_size_limit.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusscrapebodysizelimithit
        summary: Prometheus has dropped some targets that exceeded body size limit.
      expr: increase(prometheus_target_scrapes_exceeded_body_size_limit_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
      for: 15m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if not (.Values.defaultRules.disabled.PrometheusScrapeSampleLimitHit | default false) }}
    - alert: PrometheusScrapeSampleLimitHit
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has failed {{`{{`}} printf "%.0f" $value {{`}}`}} scrapes in the last 5m because some targets exceeded the configured sample_limit.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusscrapesamplelimithit
        summary: Prometheus has failed scrapes that have exceeded the configured sample limit.
      expr: increase(prometheus_target_scrapes_exceeded_sample_limit_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
      for: 15m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if not (.Values.defaultRules.disabled.PrometheusTargetSyncFailure | default false) }}
    - alert: PrometheusTargetSyncFailure
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: '{{`{{`}} printf "%.0f" $value {{`}}`}} targets in Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} have failed to sync because invalid configuration was supplied.'
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheustargetsyncfailure
        summary: Prometheus has failed to sync targets.
      expr: increase(prometheus_target_sync_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[30m]) > 0
      for: 5m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if not (.Values.defaultRules.disabled.PrometheusHighQueryLoad | default false) }}
    - alert: PrometheusHighQueryLoad
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} query API has less than 20% available capacity in its query engine for the last 15 minutes.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheushighqueryload
        summary: Prometheus is reaching its maximum capacity serving concurrent requests.
      expr: avg_over_time(prometheus_engine_queries{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) / max_over_time(prometheus_engine_queries_concurrent_max{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0.8
      for: 15m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if not (.Values.defaultRules.disabled.PrometheusErrorSendingAlertsToAnyAlertmanager | default false) }}
    - alert: PrometheusErrorSendingAlertsToAnyAlertmanager
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: '{{`{{`}} printf "%.1f" $value {{`}}`}}% minimum errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} to any Alertmanager.'
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheuserrorsendingalertstoanyalertmanager
        summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
      expr: |-
        min without (alertmanager) (
          rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}",alertmanager!~``}[5m])
        /
          rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}",alertmanager!~``}[5m])
        )
        * 100
        > 3
      for: 15m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- end }}
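
For reference, a minimal values.yaml sketch showing how the toggles read by this template could be set when installing the chart. The keys (defaultRules.create, defaultRules.rules.prometheus, defaultRules.disabled.<AlertName>, defaultRules.runbookUrl, defaultRules.labels, defaultRules.annotations, defaultRules.additionalRuleLabels, defaultRules.additionalRuleAnnotations, kubeTargetVersionOverride) are the ones referenced above; the sample values are illustrative placeholders, not chart defaults.

# Illustrative values.yaml fragment (sample values are placeholders).
kubeTargetVersionOverride: ""            # empty: use .Capabilities.KubeVersion.GitVersion
defaultRules:
  create: true                           # render the default PrometheusRule resources
  rules:
    prometheus: true                     # include this 'prometheus' rule group
  runbookUrl: https://example.com/runbooks   # prefix used in each rule's runbook_url annotation
  disabled:
    PrometheusOutOfOrderTimestamps: true # disable an individual alert by name
  labels: {}                             # extra labels on the PrometheusRule object
  annotations: {}                        # extra annotations on the PrometheusRule object
  additionalRuleLabels:
    team: example-team                   # appended to every rule's labels
  additionalRuleAnnotations:
    environment: example                 # appended to every rule's annotations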