{{- /*
Generated from 'kubernetes-apps' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetesControlPlane-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- /*
Renders one PrometheusRule object holding the 'kubernetes-apps' alert group.

The whole object is emitted only when all of these hold:
  - the target Kubernetes version (kubeTargetVersionOverride, falling back to
    the cluster's reported GitVersion) is >= 1.14.0-0 and < 9.9.9-9
  - .Values.defaultRules.create is truthy
  - .Values.defaultRules.rules.kubernetesApps is truthy

Each alert can be switched off individually via
.Values.defaultRules.disabled.<AlertName>. Every namespaced selector is limited
to the regex in .Values.defaultRules.appNamespacesTarget. Optional
additionalRuleAnnotations and additionalRuleLabels are merged into every rule.

Alert annotation text wraps the Prometheus-side templating in backtick raw
strings so that Helm passes the double-brace expressions through untouched.

NOTE: the summary of KubeStatefulSetReplicasMismatch below intentionally
differs from the generated text, which named "Deployment" for a StatefulSet
alert. Carry the same correction into the generator input when regenerating.
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesApps }}
{{- $targetNamespace := .Values.defaultRules.appNamespacesTarget }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-apps" | trunc 63 | trimSuffix "-" }}
  namespace: {{ template "kube-prometheus-stack.namespace" . }}
  labels:
    app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
  annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
  groups:
  - name: kubernetes-apps
    rules:
{{- /* A container reported waiting reason CrashLoopBackOff within the trailing 5m window, held for 15m. */}}
{{- if not (.Values.defaultRules.disabled.KubePodCrashLooping | default false) }}
    - alert: KubePodCrashLooping
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: 'Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} ({{`{{`}} $labels.container {{`}}`}}) is in waiting state (reason: "CrashLoopBackOff").'
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepodcrashlooping
        summary: Pod is crash looping.
      expr: max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff", job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[5m]) >= 1
      for: 15m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- /* Pod phase is Pending, Unknown or Failed for 15m; the join against kube_pod_owner drops pods owned by a Job. */}}
{{- if not (.Values.defaultRules.disabled.KubePodNotReady | default false) }}
    - alert: KubePodNotReady
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} has been in a non-ready state for longer than 15 minutes.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepodnotready
        summary: Pod has been in a non-ready state for more than 15 minutes.
      expr: |-
        sum by (namespace, pod, cluster, verrazzano_cluster) (
          max by (namespace, pod, cluster, verrazzano_cluster) (
            kube_pod_status_phase{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}", phase=~"Pending|Unknown|Failed"}
          ) * on (namespace, pod, cluster, verrazzano_cluster) group_left(owner_kind) topk by (namespace, pod, cluster, verrazzano_cluster) (
            1, max by (namespace, pod, owner_kind, cluster, verrazzano_cluster) (kube_pod_owner{owner_kind!="Job"})
          )
        ) > 0
      for: 15m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- /* Deployment observed generation differs from its metadata generation for 15m. */}}
{{- if not (.Values.defaultRules.disabled.KubeDeploymentGenerationMismatch | default false) }}
    - alert: KubeDeploymentGenerationMismatch
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: Deployment generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} does not match, this indicates that the Deployment has failed but has not been rolled back.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedeploymentgenerationmismatch
        summary: Deployment generation mismatch due to possible roll-back
      expr: |-
        kube_deployment_status_observed_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
          !=
        kube_deployment_metadata_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
      for: 15m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- /* Deployment wants more replicas than are available while the updated-replica count has not changed in 10m. */}}
{{- if not (.Values.defaultRules.disabled.KubeDeploymentReplicasMismatch | default false) }}
    - alert: KubeDeploymentReplicasMismatch
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: Deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedeploymentreplicasmismatch
        summary: Deployment has not matched the expected number of replicas.
      expr: |-
        (
          kube_deployment_spec_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
            >
          kube_deployment_status_replicas_available{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
        ) and (
          changes(kube_deployment_status_replicas_updated{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[10m])
            ==
          0
        )
      for: 15m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- /* StatefulSet ready replicas differ from current replicas while the updated-replica count has not changed in 10m. Summary names StatefulSet; the generated text said Deployment. */}}
{{- if not (.Values.defaultRules.disabled.KubeStatefulSetReplicasMismatch | default false) }}
    - alert: KubeStatefulSetReplicasMismatch
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubestatefulsetreplicasmismatch
        summary: StatefulSet has not matched the expected number of replicas.
      expr: |-
        (
          kube_statefulset_status_replicas_ready{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
            !=
          kube_statefulset_status_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
        ) and (
          changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[10m])
            ==
          0
        )
      for: 15m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- /* StatefulSet observed generation differs from its metadata generation for 15m. */}}
{{- if not (.Values.defaultRules.disabled.KubeStatefulSetGenerationMismatch | default false) }}
    - alert: KubeStatefulSetGenerationMismatch
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: StatefulSet generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} does not match, this indicates that the StatefulSet has failed but has not been rolled back.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubestatefulsetgenerationmismatch
        summary: StatefulSet generation mismatch due to possible roll-back
      expr: |-
        kube_statefulset_status_observed_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
          !=
        kube_statefulset_metadata_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
      for: 15m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- /* StatefulSet current revision has no matching update revision, replicas differ from updated replicas, and the updated count has not changed in 5m. */}}
{{- if not (.Values.defaultRules.disabled.KubeStatefulSetUpdateNotRolledOut | default false) }}
    - alert: KubeStatefulSetUpdateNotRolledOut
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} update has not been rolled out.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubestatefulsetupdatenotrolledout
        summary: StatefulSet update has not been rolled out.
      expr: |-
        (
          max without (revision) (
            kube_statefulset_status_current_revision{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
              unless
            kube_statefulset_status_update_revision{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
          )
            *
          (
            kube_statefulset_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
              !=
            kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
          )
        ) and (
          changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[5m])
            ==
          0
        )
      for: 15m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- /* Any DaemonSet scheduling counter disagrees with the desired count, or pods are misscheduled, while the updated count has not changed in 5m. */}}
{{- if not (.Values.defaultRules.disabled.KubeDaemonSetRolloutStuck | default false) }}
    - alert: KubeDaemonSetRolloutStuck
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} has not finished or progressed for at least 15 minutes.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedaemonsetrolloutstuck
        summary: DaemonSet rollout is stuck.
      expr: |-
        (
          (
            kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
              !=
            kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
          ) or (
            kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
              !=
            0
          ) or (
            kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
              !=
            kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
          ) or (
            kube_daemonset_status_number_available{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
              !=
            kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
          )
        ) and (
          changes(kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[5m])
            ==
          0
        )
      for: 15m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- /* A container has reported any waiting reason continuously for 1h. */}}
{{- if not (.Values.defaultRules.disabled.KubeContainerWaiting | default false) }}
    - alert: KubeContainerWaiting
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: pod/{{`{{`}} $labels.pod {{`}}`}} in namespace {{`{{`}} $labels.namespace {{`}}`}} on container {{`{{`}} $labels.container{{`}}`}} has been in waiting state for longer than 1 hour.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubecontainerwaiting
        summary: Pod container waiting longer than 1 hour
      expr: sum by (namespace, pod, container, cluster, verrazzano_cluster) (kube_pod_container_status_waiting_reason{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}) > 0
      for: 1h
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- /* Desired minus currently scheduled DaemonSet pods is positive for 10m. */}}
{{- if not (.Values.defaultRules.disabled.KubeDaemonSetNotScheduled | default false) }}
    - alert: KubeDaemonSetNotScheduled
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are not scheduled.'
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedaemonsetnotscheduled
        summary: DaemonSet pods are not scheduled.
      expr: |-
        kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
          -
        kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
      for: 10m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- /* DaemonSet misscheduled pod count is positive for 15m. */}}
{{- if not (.Values.defaultRules.disabled.KubeDaemonSetMisScheduled | default false) }}
    - alert: KubeDaemonSetMisScheduled
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are running where they are not supposed to run.'
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedaemonsetmisscheduled
        summary: DaemonSet pods are misscheduled.
      expr: kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
      for: 15m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- /* A Job that is still active started more than 43200 seconds ago. This rule has no "for" clause. */}}
{{- if not (.Values.defaultRules.disabled.KubeJobNotCompleted | default false) }}
    - alert: KubeJobNotCompleted
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} is taking more than {{`{{`}} "43200" | humanizeDuration {{`}}`}} to complete.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubejobnotcompleted
        summary: Job did not complete in time
      expr: |-
        time() - max by (namespace, job_name, cluster, verrazzano_cluster) (kube_job_status_start_time{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
          and
        kube_job_status_active{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0) > 43200
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- /* kube_job_failed is positive for 15m. */}}
{{- if not (.Values.defaultRules.disabled.KubeJobFailed | default false) }}
    - alert: KubeJobFailed
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} failed to complete. Removing failed job after investigation should clear this alert.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubejobfailed
        summary: Job failed to complete.
      expr: kube_job_failed{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
      for: 15m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- /* HPA desired and current replicas differ, current is strictly between min and max, and current has not changed in 15m. */}}
{{- if not (.Values.defaultRules.disabled.KubeHpaReplicasMismatch | default false) }}
    - alert: KubeHpaReplicasMismatch
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has not matched the desired number of replicas for longer than 15 minutes.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubehpareplicasmismatch
        summary: HPA has not matched desired number of replicas.
      expr: |-
        (kube_horizontalpodautoscaler_status_desired_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
          !=
        kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"})
          and
        (kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
          >
        kube_horizontalpodautoscaler_spec_min_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"})
          and
        (kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
          <
        kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"})
          and
        changes(kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[15m]) == 0
      for: 15m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- /* HPA current replicas equal its configured max replicas for 15m. */}}
{{- if not (.Values.defaultRules.disabled.KubeHpaMaxedOut | default false) }}
    - alert: KubeHpaMaxedOut
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
        description: HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has been running at max replicas for longer than 15 minutes.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubehpamaxedout
        summary: HPA is running at max replicas
      expr: |-
        kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
          ==
        kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
      for: 15m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- end }}