github.com/verrazzano/verrazzano@v1.7.0/platform-operator/thirdparty/charts/prometheus-community/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-apps.yaml (about)

     1  {{- /*
     2  Generated from 'kubernetes-apps' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetesControlPlane-prometheusRule.yaml
     3  Do not change in-place! In order to change this file, first read the following link:
     4  https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
     5  */ -}}
     6  {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
     7  {{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesApps }}
     8  {{- $targetNamespace := .Values.defaultRules.appNamespacesTarget }}
     9  apiVersion: monitoring.coreos.com/v1
    10  kind: PrometheusRule  # rendered only when the target kube version is >=1.14.0-0 and both defaultRules.create and defaultRules.rules.kubernetesApps are set
    11  metadata:  # name is the chart fullname plus "-kubernetes-apps", cut to 63 characters with a trailing "-" removed
    12    name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-apps" | trunc 63 | trimSuffix "-" }}
    13    namespace: {{ template "kube-prometheus-stack.namespace" . }}
    14    labels:  # chart labels, followed by .Values.defaultRules.labels when provided
    15      app: {{ template "kube-prometheus-stack.name" . }}
    16  {{ include "kube-prometheus-stack.labels" . | indent 4 }}
    17  {{- if .Values.defaultRules.labels }}
    18  {{ toYaml .Values.defaultRules.labels | indent 4 }}
    19  {{- end }}
    20  {{- if .Values.defaultRules.annotations }}
    21    annotations:  # copied from .Values.defaultRules.annotations
    22  {{ toYaml .Values.defaultRules.annotations | indent 4 }}
    23  {{- end }}
    24  spec:
    25    groups:
    26    - name: kubernetes-apps  # every rule below filters namespaces with the regex from .Values.defaultRules.appNamespacesTarget
    27      rules:
    28  {{- if not (.Values.defaultRules.disabled.KubePodCrashLooping | default false) }}
    29      - alert: KubePodCrashLooping  # container reported waiting with reason CrashLoopBackOff within the last 5m; skipped when defaultRules.disabled.KubePodCrashLooping is set
    30        annotations:  # optional defaultRules.additionalRuleAnnotations are emitted first, then the fixed annotations
    31  {{- if .Values.defaultRules.additionalRuleAnnotations }}
    32  {{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
    33  {{- end }}
    34          description: 'Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} ({{`{{`}} $labels.container {{`}}`}}) is in waiting state (reason: "CrashLoopBackOff").'
    35          runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepodcrashlooping
    36          summary: Pod is crash looping.
    37        expr: max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff", job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[5m]) >= 1
    38        for: 15m  # expr must stay true for 15 minutes before the alert fires
    39        labels:  # fixed severity plus optional defaultRules.additionalRuleLabels
    40          severity: warning
    41  {{- if .Values.defaultRules.additionalRuleLabels }}
    42  {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
    43  {{- end }}
    44  {{- end }}
    45  {{- if not (.Values.defaultRules.disabled.KubePodNotReady | default false) }}
    46      - alert: KubePodNotReady  # pod in phase Pending, Unknown or Failed that has a kube_pod_owner series whose owner_kind is not Job; skipped when defaultRules.disabled.KubePodNotReady is set
    47        annotations:  # optional defaultRules.additionalRuleAnnotations are emitted first, then the fixed annotations
    48  {{- if .Values.defaultRules.additionalRuleAnnotations }}
    49  {{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
    50  {{- end }}
    51          description: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} has been in a non-ready state for longer than 15 minutes.
    52          runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepodnotready
    53          summary: Pod has been in a non-ready state for more than 15 minutes.
    54        expr: |-
    55          sum by (namespace, pod, cluster, verrazzano_cluster) (
    56            max by (namespace, pod, cluster, verrazzano_cluster) (
    57              kube_pod_status_phase{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}", phase=~"Pending|Unknown|Failed"}
    58            ) * on (namespace, pod, cluster, verrazzano_cluster) group_left(owner_kind) topk by (namespace, pod, cluster, verrazzano_cluster) (
    59              1, max by (namespace, pod, owner_kind, cluster, verrazzano_cluster) (kube_pod_owner{owner_kind!="Job"})
    60            )
    61          ) > 0
    62        for: 15m  # matches the 15 minutes quoted in the description and summary
    63        labels:  # fixed severity plus optional defaultRules.additionalRuleLabels
    64          severity: warning
    65  {{- if .Values.defaultRules.additionalRuleLabels }}
    66  {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
    67  {{- end }}
    68  {{- end }}
    69  {{- if not (.Values.defaultRules.disabled.KubeDeploymentGenerationMismatch | default false) }}
    70      - alert: KubeDeploymentGenerationMismatch  # observed generation differs from metadata generation; skipped when defaultRules.disabled.KubeDeploymentGenerationMismatch is set
    71        annotations:  # optional defaultRules.additionalRuleAnnotations are emitted first, then the fixed annotations
    72  {{- if .Values.defaultRules.additionalRuleAnnotations }}
    73  {{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
    74  {{- end }}
    75          description: Deployment generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} does not match, this indicates that the Deployment has failed but has not been rolled back.
    76          runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedeploymentgenerationmismatch
    77          summary: Deployment generation mismatch due to possible roll-back
    78        expr: |-
    79          kube_deployment_status_observed_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
    80            !=
    81          kube_deployment_metadata_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
    82        for: 15m  # expr must stay true for 15 minutes before the alert fires
    83        labels:  # fixed severity plus optional defaultRules.additionalRuleLabels
    84          severity: warning
    85  {{- if .Values.defaultRules.additionalRuleLabels }}
    86  {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
    87  {{- end }}
    88  {{- end }}
    89  {{- if not (.Values.defaultRules.disabled.KubeDeploymentReplicasMismatch | default false) }}
    90      - alert: KubeDeploymentReplicasMismatch  # spec replicas exceed available replicas while the updated-replica count has not changed in 10m; skipped when defaultRules.disabled.KubeDeploymentReplicasMismatch is set
    91        annotations:  # optional defaultRules.additionalRuleAnnotations are emitted first, then the fixed annotations
    92  {{- if .Values.defaultRules.additionalRuleAnnotations }}
    93  {{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
    94  {{- end }}
    95          description: Deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes.
    96          runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedeploymentreplicasmismatch
    97          summary: Deployment has not matched the expected number of replicas.
    98        expr: |-
    99          (
   100            kube_deployment_spec_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
   101              >
   102            kube_deployment_status_replicas_available{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
   103          ) and (
   104            changes(kube_deployment_status_replicas_updated{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[10m])
   105              ==
   106            0
   107          )
   108        for: 15m  # matches the 15 minutes quoted in the description
   109        labels:  # fixed severity plus optional defaultRules.additionalRuleLabels
   110          severity: warning
   111  {{- if .Values.defaultRules.additionalRuleLabels }}
   112  {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
   113  {{- end }}
   114  {{- end }}
   115  {{- if not (.Values.defaultRules.disabled.KubeStatefulSetReplicasMismatch | default false) }}
   116      - alert: KubeStatefulSetReplicasMismatch  # ready replicas differ from status replicas while the updated-replica count has not changed in 10m; skipped when defaultRules.disabled.KubeStatefulSetReplicasMismatch is set
   117        annotations:  # optional defaultRules.additionalRuleAnnotations are emitted first, then the fixed annotations
   118  {{- if .Values.defaultRules.additionalRuleAnnotations }}
   119  {{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
   120  {{- end }}
   121          description: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes.
   122          runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubestatefulsetreplicasmismatch
   123          summary: StatefulSet has not matched the expected number of replicas.
   124        expr: |-
   125          (
   126            kube_statefulset_status_replicas_ready{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
   127              !=
   128            kube_statefulset_status_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
   129          ) and (
   130            changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[10m])
   131              ==
   132            0
   133          )
   134        for: 15m  # matches the 15 minutes quoted in the description
   135        labels:  # fixed severity plus optional defaultRules.additionalRuleLabels
   136          severity: warning
   137  {{- if .Values.defaultRules.additionalRuleLabels }}
   138  {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
   139  {{- end }}
   140  {{- end }}
   141  {{- if not (.Values.defaultRules.disabled.KubeStatefulSetGenerationMismatch | default false) }}
   142      - alert: KubeStatefulSetGenerationMismatch  # observed generation differs from metadata generation; skipped when defaultRules.disabled.KubeStatefulSetGenerationMismatch is set
   143        annotations:  # optional defaultRules.additionalRuleAnnotations are emitted first, then the fixed annotations
   144  {{- if .Values.defaultRules.additionalRuleAnnotations }}
   145  {{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
   146  {{- end }}
   147          description: StatefulSet generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} does not match, this indicates that the StatefulSet has failed but has not been rolled back.
   148          runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubestatefulsetgenerationmismatch
   149          summary: StatefulSet generation mismatch due to possible roll-back
   150        expr: |-
   151          kube_statefulset_status_observed_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
   152            !=
   153          kube_statefulset_metadata_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
   154        for: 15m  # expr must stay true for 15 minutes before the alert fires
   155        labels:  # fixed severity plus optional defaultRules.additionalRuleLabels
   156          severity: warning
   157  {{- if .Values.defaultRules.additionalRuleLabels }}
   158  {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
   159  {{- end }}
   160  {{- end }}
   161  {{- if not (.Values.defaultRules.disabled.KubeStatefulSetUpdateNotRolledOut | default false) }}
   162      - alert: KubeStatefulSetUpdateNotRolledOut  # current-revision series has no matching update-revision series, replicas differ from updated replicas, and the updated count has not changed in 5m; skipped when defaultRules.disabled.KubeStatefulSetUpdateNotRolledOut is set
   163        annotations:  # optional defaultRules.additionalRuleAnnotations are emitted first, then the fixed annotations
   164  {{- if .Values.defaultRules.additionalRuleAnnotations }}
   165  {{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
   166  {{- end }}
   167          description: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} update has not been rolled out.
   168          runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubestatefulsetupdatenotrolledout
   169          summary: StatefulSet update has not been rolled out.
   170        expr: |-
   171          (
   172            max without (revision) (
   173              kube_statefulset_status_current_revision{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
   174                unless
   175              kube_statefulset_status_update_revision{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
   176            )
   177              *
   178            (
   179              kube_statefulset_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
   180                !=
   181              kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
   182            )
   183          )  and (
   184            changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[5m])
   185              ==
   186            0
   187          )
   188        for: 15m  # expr must stay true for 15 minutes before the alert fires
   189        labels:  # fixed severity plus optional defaultRules.additionalRuleLabels
   190          severity: warning
   191  {{- if .Values.defaultRules.additionalRuleLabels }}
   192  {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
   193  {{- end }}
   194  {{- end }}
   195  {{- if not (.Values.defaultRules.disabled.KubeDaemonSetRolloutStuck | default false) }}
   196      - alert: KubeDaemonSetRolloutStuck  # current, updated or available count differs from desired, or misscheduled is non-zero, while the updated count has not changed in 5m; skipped when defaultRules.disabled.KubeDaemonSetRolloutStuck is set
   197        annotations:  # optional defaultRules.additionalRuleAnnotations are emitted first, then the fixed annotations
   198  {{- if .Values.defaultRules.additionalRuleAnnotations }}
   199  {{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
   200  {{- end }}
   201          description: DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} has not finished or progressed for at least 15 minutes.
   202          runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedaemonsetrolloutstuck
   203          summary: DaemonSet rollout is stuck.
   204        expr: |-
   205          (
   206            (
   207              kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
   208               !=
   209              kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
   210            ) or (
   211              kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
   212               !=
   213              0
   214            ) or (
   215              kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
   216               !=
   217              kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
   218            ) or (
   219              kube_daemonset_status_number_available{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
   220               !=
   221              kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
   222            )
   223          ) and (
   224            changes(kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[5m])
   225              ==
   226            0
   227          )
   228        for: 15m  # matches the 15 minutes quoted in the description
   229        labels:  # fixed severity plus optional defaultRules.additionalRuleLabels
   230          severity: warning
   231  {{- if .Values.defaultRules.additionalRuleLabels }}
   232  {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
   233  {{- end }}
   234  {{- end }}
   235  {{- if not (.Values.defaultRules.disabled.KubeContainerWaiting | default false) }}
   236      - alert: KubeContainerWaiting  # container reports any waiting reason (the selector does not filter on reason); skipped when defaultRules.disabled.KubeContainerWaiting is set
   237        annotations:  # optional defaultRules.additionalRuleAnnotations are emitted first, then the fixed annotations
   238  {{- if .Values.defaultRules.additionalRuleAnnotations }}
   239  {{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
   240  {{- end }}
   241          description: pod/{{`{{`}} $labels.pod {{`}}`}} in namespace {{`{{`}} $labels.namespace {{`}}`}} on container {{`{{`}} $labels.container{{`}}`}} has been in waiting state for longer than 1 hour.
   242          runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubecontainerwaiting
   243          summary: Pod container waiting longer than 1 hour
   244        expr: sum by (namespace, pod, container, cluster, verrazzano_cluster) (kube_pod_container_status_waiting_reason{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}) > 0
   245        for: 1h  # matches the 1 hour quoted in the description and summary
   246        labels:  # fixed severity plus optional defaultRules.additionalRuleLabels
   247          severity: warning
   248  {{- if .Values.defaultRules.additionalRuleLabels }}
   249  {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
   250  {{- end }}
   251  {{- end }}
   252  {{- if not (.Values.defaultRules.disabled.KubeDaemonSetNotScheduled | default false) }}
   253      - alert: KubeDaemonSetNotScheduled  # desired minus current scheduled count is positive; that difference is the $value shown in the description; skipped when defaultRules.disabled.KubeDaemonSetNotScheduled is set
   254        annotations:  # optional defaultRules.additionalRuleAnnotations are emitted first, then the fixed annotations
   255  {{- if .Values.defaultRules.additionalRuleAnnotations }}
   256  {{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
   257  {{- end }}
   258          description: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are not scheduled.'
   259          runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedaemonsetnotscheduled
   260          summary: DaemonSet pods are not scheduled.
   261        expr: |-
   262          kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
   263            -
   264          kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
   265        for: 10m  # shorter hold time than the 15m used by most rules in this group
   266        labels:  # fixed severity plus optional defaultRules.additionalRuleLabels
   267          severity: warning
   268  {{- if .Values.defaultRules.additionalRuleLabels }}
   269  {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
   270  {{- end }}
   271  {{- end }}
   272  {{- if not (.Values.defaultRules.disabled.KubeDaemonSetMisScheduled | default false) }}
   273      - alert: KubeDaemonSetMisScheduled  # misscheduled count is above zero; that count is the $value shown in the description; skipped when defaultRules.disabled.KubeDaemonSetMisScheduled is set
   274        annotations:  # optional defaultRules.additionalRuleAnnotations are emitted first, then the fixed annotations
   275  {{- if .Values.defaultRules.additionalRuleAnnotations }}
   276  {{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
   277  {{- end }}
   278          description: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are running where they are not supposed to run.'
   279          runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedaemonsetmisscheduled
   280          summary: DaemonSet pods are misscheduled.
   281        expr: kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
   282        for: 15m  # expr must stay true for 15 minutes before the alert fires
   283        labels:  # fixed severity plus optional defaultRules.additionalRuleLabels
   284          severity: warning
   285  {{- if .Values.defaultRules.additionalRuleLabels }}
   286  {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
   287  {{- end }}
   288  {{- end }}
   289  {{- if not (.Values.defaultRules.disabled.KubeJobNotCompleted | default false) }}
   290      - alert: KubeJobNotCompleted  # a job with active pods started more than 43200 seconds (12 hours) ago; skipped when defaultRules.disabled.KubeJobNotCompleted is set
   291        annotations:  # optional defaultRules.additionalRuleAnnotations are emitted first, then the fixed annotations
   292  {{- if .Values.defaultRules.additionalRuleAnnotations }}
   293  {{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
   294  {{- end }}
   295          description: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} is taking more than {{`{{`}} "43200" | humanizeDuration {{`}}`}} to complete.
   296          runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubejobnotcompleted
   297          summary: Job did not complete in time
   298        expr: |-
   299          time() - max by (namespace, job_name, cluster, verrazzano_cluster) (kube_job_status_start_time{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
   300            and
   301          kube_job_status_active{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0) > 43200
   302        labels:  # no for: clause on this rule; the age threshold lives in expr. Fixed severity plus optional defaultRules.additionalRuleLabels
   303          severity: warning
   304  {{- if .Values.defaultRules.additionalRuleLabels }}
   305  {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
   306  {{- end }}
   307  {{- end }}
   308  {{- if not (.Values.defaultRules.disabled.KubeJobFailed | default false) }}
   309      - alert: KubeJobFailed  # kube_job_failed is above zero for the job; skipped when defaultRules.disabled.KubeJobFailed is set
   310        annotations:  # optional defaultRules.additionalRuleAnnotations are emitted first, then the fixed annotations
   311  {{- if .Values.defaultRules.additionalRuleAnnotations }}
   312  {{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
   313  {{- end }}
   314          description: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} failed to complete. Removing failed job after investigation should clear this alert.
   315          runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubejobfailed
   316          summary: Job failed to complete.
   317        expr: kube_job_failed{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}  > 0
   318        for: 15m  # expr must stay true for 15 minutes before the alert fires
   319        labels:  # fixed severity plus optional defaultRules.additionalRuleLabels
   320          severity: warning
   321  {{- if .Values.defaultRules.additionalRuleLabels }}
   322  {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
   323  {{- end }}
   324  {{- end }}
   325  {{- if not (.Values.defaultRules.disabled.KubeHpaReplicasMismatch | default false) }}
   326      - alert: KubeHpaReplicasMismatch  # desired differs from current replicas, current is strictly between min and max, and current has not changed in 15m; skipped when defaultRules.disabled.KubeHpaReplicasMismatch is set
   327        annotations:  # optional defaultRules.additionalRuleAnnotations are emitted first, then the fixed annotations
   328  {{- if .Values.defaultRules.additionalRuleAnnotations }}
   329  {{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
   330  {{- end }}
   331          description: HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler  {{`}}`}} has not matched the desired number of replicas for longer than 15 minutes.
   332          runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubehpareplicasmismatch
   333          summary: HPA has not matched desired number of replicas.
   334        expr: |-
   335          (kube_horizontalpodautoscaler_status_desired_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
   336            !=
   337          kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"})
   338            and
   339          (kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
   340            >
   341          kube_horizontalpodautoscaler_spec_min_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"})
   342            and
   343          (kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
   344            <
   345          kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"})
   346            and
   347          changes(kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[15m]) == 0
   348        for: 15m  # matches the 15 minutes quoted in the description
   349        labels:  # fixed severity plus optional defaultRules.additionalRuleLabels
   350          severity: warning
   351  {{- if .Values.defaultRules.additionalRuleLabels }}
   352  {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
   353  {{- end }}
   354  {{- end }}
   355  {{- if not (.Values.defaultRules.disabled.KubeHpaMaxedOut | default false) }}
   356      - alert: KubeHpaMaxedOut  # current replicas equal the configured max replicas; skipped when defaultRules.disabled.KubeHpaMaxedOut is set
   357        annotations:  # optional defaultRules.additionalRuleAnnotations are emitted first, then the fixed annotations
   358  {{- if .Values.defaultRules.additionalRuleAnnotations }}
   359  {{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
   360  {{- end }}
   361          description: HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler  {{`}}`}} has been running at max replicas for longer than 15 minutes.
   362          runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubehpamaxedout
   363          summary: HPA is running at max replicas
   364        expr: |-
   365          kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
   366            ==
   367          kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
   368        for: 15m  # matches the 15 minutes quoted in the description
   369        labels:  # fixed severity plus optional defaultRules.additionalRuleLabels
   370          severity: warning
   371  {{- if .Values.defaultRules.additionalRuleLabels }}
   372  {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
   373  {{- end }}
   374  {{- end }}
   375  {{- end }}