{{- /*
Generated from https://github.com/thanos-io/thanos/blob/main/examples/alerts/alerts.md

Renders the "<fullname>-sidecar" PrometheusRule that holds the "thanos-sidecar" alert group.

The resource is emitted only when all of the following hold:
  - .Values.metrics.enabled is set;
  - .Values.metrics.prometheusRule.default.create or .Values.metrics.prometheusRule.default.sidecar is set;
  - the cluster advertises the monitoring.coreos.com/v1 API.

Each alert is rendered unless .Values.metrics.prometheusRule.default.disabled.<AlertName> is truthy.
*/ -}}
{{- if and .Values.metrics.enabled (or .Values.metrics.prometheusRule.default.create .Values.metrics.prometheusRule.default.sidecar ) ( .Capabilities.APIVersions.Has "monitoring.coreos.com/v1" ) }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: {{ template "common.names.fullname" . }}-sidecar
  {{- /* Uses .Values.metrics.prometheusRule.namespace when it is non-empty, otherwise the release namespace. */}}
  namespace: {{ default .Release.Namespace .Values.metrics.prometheusRule.namespace | quote }}
  {{- /* Standard chart labels first, then the optional PrometheusRule-specific and chart-wide labels. */}}
  labels: {{- include "common.labels.standard" . | nindent 4 }}
    {{- if .Values.metrics.prometheusRule.additionalLabels }}
    {{- include "common.tplvalues.render" (dict "value" .Values.metrics.prometheusRule.additionalLabels "context" $) | nindent 4 }}
    {{- end }}
    {{- if .Values.commonLabels }}
    {{- include "common.tplvalues.render" ( dict "value" .Values.commonLabels "context" $ ) | nindent 4 }}
    {{- end }}
  {{- if .Values.commonAnnotations }}
  annotations: {{- include "common.tplvalues.render" ( dict "value" .Values.commonAnnotations "context" $ ) | nindent 4 }}
  {{- end }}
spec:
  groups:
  - name: thanos-sidecar
    rules:
    {{- /*
    ThanosSidecarBucketOperationsFailed: critical alert raised once the 5m rate of
    thanos_objstore_bucket_operation_failures_total, summed by job, instance and
    verrazzano_cluster, has stayed above zero for 5 minutes.
    */}}
    {{- if not (.Values.metrics.prometheusRule.default.disabled.ThanosSidecarBucketOperationsFailed | default false) }}
    - alert: ThanosSidecarBucketOperationsFailed
      annotations:
        {{- /* Chart-wide commonAnnotations are also copied into this alert's annotations. */}}
        {{- if .Values.commonAnnotations }}
        {{- include "common.tplvalues.render" ( dict "value" .Values.commonAnnotations "context" $ ) | nindent 8 }}
        {{- end }}
        {{- /* The backtick-quoted actions below emit literal double curly braces, so $labels.instance is left in the rendered rule instead of being evaluated by Helm. */}}
        description: Thanos Sidecar {{`{{`}} $labels.instance {{`}}`}} bucket operations are failing
        runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarbucketoperationsfailed
        summary: Thanos Sidecar bucket operations are failing
      expr: |
        sum by (job, instance, verrazzano_cluster) (rate(thanos_objstore_bucket_operation_failures_total{job=~".*thanos-discovery.*"}[5m])) > 0
      for: 5m
      labels:
        severity: critical
        {{- if .Values.metrics.prometheusRule.additionalLabels }}
        {{- include "common.tplvalues.render" (dict "value" .Values.metrics.prometheusRule.additionalLabels "context" $) | nindent 8 }}
        {{- end }}
    {{- end }}
    {{- /*
    ThanosSidecarNoConnectionToStartedPrometheus: critical alert raised once
    thanos_sidecar_prometheus_up has been 0 for 5 minutes while, for the same
    namespace, pod and verrazzano_cluster, prometheus_tsdb_data_replay_duration_seconds
    is non-zero.
    */}}
    {{- if not (.Values.metrics.prometheusRule.default.disabled.ThanosSidecarNoConnectionToStartedPrometheus | default false) }}
    - alert: ThanosSidecarNoConnectionToStartedPrometheus
      annotations:
        {{- /* Chart-wide commonAnnotations are also copied into this alert's annotations. */}}
        {{- if .Values.commonAnnotations }}
        {{- include "common.tplvalues.render" ( dict "value" .Values.commonAnnotations "context" $ ) | nindent 8 }}
        {{- end }}
        {{- /* The backtick-quoted actions below emit literal double curly braces, so $labels.instance is left in the rendered rule instead of being evaluated by Helm. */}}
        description: Thanos Sidecar {{`{{`}} $labels.instance {{`}}`}} is unhealthy.
        runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarnoconnectiontostartedprometheus
        summary: Thanos Sidecar cannot access Prometheus, even though Prometheus seems healthy and has reloaded WAL.
      expr: |
        thanos_sidecar_prometheus_up{job=~".*thanos-discovery.*"} == 0
        AND on (namespace, pod, verrazzano_cluster)
        prometheus_tsdb_data_replay_duration_seconds != 0
      for: 5m
      labels:
        severity: critical
        {{- if .Values.metrics.prometheusRule.additionalLabels }}
        {{- include "common.tplvalues.render" (dict "value" .Values.metrics.prometheusRule.additionalLabels "context" $) | nindent 8 }}
        {{- end }}
    {{- end }}
{{- end }}