// Origin: github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/production/loki-mixin/alerts.libsonnet
//
// Prometheus alerting rules for Loki, exposed through the hidden
// `prometheusAlerts` field. Both `prometheusAlerts` and `groups` use `+` so
// that this object extends, rather than replaces, whatever it is mixed into.
{
  prometheusAlerts+:: {
    groups+: [
      {
        name: 'loki_alerts',
        rules: [
          {
            // Fires when more than 10% of requests for a (namespace, job, route)
            // returned a 5xx status code, sustained for 15 minutes.
            alert: 'LokiRequestErrors',
            expr: |||
              100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[1m])) by (namespace, job, route)
                /
              sum(rate(loki_request_duration_seconds_count[1m])) by (namespace, job, route)
                > 10
            |||,
            'for': '15m',
            labels: {
              severity: 'critical',
            },
            annotations: {
              message: |||
                {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors.
              |||,
            },
          },
          {
            // Fires as soon as loki_panic_total has increased within the last
            // 10 minutes for a (namespace, job); there is no 'for' hold period.
            alert: 'LokiRequestPanics',
            expr: |||
              sum(increase(loki_panic_total[10m])) by (namespace, job) > 0
            |||,
            labels: {
              severity: 'critical',
            },
            annotations: {
              // $value is the absolute increase of the panic counter over the
              // 10m window, not a percentage, so it is reported as a count.
              message: |||
                {{ $labels.job }} is experiencing an increase of {{ printf "%.2f" $value }} panics over the last 10m.
              |||,
            },
          },
          {
            // Fires when the 99quantile series stays above 1 for 15 minutes
            // (the message reports the value in seconds). Routes whose name
            // contains "tail" (case-insensitive) are excluded.
            // NOTE(review): the queried series looks like a recording rule
            // defined outside this file -- confirm it is deployed alongside.
            alert: 'LokiRequestLatency',
            expr: |||
              namespace_job_route:loki_request_duration_seconds:99quantile{route!~"(?i).*tail.*"} > 1
            |||,
            'for': '15m',
            labels: {
              severity: 'critical',
            },
            annotations: {
              message: |||
                {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency.
              |||,
            },
          },
          {
            // Fires when the per-namespace sum of
            // loki_boltdb_shipper_compactor_running exceeds 1 for 5 minutes.
            alert: 'LokiTooManyCompactorsRunning',
            expr: |||
              sum(loki_boltdb_shipper_compactor_running) by (namespace) > 1
            |||,
            'for': '5m',
            labels: {
              severity: 'warning',
            },
            annotations: {
              message: |||
                {{ $labels.namespace }} has had {{ printf "%.0f" $value }} compactors running for more than 5m. Only one compactor should run at a time.
              |||,
            },
          },
        ],
      },
    ],
  },
}