// Origin: github.com/thanos-io/thanos@v0.32.5/mixin/alerts/compact.libsonnet
//
// Alerting rules for Thanos Compact, written as a Jsonnet mixin fragment.
//
// The object contributes two hidden (`::`) fields:
//   * `compact`          - tunables consumed by the rule templates below.
//   * `prometheusAlerts` - appends one rule group named 'thanos-compact'.
//
// `thanos.targetGroups` is read here but not defined in this file; it has to
// be supplied by the object this fragment is merged into.
{
  local thanos = self,

  // Label names taken from the keys of `targetGroups`. Used both for the
  // PromQL `by (...)` clauses and for the location suffix in descriptions.
  local groupLabels = std.objectFields(thanos.targetGroups),

  // Description suffix such as ' in {{$labels.region}}/{{$labels.cluster}}'.
  // Empty string when no target groups are configured.
  local location =
    if std.length(groupLabels) > 0
    then ' in %s' % std.join('/', ['{{$labels.%s}}' % level for level in groupLabels])
    else '',

  compact+:: {
    // Label matchers inserted inside `{...}` of every metric selector.
    // No default: evaluating it without an override raises this error.
    selector: error 'must provide selector for Thanos Compact alerts',
    // Failure ratio, in percent, above which ThanosCompactHighCompactionFailures fires.
    compactionErrorThreshold: 5,
    // Failure ratio, in percent, above which ThanosCompactBucketHighOperationFailures fires.
    bucketOpsErrorThreshold: 5,
    // Aggregation labels: every target-group label followed by 'job'.
    dimensions: std.join(', ', groupLabels + ['job']),
  },

  prometheusAlerts+:: {
    // Setting `compact` to null contributes no rule group at all.
    groups+: if thanos.compact == null then [] else [
      {
        name: 'thanos-compact',
        rules: [
          {
            // More than one `up` series matches the selector per dimension set.
            alert: 'ThanosCompactMultipleRunning',
            annotations: {
              // The location suffix goes at the end of the sentence so that it
              // reads "... N instances running in <location>." rather than
              // "... N in <location> instances running.". With no target
              // groups the rendered text is the same as before.
              description: 'No more than one Thanos Compact instance should be running at once. There are {{$value}} instances running%s.' % location,
              summary: 'Thanos Compact has multiple instances running.',
            },
            expr: 'sum by (%(dimensions)s) (up{%(selector)s}) > 1' % thanos.compact,
            'for': '5m',
            labels: {
              severity: 'warning',
            },
          },
          {
            // The `thanos_compact_halted` metric equals 1.
            alert: 'ThanosCompactHalted',
            annotations: {
              description: 'Thanos Compact {{$labels.job}}%s has failed to run and now is halted.' % location,
              summary: 'Thanos Compact has failed to run and is now halted.',
            },
            expr: 'thanos_compact_halted{%(selector)s} == 1' % thanos.compact,
            'for': '5m',
            labels: {
              severity: 'warning',
            },
          },
          {
            // Failed compactions as a percentage of all compactions (5m rates)
            // exceed `compactionErrorThreshold`.
            alert: 'ThanosCompactHighCompactionFailures',
            annotations: {
              // `%%` is a literal percent sign after Jsonnet string formatting.
              description: 'Thanos Compact {{$labels.job}}%s is failing to execute {{$value | humanize}}%% of compactions.' % location,
              summary: 'Thanos Compact is failing to execute compactions.',
            },
            expr: |||
              (
                sum by (%(dimensions)s) (rate(thanos_compact_group_compactions_failures_total{%(selector)s}[5m]))
              /
                sum by (%(dimensions)s) (rate(thanos_compact_group_compactions_total{%(selector)s}[5m]))
              * 100 > %(compactionErrorThreshold)s
              )
            ||| % thanos.compact,
            'for': '15m',
            labels: {
              severity: 'warning',
            },
          },
          {
            // Failed bucket operations as a percentage of all bucket
            // operations (5m rates) exceed `bucketOpsErrorThreshold`.
            alert: 'ThanosCompactBucketHighOperationFailures',
            annotations: {
              description: 'Thanos Compact {{$labels.job}}%s Bucket is failing to execute {{$value | humanize}}%% of operations.' % location,
              summary: 'Thanos Compact Bucket is having a high number of operation failures.',
            },
            expr: |||
              (
                sum by (%(dimensions)s) (rate(thanos_objstore_bucket_operation_failures_total{%(selector)s}[5m]))
              /
                sum by (%(dimensions)s) (rate(thanos_objstore_bucket_operations_total{%(selector)s}[5m]))
              * 100 > %(bucketOpsErrorThreshold)s
              )
            ||| % thanos.compact,
            'for': '15m',
            labels: {
              severity: 'warning',
            },
          },
          {
            // Hours elapsed since the newest value of the last-successful-upload
            // timestamp seen in the past 24h exceed 24. This rule has no 'for'.
            alert: 'ThanosCompactHasNotRun',
            annotations: {
              description: 'Thanos Compact {{$labels.job}}%s has not uploaded anything for 24 hours.' % location,
              summary: 'Thanos Compact has not uploaded anything for last 24 hours.',
            },
            expr: '(time() - max by (%(dimensions)s) (max_over_time(thanos_objstore_bucket_last_successful_upload_time{%(selector)s}[24h]))) / 60 / 60 > 24' % thanos.compact,
            labels: {
              severity: 'warning',
            },
          },
        ],
      },
    ],
  },
}