github.com/thanos-io/thanos@v0.32.5/mixin/alerts/compact.libsonnet (about)

     1  {
     2    local thanos = self,
     3    compact+:: {
     4      selector: error 'must provide selector for Thanos Compact alerts',
     5      compactionErrorThreshold: 5,
     6      bucketOpsErrorThreshold: 5,
     7      dimensions: std.join(', ', std.objectFields(thanos.targetGroups) + ['job']),
     8    },
     9    prometheusAlerts+:: {
    10      groups+: if thanos.compact == null then [] else [
    11        local location = if std.length(std.objectFields(thanos.targetGroups)) > 0 then ' in %s' % std.join('/', ['{{$labels.%s}}' % level for level in std.objectFields(thanos.targetGroups)]) else '';
    12        {
    13          name: 'thanos-compact',
    14          rules: [
    15            {
    16              alert: 'ThanosCompactMultipleRunning',
    17              annotations: {
    18                description: 'No more than one Thanos Compact instance should be running at once. There are {{$value}}%s instances running.' % location,
    19                summary: 'Thanos Compact has multiple instances running.',
    20              },
    21              expr: 'sum by (%(dimensions)s) (up{%(selector)s}) > 1' % thanos.compact,
    22              'for': '5m',
    23              labels: {
    24                severity: 'warning',
    25              },
    26            },
    27            {
    28              alert: 'ThanosCompactHalted',
    29              annotations: {
    30                description: 'Thanos Compact {{$labels.job}}%s has failed to run and now is halted.' % location,
    31                summary: 'Thanos Compact has failed to run and is now halted.',
    32              },
    33              expr: 'thanos_compact_halted{%(selector)s} == 1' % thanos.compact,
    34              'for': '5m',
    35              labels: {
    36                severity: 'warning',
    37              },
    38            },
    39            {
    40              alert: 'ThanosCompactHighCompactionFailures',
    41              annotations: {
    42                description: 'Thanos Compact {{$labels.job}}%s is failing to execute {{$value | humanize}}%% of compactions.' % location,
    43                summary: 'Thanos Compact is failing to execute compactions.',
    44              },
    45              expr: |||
    46                (
    47                  sum by (%(dimensions)s) (rate(thanos_compact_group_compactions_failures_total{%(selector)s}[5m]))
    48                /
    49                  sum by (%(dimensions)s) (rate(thanos_compact_group_compactions_total{%(selector)s}[5m]))
    50                * 100 > %(compactionErrorThreshold)s
    51                )
    52              ||| % thanos.compact,
    53              'for': '15m',
    54              labels: {
    55                severity: 'warning',
    56              },
    57            },
    58            {
    59              alert: 'ThanosCompactBucketHighOperationFailures',
    60              annotations: {
    61                description: 'Thanos Compact {{$labels.job}}%s Bucket is failing to execute {{$value | humanize}}%% of operations.' % location,
    62                summary: 'Thanos Compact Bucket is having a high number of operation failures.',
    63              },
    64              expr: |||
    65                (
    66                  sum by (%(dimensions)s) (rate(thanos_objstore_bucket_operation_failures_total{%(selector)s}[5m]))
    67                /
    68                  sum by (%(dimensions)s) (rate(thanos_objstore_bucket_operations_total{%(selector)s}[5m]))
    69                * 100 > %(bucketOpsErrorThreshold)s
    70                )
    71              ||| % thanos.compact,
    72              'for': '15m',
    73              labels: {
    74                severity: 'warning',
    75              },
    76            },
    77            {
    78              alert: 'ThanosCompactHasNotRun',
    79              annotations: {
    80                description: 'Thanos Compact {{$labels.job}}%s has not uploaded anything for 24 hours.' % location,
    81                summary: 'Thanos Compact has not uploaded anything for last 24 hours.',
    82              },
    83              expr: '(time() - max by (%(dimensions)s) (max_over_time(thanos_objstore_bucket_last_successful_upload_time{%(selector)s}[24h]))) / 60 / 60 > 24' % thanos.compact,
    84              labels: {
    85                severity: 'warning',
    86              },
    87            },
    88          ],
    89        },
    90      ],
    91    },
    92  }