github.com/thanos-io/thanos@v0.32.5/mixin/dashboards/query.libsonnet (about)

     1  local g = import '../lib/thanos-grafana-builder/builder.libsonnet';
     2  local utils = import '../lib/utils.libsonnet';
     3  
     4  {  // Mixin fragment: Thanos Query dashboard ('query.json') plus the rows it contributes through __overviewRows__.
     5    local thanos = self,  // alias for this top-level object so nested fields can reach its other fields
     6    query+:: {  // hidden field, merged on top of any inherited `query`
     7      selector: error 'must provide selector for Thanos Query dashboard',  // evaluating this errors until it is overridden
     8      title: error 'must provide title for Thanos Query dashboard',  // evaluating this errors until it is overridden
     9      dashboard:: {  // hidden; selector/dimensions strings consumed by the panels below
    10        selector: std.join(', ', thanos.dashboard.selector + ['job=~"$job"']),  // shared selector list + job matcher, joined with ', '
    11        dimensions: std.join(', ', thanos.dashboard.dimensions + ['job']),  // shared dimension list + 'job', joined with ', '
    12      },
    13    },
    14    grafanaDashboards+:: {  // hidden field, merged on top of any inherited `grafanaDashboards`
    15      [if thanos.query != null then 'query.json']:  // no else branch: the key evaluates to null (field omitted) when thanos.query is null
    16        local queryHandlerSelector = utils.joinLabels([thanos.query.dashboard.selector, 'handler="query"']);  // dashboard selector + one extra matcher, via utils.joinLabels
    17        local queryRangeHandlerSelector = utils.joinLabels([thanos.query.dashboard.selector, 'handler="query_range"']);
    18        local grpcUnarySelector = utils.joinLabels([thanos.query.dashboard.selector, 'grpc_type="unary"']);
    19        local grpcServerStreamSelector = utils.joinLabels([thanos.query.dashboard.selector, 'grpc_type="server_stream"']);
    20        g.dashboard(thanos.query.title)  // rows are added in the order written below
    21        .addRow(
    22          g.row('Instant Query API')  // HTTP panels restricted to handler="query"
    23          .addPanel(
    24            g.panel('Rate', 'Shows rate of requests against /query for the given time.') +
    25            g.httpQpsPanel('http_requests_total', queryHandlerSelector, thanos.query.dashboard.dimensions)
    26          )
    27          .addPanel(
    28            g.panel('Errors', 'Shows ratio of errors compared to the total number of handled requests against /query.') +
    29            g.httpErrPanel('http_requests_total', queryHandlerSelector, thanos.query.dashboard.dimensions)
    30          )
    31          .addPanel(
    32            g.panel('Duration', 'Shows how long has it taken to handle requests in quantiles.') +
    33            g.latencyPanel('http_request_duration_seconds', queryHandlerSelector, thanos.query.dashboard.dimensions)
    34          )
    35        )
    36        .addRow(
    37          g.row('Range Query API')  // same three panels, restricted to handler="query_range"
    38          .addPanel(
    39            g.panel('Rate', 'Shows rate of requests against /query_range for the given time range.') +
    40            g.httpQpsPanel('http_requests_total', queryRangeHandlerSelector, thanos.query.dashboard.dimensions)
    41          )
    42          .addPanel(
    43            g.panel('Errors', 'Shows ratio of errors compared to the total number of handled requests against /query_range.') +
    44            g.httpErrPanel('http_requests_total', queryRangeHandlerSelector, thanos.query.dashboard.dimensions)
    45          )
    46          .addPanel(
    47            g.panel('Duration', 'Shows how long has it taken to handle requests in quantiles.') +
    48            g.latencyPanel('http_request_duration_seconds', queryRangeHandlerSelector, thanos.query.dashboard.dimensions)
    49          )
    50        )
    51        .addRow(
    52          g.row('gRPC (Unary)')  // panels over grpc_client_* metrics with grpc_type="unary"
    53          .addPanel(
    54            g.panel('Rate', 'Shows rate of handled Unary gRPC requests from other queriers.') +
    55            g.grpcRequestsPanel('grpc_client_handled_total', grpcUnarySelector, thanos.query.dashboard.dimensions)
    56          )
    57          .addPanel(
    58            g.panel('Errors', 'Shows ratio of errors compared to the total number of handled requests from other queriers.') +
    59            g.grpcErrorsPanel('grpc_client_handled_total', grpcUnarySelector, thanos.query.dashboard.dimensions)
    60          )
    61          .addPanel(
    62            g.panel('Duration', 'Shows how long has it taken to handle requests from other queriers, in quantiles.') +
    63            g.latencyPanel('grpc_client_handling_seconds', grpcUnarySelector, thanos.query.dashboard.dimensions)
    64          )
    65        )
    66        .addRow(
    67          g.row('gRPC (Stream)')  // panels over grpc_client_* metrics with grpc_type="server_stream"
    68          .addPanel(
    69            g.panel('Rate', 'Shows rate of handled Streamed gRPC requests from other queriers.') +
    70            g.grpcRequestsPanel('grpc_client_handled_total', grpcServerStreamSelector, thanos.query.dashboard.dimensions)
    71          )
    72          .addPanel(
    73            g.panel('Errors', 'Shows ratio of errors compared to the total number of handled requests from other queriers.') +
    74            g.grpcErrorsPanel('grpc_client_handled_total', grpcServerStreamSelector, thanos.query.dashboard.dimensions)
    75          )
    76          .addPanel(
    77            g.panel('Duration', 'Shows how long has it taken to handle requests from other queriers, in quantiles') +  // NOTE(review): no trailing period, unlike the Unary row's description
    78            g.latencyPanel('grpc_client_handling_seconds', grpcServerStreamSelector, thanos.query.dashboard.dimensions)
    79          )
    80        )
    81        .addRow(
    82          g.row('DNS')  // panels over the thanos_query_store_apis_dns_* counters
    83          .addPanel(
    84            g.panel('Rate', 'Shows rate of DNS lookups to discover stores.') +
    85            g.queryPanel(  // arguments: PromQL expression, then legend format
    86              'sum by (%s) (rate(thanos_query_store_apis_dns_lookups_total{%s}[$interval]))' % [thanos.query.dashboard.dimensions, thanos.query.dashboard.selector],
    87              'lookups {{job}}'
    88            )
    89          )
    90          .addPanel(
    91            g.panel('Errors', 'Shows ratio of failures compared to the total number of executed DNS lookups.') +
    92            g.qpsErrTotalPanel(  // arguments: failures series, total lookups series, dimensions
    93              'thanos_query_store_apis_dns_failures_total{%s}' % thanos.query.dashboard.selector,
    94              'thanos_query_store_apis_dns_lookups_total{%s}' % thanos.query.dashboard.selector,
    95              thanos.query.dashboard.dimensions
    96            )
    97          )
    98        )
    99        .addRow(
   100          g.row('Query Concurrency')  // NOTE(review): this row's query uses $__rate_interval while the DNS rate query uses $interval - confirm this is intended
   101          .addPanel(
   102            g.panel('Concurrent Capacity', 'Shows available capacity of processing queries in parallel.') +
   103            g.queryPanel(  // expression: max_over_time of the gate maximum minus avg_over_time of in-flight queries
   104              'max_over_time(thanos_query_concurrent_gate_queries_max{%s}[$__rate_interval]) - avg_over_time(thanos_query_concurrent_gate_queries_in_flight{%s}[$__rate_interval])' % [thanos.query.dashboard.selector, thanos.query.dashboard.selector],
   105              '{{job}} - {{pod}}'
   106            )
   107          )
   108        )
   109        .addRow(
   110          g.resourceUtilizationRow(thanos.query.dashboard.selector, thanos.query.dashboard.dimensions)  // row built by the shared builder helper from the same selector/dimensions
   111        ),
   112  
   113      __overviewRows__+:: if thanos.query == null then [] else [  // hidden; appends nothing when thanos.query is null, otherwise the two rows below
   114        g.row('Instant Query')  // overview rows read thanos.dashboard.overview.{selector,dimensions}, not thanos.query.dashboard
   115        .addPanel(
   116          g.panel('Requests Rate', 'Shows rate of requests against /query for the given time.') +
   117          g.httpQpsPanel('http_requests_total', utils.joinLabels([thanos.dashboard.overview.selector, 'handler="query"']), thanos.dashboard.overview.dimensions) +
   118          g.addDashboardLink(thanos.query.title)  // presumably links the panel to the Query dashboard by title - confirm in the builder
   119        )
   120        .addPanel(
   121          g.panel('Requests Errors', 'Shows ratio of errors compared to the total number of handled requests against /query.') +
   122          g.httpErrPanel('http_requests_total', utils.joinLabels([thanos.dashboard.overview.selector, 'handler="query"']), thanos.dashboard.overview.dimensions) +
   123          g.addDashboardLink(thanos.query.title)
   124        )
   125        .addPanel(
   126          g.sloLatency(  // arguments below: title, description, bucket selector, dimensions, then three numbers
   127            'Latency 99th Percentile',
   128            'Shows how long has it taken to handle requests.',
   129            'http_request_duration_seconds_bucket{%s}' % utils.joinLabels([thanos.dashboard.overview.selector, 'handler="query"']),
   130            thanos.dashboard.overview.dimensions,
   131            0.99,  // presumably the quantile (title says 99th percentile) - TODO confirm against g.sloLatency
   132            0.5,  // presumably a lower threshold - TODO confirm against g.sloLatency
   133            1  // presumably an upper threshold - TODO confirm against g.sloLatency
   134          ) +
   135          g.addDashboardLink(thanos.query.title)
   136        ),
   137  
   138        g.row('Range Query')  // same three panels as 'Instant Query', with handler="query_range"
   139        .addPanel(
   140          g.panel('Requests Rate', 'Shows rate of requests against /query_range for the given time range.') +
   141          g.httpQpsPanel('http_requests_total', utils.joinLabels([thanos.dashboard.overview.selector, 'handler="query_range"']), thanos.dashboard.overview.dimensions) +
   142          g.addDashboardLink(thanos.query.title)
   143        )
   144        .addPanel(
   145          g.panel('Requests Errors', 'Shows ratio of errors compared to the total number of handled requests against /query_range.') +
   146          g.httpErrPanel('http_requests_total', utils.joinLabels([thanos.dashboard.overview.selector, 'handler="query_range"']), thanos.dashboard.overview.dimensions) +
   147          g.addDashboardLink(thanos.query.title)
   148        )
   149        .addPanel(
   150          g.sloLatency(  // same argument layout as the 'Instant Query' latency panel
   151            'Latency 99th Percentile',
   152            'Shows how long has it taken to handle requests.',
   153            'http_request_duration_seconds_bucket{%s}' % utils.joinLabels([thanos.dashboard.overview.selector, 'handler="query_range"']),
   154            thanos.dashboard.overview.dimensions,
   155            0.99,
   156            0.5,
   157            1
   158          ) +
   159          g.addDashboardLink(thanos.query.title)
   160        ),
   161      ],
   162    },
   163  }