// Source: github.com/thanos-io/thanos@v0.32.5/mixin/dashboards/query.libsonnet
//
// Defines the Thanos Query Grafana dashboard ('query.json') and the rows this
// component appends to `__overviewRows__`.
local g = import '../lib/thanos-grafana-builder/builder.libsonnet';
local utils = import '../lib/utils.libsonnet';

{
  local thanos = self,

  // Per-component settings. `selector` and `title` raise an error when they
  // are evaluated without having been overridden.
  query+:: {
    selector: error 'must provide selector for Thanos Query dashboard',
    title: error 'must provide title for Thanos Query dashboard',
    // Derived from the shared dashboard settings with the `job` matcher and
    // the `job` dimension appended.
    dashboard:: {
      selector: std.join(', ', thanos.dashboard.selector + ['job=~"$job"']),
      dimensions: std.join(', ', thanos.dashboard.dimensions + ['job']),
    },
  },

  grafanaDashboards+:: {
    // The field name evaluates to null (so the field is omitted) when
    // `query` has been set to null.
    [if thanos.query != null then 'query.json']:
      local cfg = thanos.query.dashboard;

      // Joins the dashboard selector with one additional label matcher.
      local withMatcher(matcher) = utils.joinLabels([cfg.selector, matcher]);

      // Rate / Errors / Duration row for a single HTTP handler.
      local httpRow(title, selector, rateHelp, errorsHelp) =
        g.row(title)
        .addPanel(
          g.panel('Rate', rateHelp) +
          g.httpQpsPanel('http_requests_total', selector, cfg.dimensions)
        )
        .addPanel(
          g.panel('Errors', errorsHelp) +
          g.httpErrPanel('http_requests_total', selector, cfg.dimensions)
        )
        .addPanel(
          g.panel('Duration', 'Shows how long has it taken to handle requests in quantiles.') +
          g.latencyPanel('http_request_duration_seconds', selector, cfg.dimensions)
        );

      // Rate / Errors / Duration row for one gRPC call type, built from the
      // grpc_client_* metrics. The duration help text is passed in because
      // the two rows use slightly different wording.
      local grpcRow(title, selector, rateHelp, durationHelp) =
        g.row(title)
        .addPanel(
          g.panel('Rate', rateHelp) +
          g.grpcRequestsPanel('grpc_client_handled_total', selector, cfg.dimensions)
        )
        .addPanel(
          g.panel('Errors', 'Shows ratio of errors compared to the total number of handled requests from other queriers.') +
          g.grpcErrorsPanel('grpc_client_handled_total', selector, cfg.dimensions)
        )
        .addPanel(
          g.panel('Duration', durationHelp) +
          g.latencyPanel('grpc_client_handling_seconds', selector, cfg.dimensions)
        );

      g.dashboard(thanos.query.title)
      .addRow(
        httpRow(
          'Instant Query API',
          withMatcher('handler="query"'),
          'Shows rate of requests against /query for the given time.',
          'Shows ratio of errors compared to the total number of handled requests against /query.'
        )
      )
      .addRow(
        httpRow(
          'Range Query API',
          withMatcher('handler="query_range"'),
          'Shows rate of requests against /query_range for the given time range.',
          'Shows ratio of errors compared to the total number of handled requests against /query_range.'
        )
      )
      .addRow(
        grpcRow(
          'gRPC (Unary)',
          withMatcher('grpc_type="unary"'),
          'Shows rate of handled Unary gRPC requests from other queriers.',
          'Shows how long has it taken to handle requests from other queriers, in quantiles.'
        )
      )
      .addRow(
        grpcRow(
          'gRPC (Stream)',
          withMatcher('grpc_type="server_stream"'),
          'Shows rate of handled Streamed gRPC requests from other queriers.',
          'Shows how long has it taken to handle requests from other queriers, in quantiles'
        )
      )
      .addRow(
        g.row('DNS')
        .addPanel(
          g.panel('Rate', 'Shows rate of DNS lookups to discover stores.') +
          g.queryPanel(
            'sum by (%s) (rate(thanos_query_store_apis_dns_lookups_total{%s}[$interval]))' % [cfg.dimensions, cfg.selector],
            'lookups {{job}}'
          )
        )
        .addPanel(
          g.panel('Errors', 'Shows ratio of failures compared to the total number of executed DNS lookups.') +
          g.qpsErrTotalPanel(
            'thanos_query_store_apis_dns_failures_total{%s}' % cfg.selector,
            'thanos_query_store_apis_dns_lookups_total{%s}' % cfg.selector,
            cfg.dimensions
          )
        )
      )
      .addRow(
        g.row('Query Concurrency')
        .addPanel(
          g.panel('Concurrent Capacity', 'Shows available capacity of processing queries in parallel.') +
          g.queryPanel(
            // Gate maximum minus average in-flight queries over the window.
            'max_over_time(thanos_query_concurrent_gate_queries_max{%s}[$__rate_interval]) - avg_over_time(thanos_query_concurrent_gate_queries_in_flight{%s}[$__rate_interval])' % [cfg.selector, cfg.selector],
            '{{job}} - {{pod}}'
          )
        )
      )
      .addRow(
        g.resourceUtilizationRow(cfg.selector, cfg.dimensions)
      ),

    // Rows appended to `__overviewRows__`; nothing is added when `query` is null.
    __overviewRows__+::
      if thanos.query == null then [] else
        local overview = thanos.dashboard.overview;
        // Every overview panel is combined with a link built from the Query
        // dashboard title.
        local queryLink = g.addDashboardLink(thanos.query.title);

        // Requests Rate / Requests Errors / p99 latency row for one handler.
        local overviewRow(title, handlerMatcher, rateHelp, errorsHelp) =
          local selector = utils.joinLabels([overview.selector, handlerMatcher]);
          g.row(title)
          .addPanel(
            g.panel('Requests Rate', rateHelp) +
            g.httpQpsPanel('http_requests_total', selector, overview.dimensions) +
            queryLink
          )
          .addPanel(
            g.panel('Requests Errors', errorsHelp) +
            g.httpErrPanel('http_requests_total', selector, overview.dimensions) +
            queryLink
          )
          .addPanel(
            g.sloLatency(
              'Latency 99th Percentile',
              'Shows how long has it taken to handle requests.',
              'http_request_duration_seconds_bucket{%s}' % selector,
              overview.dimensions,
              0.99,
              0.5,
              1
            ) +
            queryLink
          );

        [
          overviewRow(
            'Instant Query',
            'handler="query"',
            'Shows rate of requests against /query for the given time.',
            'Shows ratio of errors compared to the total number of handled requests against /query.'
          ),
          overviewRow(
            'Range Query',
            'handler="query_range"',
            'Shows rate of requests against /query_range for the given time range.',
            'Shows ratio of errors compared to the total number of handled requests against /query_range.'
          ),
        ],
  },
}