github.com/thanos-io/thanos@v0.32.5/pkg/querysharding/analyzer_test.go (about) 1 // Copyright (c) The Thanos Authors. 2 // Licensed under the Apache License 2.0. 3 4 package querysharding 5 6 import ( 7 "sort" 8 "testing" 9 10 "github.com/prometheus/common/model" 11 "github.com/stretchr/testify/require" 12 ) 13 14 func TestAnalyzeQuery(t *testing.T) { 15 16 type testCase struct { 17 name string 18 expression string 19 shardingLabels []string 20 } 21 22 nonShardable := []testCase{ 23 { 24 name: "aggregation", 25 expression: "sum(http_requests_total)", 26 }, 27 { 28 name: "outer aggregation with no grouping", 29 expression: "count(sum by (pod) (http_requests_total))", 30 }, 31 { 32 name: "outer aggregation with without grouping", 33 expression: "count(sum without (pod) (http_requests_total))", 34 }, 35 { 36 name: "binary expression", 37 expression: `http_requests_total{code="400"} / http_requests_total`, 38 }, 39 { 40 name: "binary expression with constant", 41 expression: `http_requests_total{code="400"} / 4`, 42 }, 43 { 44 name: "binary expression with empty vector matching", 45 expression: `http_requests_total{code="400"} / on () http_requests_total`, 46 }, 47 { 48 name: "binary aggregation with different grouping labels", 49 expression: `sum by (pod) (http_requests_total{code="400"}) / sum by (cluster) (http_requests_total)`, 50 }, 51 { 52 name: "multiple binary expressions", 53 expression: `(http_requests_total{code="400"} + http_requests_total{code="500"}) / http_requests_total`, 54 }, 55 { 56 name: "multiple binary expressions with empty vector matchers", 57 expression: ` 58 (http_requests_total{code="400"} + on (cluster, pod) http_requests_total{code="500"}) 59 / on () 60 http_requests_total`, 61 }, 62 { 63 name: "aggregate by expression with label_replace, sharding label is dynamic", 64 expression: `sum by (dst_label) (label_replace(metric, "dst_label", "$1", "src_label", "re"))`, 65 }, 66 { 67 name: "aggregate by expression with label_join, sharding label is dynamic", 68 expression: `sum by (dst_label) (label_join(metric, "dst_label", ",", "src_label"))`, 69 }, 70 { 71 name: "absent_over_time is not shardable", 72 expression: `sum by (url) (absent_over_time(http_requests_total{code="400"}[5m]))`, 73 }, 74 { 75 name: "absent is not shardable", 76 expression: `sum by (url) (absent(http_requests_total{code="400"}))`, 77 }, 78 { 79 name: "scalar is not shardable", 80 expression: `scalar(sum by (url) (http_requests_total{code="400"}))`, 81 }, 82 } 83 84 shardableByLabels := []testCase{ 85 { 86 name: "aggregation with grouping", 87 expression: "sum by (pod) (http_requests_total)", 88 shardingLabels: []string{"pod"}, 89 }, 90 { 91 name: "multiple aggregations with grouping", 92 expression: "max by (pod) (sum by (pod, cluster) (http_requests_total))", 93 shardingLabels: []string{"pod"}, 94 }, 95 { 96 name: "binary expression with vector matching", 97 expression: `http_requests_total{code="400"} / on (pod) http_requests_total`, 98 shardingLabels: []string{"pod"}, 99 }, 100 { 101 name: "binary aggregation with same grouping labels", 102 expression: `sum by (pod) (http_requests_total{code="400"}) / sum by (pod) (http_requests_total)`, 103 shardingLabels: []string{"pod"}, 104 }, 105 { 106 name: "binary expression with vector matching and grouping", 107 expression: `sum by (cluster, pod) (http_requests_total{code="400"}) / on (pod) sum by (cluster, pod) (http_requests_total)`, 108 shardingLabels: []string{"pod"}, 109 }, 110 { 111 name: "binary expression with vector matching with outer aggregation", 112 expression: `sum(http_requests_total{code="400"} * http_requests_total) by (pod)`, 113 shardingLabels: []string{"pod"}, 114 }, 115 { 116 name: "multiple binary expressions with vector matchers", 117 expression: ` 118 (http_requests_total{code="400"} + on (cluster, pod) http_requests_total{code="500"}) 119 / on (pod) 120 http_requests_total`, 121 shardingLabels: []string{"pod"}, 122 }, 123 { 124 name: "multiple binary expressions with grouping", 125 expression: ` 126 sum by (container) ( 127 (http_requests_total{code="400"} + on (cluster, pod, container) http_requests_total{code="500"}) 128 / on (pod, container) 129 http_requests_total 130 )`, 131 shardingLabels: []string{"container"}, 132 }, 133 { 134 name: "multiple binary expressions with grouping", 135 expression: `(http_requests_total{code="400"} + on (pod) http_requests_total{code="500"}) / on (cluster, pod) http_requests_total`, 136 shardingLabels: []string{"pod"}, 137 }, 138 { 139 name: "histogram quantile", 140 expression: "histogram_quantile(0.95, sum(rate(metric[1m])) by (le, cluster))", 141 shardingLabels: []string{"cluster"}, 142 }, 143 { 144 name: "subquery", 145 expression: "sum(http_requests_total) by (pod, cluster) [1h:1m]", 146 shardingLabels: []string{"cluster", "pod"}, 147 }, 148 { 149 name: "subquery with function", 150 expression: "increase(sum(http_requests_total) by (pod, cluster) [1h:1m])", 151 shardingLabels: []string{"cluster", "pod"}, 152 }, 153 { 154 name: "ignore vector matching with 2 aggregations", 155 expression: `sum(rate(node_cpu_seconds_total[3h])) by (cluster_id, mode) / ignoring(mode) group_left sum(rate(node_cpu_seconds_total[3h])) by (cluster_id)`, 156 shardingLabels: []string{"cluster_id"}, 157 }, 158 { 159 name: "aggregate by expression with label_replace, sharding label is not dynamic", 160 expression: `sum by (pod) (label_replace(metric, "dst_label", "$1", "src_label", "re"))`, 161 shardingLabels: []string{"pod"}, 162 }, 163 { 164 name: "aggregate by expression with label_join, sharding label is not dynamic", 165 expression: `sum by (pod) (label_join(metric, "dst_label", ",", "src_label"))`, 166 shardingLabels: []string{"pod"}, 167 }, 168 { 169 name: "label_join and aggregation on multiple labels. Can be sharded by the static one", 170 expression: `sum by (pod, dst_label) (label_join(metric, "dst_label", ",", "src_label"))`, 171 shardingLabels: []string{"pod"}, 172 }, 173 { 174 name: "binary expression with vector matching and label_replace", 175 expression: `http_requests_total{code="400"} / on (pod) label_replace(metric, "dst_label", "$1", "src_label", "re")`, 176 shardingLabels: []string{"pod"}, 177 }, 178 { 179 name: "nested label joins", 180 expression: `label_join(sum by (pod) (label_join(metric, "dst_label", ",", "src_label")), "dst_label1", ",", "dst_label")`, 181 shardingLabels: []string{"pod"}, 182 }, 183 { 184 name: "complex query with label_replace, binary expr and aggregations on dynamic label", 185 expression: `sum(sum_over_time(container_memory_working_set_bytes{container_name!="POD",container_name!="",namespace="kube-system"}[1d:5m])) by (instance, cluster) / avg(label_replace(sum(sum_over_time(kube_node_status_capacity_memory_bytes[1d:5m])) by (node, cluster), "instance", "$1", "node", "(.*)")) by (instance, cluster)`, 186 shardingLabels: []string{"cluster"}, 187 }, 188 { 189 name: "complex query with label_replace and nested aggregations", 190 expression: `avg(label_replace(label_replace(avg(count_over_time(kube_pod_container_resource_requests{resource="memory", unit="byte", container!="",container!="POD", node!="", }[1h] )*avg_over_time(kube_pod_container_resource_requests{resource="memory", unit="byte", container!="",container!="POD", node!="", }[1h] )) by (namespace,container,pod,node,cluster_id) , "container_name","$1","container","(.+)"), "pod_name","$1","pod","(.+)")) by (namespace,container_name,pod_name,node,cluster_id)`, 191 shardingLabels: []string{"namespace", "node", "cluster_id"}, 192 }, 193 { 194 name: "complex query with label_replace, nested aggregations and binary expressions", 195 expression: `sort_desc(avg(label_replace(label_replace(label_replace(count_over_time(container_memory_working_set_bytes{container!="", container!="POD", instance!="", }[1h] ), "node", "$1", "instance", "(.+)"), "container_name", "$1", "container", "(.+)"), "pod_name", "$1", "pod", "(.+)")*label_replace(label_replace(label_replace(avg_over_time(container_memory_working_set_bytes{container!="", container!="POD", instance!="", }[1h] ), "node", "$1", "instance", "(.+)"), "container_name", "$1", "container", "(.+)"), "pod_name", "$1", "pod", "(.+)")) by (namespace, container_name, pod_name, node, cluster_id))`, 196 shardingLabels: []string{"namespace", "cluster_id"}, 197 }, 198 } 199 200 shardableWithoutLabels := []testCase{ 201 { 202 name: "aggregation without grouping", 203 expression: "sum without (pod) (http_requests_total)", 204 shardingLabels: []string{"pod"}, 205 }, 206 { 207 name: "multiple aggregations with without grouping", 208 expression: "max without (pod) (sum without (pod, cluster) (http_requests_total))", 209 shardingLabels: []string{"pod", "cluster"}, 210 }, 211 { 212 name: "binary expression with outer without grouping", 213 expression: `sum(http_requests_total{code="400"} * http_requests_total) without (pod)`, 214 shardingLabels: []string{"pod"}, 215 }, 216 { 217 name: "binary expression with vector matching and outer without grouping", 218 expression: `sum(http_requests_total{code="400"} * ignoring(cluster) http_requests_total) without ()`, 219 shardingLabels: []string{"__name__", "cluster"}, 220 }, 221 { 222 name: "binary expression with without vector matching and grouping", 223 expression: `sum without (cluster, pod) (http_requests_total{code="400"}) / ignoring (pod) sum without (cluster, pod) (http_requests_total)`, 224 shardingLabels: []string{"pod", "cluster", model.MetricNameLabel}, 225 }, 226 { 227 name: "multiple binary expressions with without grouping", 228 expression: `(http_requests_total{code="400"} + ignoring (pod) http_requests_total{code="500"}) / ignoring (cluster, pod) http_requests_total`, 229 shardingLabels: []string{"cluster", "pod", model.MetricNameLabel}, 230 }, 231 { 232 name: "multiple binary expressions with without vector matchers", 233 expression: ` 234 (http_requests_total{code="400"} + ignoring (cluster, pod) http_requests_total{code="500"}) 235 / ignoring (pod) 236 http_requests_total`, 237 shardingLabels: []string{"cluster", "pod", model.MetricNameLabel}, 238 }, 239 { 240 name: "histogram quantile", 241 expression: "histogram_quantile(0.95, sum(rate(metric[1m])) without (le, cluster))", 242 shardingLabels: []string{"cluster"}, 243 }, 244 { 245 name: "aggregate without expression with label_replace, sharding label is not dynamic", 246 expression: `sum without (dst_label) (label_replace(metric, "dst_label", "$1", "src_label", "re"))`, 247 shardingLabels: []string{"dst_label"}, 248 }, 249 { 250 name: "aggregate without expression with label_join, sharding label is not dynamic", 251 expression: `sum without (dst_label) (label_join(metric, "dst_label", ",", "src_label"))`, 252 shardingLabels: []string{"dst_label"}, 253 }, 254 { 255 name: "aggregate without expression with label_replace", 256 expression: `sum without (pod) (label_replace(metric, "dst_label", "$1", "src_label", "re"))`, 257 shardingLabels: []string{"pod", "dst_label"}, 258 }, 259 } 260 261 for _, test := range nonShardable { 262 t.Run(test.name, func(t *testing.T) { 263 analyzer := NewQueryAnalyzer() 264 analysis, err := analyzer.Analyze(test.expression) 265 require.NoError(t, err) 266 require.False(t, analysis.IsShardable()) 267 }) 268 } 269 270 for _, test := range shardableByLabels { 271 t.Run(test.name, func(t *testing.T) { 272 analyzer := NewQueryAnalyzer() 273 analysis, err := analyzer.Analyze(test.expression) 274 require.NoError(t, err) 275 require.True(t, analysis.IsShardable()) 276 require.True(t, analysis.ShardBy()) 277 278 sort.Strings(test.shardingLabels) 279 sort.Strings(analysis.ShardingLabels()) 280 require.Equal(t, test.shardingLabels, analysis.ShardingLabels()) 281 }) 282 } 283 284 for _, test := range shardableWithoutLabels { 285 t.Run(test.name, func(t *testing.T) { 286 analyzer := NewQueryAnalyzer() 287 analysis, err := analyzer.Analyze(test.expression) 288 require.NoError(t, err) 289 require.True(t, analysis.IsShardable()) 290 require.False(t, analysis.ShardBy()) 291 292 sort.Strings(test.shardingLabels) 293 sort.Strings(analysis.ShardingLabels()) 294 require.Equal(t, test.shardingLabels, analysis.ShardingLabels()) 295 }) 296 } 297 }