github.com/thanos-io/thanos@v0.32.5/pkg/querysharding/analyzer_test.go (about)

     1  // Copyright (c) The Thanos Authors.
     2  // Licensed under the Apache License 2.0.
     3  
     4  package querysharding
     5  
     6  import (
     7  	"sort"
     8  	"testing"
     9  
    10  	"github.com/prometheus/common/model"
    11  	"github.com/stretchr/testify/require"
    12  )
    13  
    14  func TestAnalyzeQuery(t *testing.T) {
    15  
    16  	type testCase struct {
    17  		name           string
    18  		expression     string
    19  		shardingLabels []string
    20  	}
    21  
    22  	nonShardable := []testCase{
    23  		{
    24  			name:       "aggregation",
    25  			expression: "sum(http_requests_total)",
    26  		},
    27  		{
    28  			name:       "outer aggregation with no grouping",
    29  			expression: "count(sum by (pod) (http_requests_total))",
    30  		},
    31  		{
    32  			name:       "outer aggregation with without grouping",
    33  			expression: "count(sum without (pod) (http_requests_total))",
    34  		},
    35  		{
    36  			name:       "binary expression",
    37  			expression: `http_requests_total{code="400"} / http_requests_total`,
    38  		},
    39  		{
    40  			name:       "binary expression with constant",
    41  			expression: `http_requests_total{code="400"} / 4`,
    42  		},
    43  		{
    44  			name:       "binary expression with empty vector matching",
    45  			expression: `http_requests_total{code="400"} / on () http_requests_total`,
    46  		},
    47  		{
    48  			name:       "binary aggregation with different grouping labels",
    49  			expression: `sum by (pod) (http_requests_total{code="400"}) / sum by (cluster) (http_requests_total)`,
    50  		},
    51  		{
    52  			name:       "multiple binary expressions",
    53  			expression: `(http_requests_total{code="400"} + http_requests_total{code="500"}) / http_requests_total`,
    54  		},
    55  		{
    56  			name: "multiple binary expressions with empty vector matchers",
    57  			expression: `
    58  (http_requests_total{code="400"} + on (cluster, pod) http_requests_total{code="500"})
    59  / on ()
    60  http_requests_total`,
    61  		},
    62  		{
    63  			name:       "aggregate by expression with label_replace, sharding label is dynamic",
    64  			expression: `sum by (dst_label) (label_replace(metric, "dst_label", "$1", "src_label", "re"))`,
    65  		},
    66  		{
    67  			name:       "aggregate by expression with label_join, sharding label is dynamic",
    68  			expression: `sum by (dst_label) (label_join(metric, "dst_label", ",", "src_label"))`,
    69  		},
    70  		{
    71  			name:       "absent_over_time is not shardable",
    72  			expression: `sum by (url) (absent_over_time(http_requests_total{code="400"}[5m]))`,
    73  		},
    74  		{
    75  			name:       "absent is not shardable",
    76  			expression: `sum by (url) (absent(http_requests_total{code="400"}))`,
    77  		},
    78  		{
    79  			name:       "scalar is not shardable",
    80  			expression: `scalar(sum by (url) (http_requests_total{code="400"}))`,
    81  		},
    82  	}
    83  
    84  	shardableByLabels := []testCase{
    85  		{
    86  			name:           "aggregation with grouping",
    87  			expression:     "sum by (pod) (http_requests_total)",
    88  			shardingLabels: []string{"pod"},
    89  		},
    90  		{
    91  			name:           "multiple aggregations with grouping",
    92  			expression:     "max by (pod) (sum by (pod, cluster) (http_requests_total))",
    93  			shardingLabels: []string{"pod"},
    94  		},
    95  		{
    96  			name:           "binary expression with vector matching",
    97  			expression:     `http_requests_total{code="400"} / on (pod) http_requests_total`,
    98  			shardingLabels: []string{"pod"},
    99  		},
   100  		{
   101  			name:           "binary aggregation with same grouping labels",
   102  			expression:     `sum by (pod) (http_requests_total{code="400"}) / sum by (pod) (http_requests_total)`,
   103  			shardingLabels: []string{"pod"},
   104  		},
   105  		{
   106  			name:           "binary expression with vector matching and grouping",
   107  			expression:     `sum by (cluster, pod) (http_requests_total{code="400"}) / on (pod) sum by (cluster, pod) (http_requests_total)`,
   108  			shardingLabels: []string{"pod"},
   109  		},
   110  		{
   111  			name:           "binary expression with vector matching with outer aggregation",
   112  			expression:     `sum(http_requests_total{code="400"} * http_requests_total) by (pod)`,
   113  			shardingLabels: []string{"pod"},
   114  		},
   115  		{
   116  			name: "multiple binary expressions with vector matchers",
   117  			expression: `
   118  (http_requests_total{code="400"} + on (cluster, pod) http_requests_total{code="500"})
   119  / on (pod)
   120  http_requests_total`,
   121  			shardingLabels: []string{"pod"},
   122  		},
   123  		{
   124  			name: "multiple binary expressions with grouping",
   125  			expression: `
   126  sum by (container) (
   127  	(http_requests_total{code="400"} + on (cluster, pod, container) http_requests_total{code="500"})
   128  	/ on (pod, container)
   129  	http_requests_total
   130  )`,
   131  			shardingLabels: []string{"container"},
   132  		},
   133  		{
   134  			name:           "multiple binary expressions with grouping",
   135  			expression:     `(http_requests_total{code="400"} + on (pod) http_requests_total{code="500"}) / on (cluster, pod) http_requests_total`,
   136  			shardingLabels: []string{"pod"},
   137  		},
   138  		{
   139  			name:           "histogram quantile",
   140  			expression:     "histogram_quantile(0.95, sum(rate(metric[1m])) by (le, cluster))",
   141  			shardingLabels: []string{"cluster"},
   142  		},
   143  		{
   144  			name:           "subquery",
   145  			expression:     "sum(http_requests_total) by (pod, cluster) [1h:1m]",
   146  			shardingLabels: []string{"cluster", "pod"},
   147  		},
   148  		{
   149  			name:           "subquery with function",
   150  			expression:     "increase(sum(http_requests_total) by (pod, cluster) [1h:1m])",
   151  			shardingLabels: []string{"cluster", "pod"},
   152  		},
   153  		{
   154  			name:           "ignore vector matching with 2 aggregations",
   155  			expression:     `sum(rate(node_cpu_seconds_total[3h])) by (cluster_id, mode) / ignoring(mode) group_left sum(rate(node_cpu_seconds_total[3h])) by (cluster_id)`,
   156  			shardingLabels: []string{"cluster_id"},
   157  		},
   158  		{
   159  			name:           "aggregate by expression with label_replace, sharding label is not dynamic",
   160  			expression:     `sum by (pod) (label_replace(metric, "dst_label", "$1", "src_label", "re"))`,
   161  			shardingLabels: []string{"pod"},
   162  		},
   163  		{
   164  			name:           "aggregate by expression with label_join, sharding label is not dynamic",
   165  			expression:     `sum by (pod) (label_join(metric, "dst_label", ",", "src_label"))`,
   166  			shardingLabels: []string{"pod"},
   167  		},
   168  		{
   169  			name:           "label_join and aggregation on multiple labels. Can be sharded by the static one",
   170  			expression:     `sum by (pod, dst_label) (label_join(metric, "dst_label", ",", "src_label"))`,
   171  			shardingLabels: []string{"pod"},
   172  		},
   173  		{
   174  			name:           "binary expression with vector matching and label_replace",
   175  			expression:     `http_requests_total{code="400"} / on (pod) label_replace(metric, "dst_label", "$1", "src_label", "re")`,
   176  			shardingLabels: []string{"pod"},
   177  		},
   178  		{
   179  			name:           "nested label joins",
   180  			expression:     `label_join(sum by (pod) (label_join(metric, "dst_label", ",", "src_label")), "dst_label1", ",", "dst_label")`,
   181  			shardingLabels: []string{"pod"},
   182  		},
   183  		{
   184  			name:           "complex query with label_replace, binary expr and aggregations on dynamic label",
   185  			expression:     `sum(sum_over_time(container_memory_working_set_bytes{container_name!="POD",container_name!="",namespace="kube-system"}[1d:5m])) by (instance, cluster) / avg(label_replace(sum(sum_over_time(kube_node_status_capacity_memory_bytes[1d:5m])) by (node, cluster), "instance", "$1", "node", "(.*)")) by (instance, cluster)`,
   186  			shardingLabels: []string{"cluster"},
   187  		},
   188  		{
   189  			name:           "complex query with label_replace and nested aggregations",
   190  			expression:     `avg(label_replace(label_replace(avg(count_over_time(kube_pod_container_resource_requests{resource="memory", unit="byte", container!="",container!="POD", node!="", }[1h] )*avg_over_time(kube_pod_container_resource_requests{resource="memory", unit="byte", container!="",container!="POD", node!="", }[1h] )) by (namespace,container,pod,node,cluster_id) , "container_name","$1","container","(.+)"), "pod_name","$1","pod","(.+)")) by (namespace,container_name,pod_name,node,cluster_id)`,
   191  			shardingLabels: []string{"namespace", "node", "cluster_id"},
   192  		},
   193  		{
   194  			name:           "complex query with label_replace, nested aggregations and binary expressions",
   195  			expression:     `sort_desc(avg(label_replace(label_replace(label_replace(count_over_time(container_memory_working_set_bytes{container!="", container!="POD", instance!="", }[1h] ), "node", "$1", "instance", "(.+)"), "container_name", "$1", "container", "(.+)"), "pod_name", "$1", "pod", "(.+)")*label_replace(label_replace(label_replace(avg_over_time(container_memory_working_set_bytes{container!="", container!="POD", instance!="", }[1h] ), "node", "$1", "instance", "(.+)"), "container_name", "$1", "container", "(.+)"), "pod_name", "$1", "pod", "(.+)")) by (namespace, container_name, pod_name, node, cluster_id))`,
   196  			shardingLabels: []string{"namespace", "cluster_id"},
   197  		},
   198  	}
   199  
   200  	shardableWithoutLabels := []testCase{
   201  		{
   202  			name:           "aggregation without grouping",
   203  			expression:     "sum without (pod) (http_requests_total)",
   204  			shardingLabels: []string{"pod"},
   205  		},
   206  		{
   207  			name:           "multiple aggregations with without grouping",
   208  			expression:     "max without (pod) (sum without (pod, cluster) (http_requests_total))",
   209  			shardingLabels: []string{"pod", "cluster"},
   210  		},
   211  		{
   212  			name:           "binary expression with outer without grouping",
   213  			expression:     `sum(http_requests_total{code="400"} * http_requests_total) without (pod)`,
   214  			shardingLabels: []string{"pod"},
   215  		},
   216  		{
   217  			name:           "binary expression with vector matching and outer without grouping",
   218  			expression:     `sum(http_requests_total{code="400"} * ignoring(cluster) http_requests_total) without ()`,
   219  			shardingLabels: []string{"__name__", "cluster"},
   220  		},
   221  		{
   222  			name:           "binary expression with without vector matching and grouping",
   223  			expression:     `sum without (cluster, pod) (http_requests_total{code="400"}) / ignoring (pod) sum without (cluster, pod) (http_requests_total)`,
   224  			shardingLabels: []string{"pod", "cluster", model.MetricNameLabel},
   225  		},
   226  		{
   227  			name:           "multiple binary expressions with without grouping",
   228  			expression:     `(http_requests_total{code="400"} + ignoring (pod) http_requests_total{code="500"}) / ignoring (cluster, pod) http_requests_total`,
   229  			shardingLabels: []string{"cluster", "pod", model.MetricNameLabel},
   230  		},
   231  		{
   232  			name: "multiple binary expressions with without vector matchers",
   233  			expression: `
   234  (http_requests_total{code="400"} + ignoring (cluster, pod) http_requests_total{code="500"})
   235  / ignoring (pod)
   236  http_requests_total`,
   237  			shardingLabels: []string{"cluster", "pod", model.MetricNameLabel},
   238  		},
   239  		{
   240  			name:           "histogram quantile",
   241  			expression:     "histogram_quantile(0.95, sum(rate(metric[1m])) without (le, cluster))",
   242  			shardingLabels: []string{"cluster"},
   243  		},
   244  		{
   245  			name:           "aggregate without expression with label_replace, sharding label is not dynamic",
   246  			expression:     `sum without (dst_label) (label_replace(metric, "dst_label", "$1", "src_label", "re"))`,
   247  			shardingLabels: []string{"dst_label"},
   248  		},
   249  		{
   250  			name:           "aggregate without expression with label_join, sharding label is not dynamic",
   251  			expression:     `sum without (dst_label) (label_join(metric, "dst_label", ",", "src_label"))`,
   252  			shardingLabels: []string{"dst_label"},
   253  		},
   254  		{
   255  			name:           "aggregate without expression with label_replace",
   256  			expression:     `sum without (pod) (label_replace(metric, "dst_label", "$1", "src_label", "re"))`,
   257  			shardingLabels: []string{"pod", "dst_label"},
   258  		},
   259  	}
   260  
   261  	for _, test := range nonShardable {
   262  		t.Run(test.name, func(t *testing.T) {
   263  			analyzer := NewQueryAnalyzer()
   264  			analysis, err := analyzer.Analyze(test.expression)
   265  			require.NoError(t, err)
   266  			require.False(t, analysis.IsShardable())
   267  		})
   268  	}
   269  
   270  	for _, test := range shardableByLabels {
   271  		t.Run(test.name, func(t *testing.T) {
   272  			analyzer := NewQueryAnalyzer()
   273  			analysis, err := analyzer.Analyze(test.expression)
   274  			require.NoError(t, err)
   275  			require.True(t, analysis.IsShardable())
   276  			require.True(t, analysis.ShardBy())
   277  
   278  			sort.Strings(test.shardingLabels)
   279  			sort.Strings(analysis.ShardingLabels())
   280  			require.Equal(t, test.shardingLabels, analysis.ShardingLabels())
   281  		})
   282  	}
   283  
   284  	for _, test := range shardableWithoutLabels {
   285  		t.Run(test.name, func(t *testing.T) {
   286  			analyzer := NewQueryAnalyzer()
   287  			analysis, err := analyzer.Analyze(test.expression)
   288  			require.NoError(t, err)
   289  			require.True(t, analysis.IsShardable())
   290  			require.False(t, analysis.ShardBy())
   291  
   292  			sort.Strings(test.shardingLabels)
   293  			sort.Strings(analysis.ShardingLabels())
   294  			require.Equal(t, test.shardingLabels, analysis.ShardingLabels())
   295  		})
   296  	}
   297  }