github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/chunk/schema_test.go (about)

     1  package chunk
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/base64"
     6  	"fmt"
     7  	"reflect"
     8  	"sort"
     9  	"testing"
    10  
    11  	jsoniter "github.com/json-iterator/go"
    12  	"github.com/prometheus/common/model"
    13  	"github.com/prometheus/prometheus/pkg/labels"
    14  	"github.com/prometheus/prometheus/promql/parser"
    15  	"github.com/stretchr/testify/require"
    16  	"github.com/weaveworks/common/test"
    17  
    18  	"github.com/cortexproject/cortex/pkg/querier/astmapper"
    19  )
    20  
    21  type ByHashRangeKey []IndexEntry
    22  
    23  func (a ByHashRangeKey) Len() int      { return len(a) }
    24  func (a ByHashRangeKey) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
    25  func (a ByHashRangeKey) Less(i, j int) bool {
    26  	if a[i].HashValue != a[j].HashValue {
    27  		return a[i].HashValue < a[j].HashValue
    28  	}
    29  	return bytes.Compare(a[i].RangeValue, a[j].RangeValue) < 0
    30  }
    31  
    32  func mergeResults(rss ...[]IndexEntry) []IndexEntry {
    33  	results := []IndexEntry{}
    34  	for _, rs := range rss {
    35  		results = append(results, rs...)
    36  	}
    37  	return results
    38  }
    39  
    40  const table = "table"
    41  
    42  func mustMakeSchema(schemaName string) BaseSchema {
    43  	s, err := PeriodConfig{
    44  		Schema:      schemaName,
    45  		IndexTables: PeriodicTableConfig{Prefix: table},
    46  	}.CreateSchema()
    47  	if err != nil {
    48  		panic(err)
    49  	}
    50  	return s
    51  }
    52  
    53  func makeSeriesStoreSchema(schemaName string) SeriesStoreSchema {
    54  	return mustMakeSchema(schemaName).(SeriesStoreSchema)
    55  }
    56  
    57  func makeStoreSchema(schemaName string) StoreSchema {
    58  	return mustMakeSchema(schemaName).(StoreSchema)
    59  }
    60  
    61  func TestSchemaHashKeys(t *testing.T) {
    62  	mkResult := func(tableName, fmtStr string, from, through int) []IndexEntry {
    63  		want := []IndexEntry{}
    64  		for i := from; i < through; i++ {
    65  			want = append(want, IndexEntry{
    66  				TableName: tableName,
    67  				HashValue: fmt.Sprintf(fmtStr, i),
    68  			})
    69  		}
    70  		return want
    71  	}
    72  
    73  	const (
    74  		userID         = "userid"
    75  		periodicPrefix = "periodicPrefix"
    76  	)
    77  
    78  	hourlyBuckets := makeStoreSchema("v1")
    79  	dailyBuckets := makeStoreSchema("v3")
    80  	labelBuckets := makeStoreSchema("v4")
    81  	metric := labels.Labels{
    82  		{Name: model.MetricNameLabel, Value: "foo"},
    83  		{Name: "bar", Value: "baz"},
    84  	}
    85  	chunkID := "chunkID"
    86  
    87  	for i, tc := range []struct {
    88  		StoreSchema
    89  		from, through int64
    90  		metricName    string
    91  		want          []IndexEntry
    92  	}{
    93  		// Basic test case for the various bucketing schemes
    94  		{
    95  			hourlyBuckets,
    96  			0, (30 * 60) - 1, "foo", // chunk is smaller than bucket
    97  			mkResult(table, "userid:%d:foo", 0, 1),
    98  		},
    99  		{
   100  			hourlyBuckets,
   101  			0, (3 * 24 * 60 * 60) - 1, "foo",
   102  			mkResult(table, "userid:%d:foo", 0, 3*24),
   103  		},
   104  		{
   105  			hourlyBuckets,
   106  			0, 30 * 60, "foo", // chunk is smaller than bucket
   107  			mkResult(table, "userid:%d:foo", 0, 1),
   108  		},
   109  		{
   110  			dailyBuckets,
   111  			0, (3 * 24 * 60 * 60) - 1, "foo",
   112  			mkResult(table, "userid:d%d:foo", 0, 3),
   113  		},
   114  		{
   115  			labelBuckets,
   116  			0, (3 * 24 * 60 * 60) - 1, "foo",
   117  			mergeResults(
   118  				mkResult(table, "userid:d%d:foo", 0, 3),
   119  				mkResult(table, "userid:d%d:foo:bar", 0, 3),
   120  			),
   121  		},
   122  	} {
   123  		t.Run(fmt.Sprintf("TestSchemaHashKeys[%d]", i), func(t *testing.T) {
   124  			have, err := tc.StoreSchema.GetWriteEntries(
   125  				model.TimeFromUnix(tc.from), model.TimeFromUnix(tc.through),
   126  				userID, tc.metricName,
   127  				metric, chunkID,
   128  			)
   129  			if err != nil {
   130  				t.Fatal(err)
   131  			}
   132  			for i := range have {
   133  				have[i].RangeValue = nil
   134  			}
   135  			sort.Sort(ByHashRangeKey(have))
   136  			sort.Sort(ByHashRangeKey(tc.want))
   137  			if !reflect.DeepEqual(tc.want, have) {
   138  				t.Fatalf("wrong hash buckets - %s", test.Diff(tc.want, have))
   139  			}
   140  		})
   141  	}
   142  }
   143  
   144  // range value types
   145  const (
   146  	_ = iota
   147  	MetricNameRangeValue
   148  	ChunkTimeRangeValue
   149  	SeriesRangeValue
   150  )
   151  
   152  // parseRangeValueType returns the type of rangeValue
   153  func parseRangeValueType(rangeValue []byte) (int, error) {
   154  	components := decodeRangeKey(rangeValue, make([][]byte, 0, 5))
   155  	switch {
   156  	case len(components) < 3:
   157  		return 0, fmt.Errorf("invalid range value: %x", rangeValue)
   158  
   159  	// v1 & v2 chunk time range values
   160  	case len(components) == 3:
   161  		return ChunkTimeRangeValue, nil
   162  
   163  	// chunk time range values
   164  	case len(components[3]) == 1:
   165  		switch components[3][0] {
   166  		case chunkTimeRangeKeyV1:
   167  			return ChunkTimeRangeValue, nil
   168  
   169  		case chunkTimeRangeKeyV2:
   170  			return ChunkTimeRangeValue, nil
   171  
   172  		case chunkTimeRangeKeyV3:
   173  			return ChunkTimeRangeValue, nil
   174  
   175  		case chunkTimeRangeKeyV4:
   176  			return ChunkTimeRangeValue, nil
   177  
   178  		case chunkTimeRangeKeyV5:
   179  			return ChunkTimeRangeValue, nil
   180  
   181  		// metric name range values
   182  		case metricNameRangeKeyV1:
   183  			return MetricNameRangeValue, nil
   184  
   185  		// series range values
   186  		case seriesRangeKeyV1:
   187  			return SeriesRangeValue, nil
   188  		}
   189  	}
   190  	return 0, fmt.Errorf("unrecognised range value type. version: %q", string(components[3]))
   191  }
   192  
   193  func TestSchemaRangeKey(t *testing.T) {
   194  	const (
   195  		userID     = "userid"
   196  		metricName = "foo"
   197  		chunkID    = "chunkID"
   198  	)
   199  
   200  	var (
   201  		hourlyBuckets = makeStoreSchema("v1")
   202  		dailyBuckets  = makeStoreSchema("v2")
   203  		base64Keys    = makeStoreSchema("v3")
   204  		labelBuckets  = makeStoreSchema("v4")
   205  		tsRangeKeys   = makeStoreSchema("v5")
   206  		v6RangeKeys   = makeStoreSchema("v6")
   207  		metric        = labels.Labels{
   208  			{Name: model.MetricNameLabel, Value: metricName},
   209  			{Name: "bar", Value: "bary"},
   210  			{Name: "baz", Value: "bazy"},
   211  		}
   212  	)
   213  
   214  	mkEntries := func(hashKey string, callback func(labelName, labelValue string) ([]byte, []byte)) []IndexEntry {
   215  		result := []IndexEntry{}
   216  		for _, label := range metric {
   217  			if label.Name == model.MetricNameLabel {
   218  				continue
   219  			}
   220  			rangeValue, value := callback(label.Name, label.Value)
   221  			result = append(result, IndexEntry{
   222  				TableName:  table,
   223  				HashValue:  hashKey,
   224  				RangeValue: rangeValue,
   225  				Value:      value,
   226  			})
   227  		}
   228  		return result
   229  	}
   230  
   231  	for i, tc := range []struct {
   232  		StoreSchema
   233  		want []IndexEntry
   234  	}{
   235  		// Basic test case for the various bucketing schemes
   236  		{
   237  			hourlyBuckets,
   238  			mkEntries("userid:0:foo", func(labelName, labelValue string) ([]byte, []byte) {
   239  				return []byte(fmt.Sprintf("%s\x00%s\x00%s\x00", labelName, labelValue, chunkID)), nil
   240  			}),
   241  		},
   242  		{
   243  			dailyBuckets,
   244  			mkEntries("userid:d0:foo", func(labelName, labelValue string) ([]byte, []byte) {
   245  				return []byte(fmt.Sprintf("%s\x00%s\x00%s\x00", labelName, labelValue, chunkID)), nil
   246  			}),
   247  		},
   248  		{
   249  			base64Keys,
   250  			mkEntries("userid:d0:foo", func(labelName, labelValue string) ([]byte, []byte) {
   251  				encodedValue := base64.RawStdEncoding.EncodeToString([]byte(labelValue))
   252  				return []byte(fmt.Sprintf("%s\x00%s\x00%s\x001\x00", labelName, encodedValue, chunkID)), nil
   253  			}),
   254  		},
   255  		{
   256  			labelBuckets,
   257  			[]IndexEntry{
   258  				{
   259  					TableName:  table,
   260  					HashValue:  "userid:d0:foo",
   261  					RangeValue: []byte("\x00\x00chunkID\x002\x00"),
   262  				},
   263  				{
   264  					TableName:  table,
   265  					HashValue:  "userid:d0:foo:bar",
   266  					RangeValue: []byte("\x00YmFyeQ\x00chunkID\x001\x00"),
   267  				},
   268  				{
   269  					TableName:  table,
   270  					HashValue:  "userid:d0:foo:baz",
   271  					RangeValue: []byte("\x00YmF6eQ\x00chunkID\x001\x00"),
   272  				},
   273  			},
   274  		},
   275  		{
   276  			tsRangeKeys,
   277  			[]IndexEntry{
   278  				{
   279  					TableName:  table,
   280  					HashValue:  "userid:d0:foo",
   281  					RangeValue: []byte("0036ee7f\x00\x00chunkID\x003\x00"),
   282  				},
   283  				{
   284  					TableName:  table,
   285  					HashValue:  "userid:d0:foo:bar",
   286  					RangeValue: []byte("0036ee7f\x00YmFyeQ\x00chunkID\x004\x00"),
   287  				},
   288  				{
   289  					TableName:  table,
   290  					HashValue:  "userid:d0:foo:baz",
   291  					RangeValue: []byte("0036ee7f\x00YmF6eQ\x00chunkID\x004\x00"),
   292  				},
   293  			},
   294  		},
   295  		{
   296  			v6RangeKeys,
   297  			[]IndexEntry{
   298  				{
   299  					TableName:  table,
   300  					HashValue:  "userid:d0:foo",
   301  					RangeValue: []byte("0036ee7f\x00\x00chunkID\x003\x00"),
   302  				},
   303  				{
   304  					TableName:  table,
   305  					HashValue:  "userid:d0:foo:bar",
   306  					RangeValue: []byte("0036ee7f\x00\x00chunkID\x005\x00"),
   307  					Value:      []byte("bary"),
   308  				},
   309  				{
   310  					TableName:  table,
   311  					HashValue:  "userid:d0:foo:baz",
   312  					RangeValue: []byte("0036ee7f\x00\x00chunkID\x005\x00"),
   313  					Value:      []byte("bazy"),
   314  				},
   315  			},
   316  		},
   317  	} {
   318  		t.Run(fmt.Sprintf("TestSchameRangeKey[%d]", i), func(t *testing.T) {
   319  			have, err := tc.StoreSchema.GetWriteEntries(
   320  				model.TimeFromUnix(0), model.TimeFromUnix(60*60)-1,
   321  				userID, metricName,
   322  				metric, chunkID,
   323  			)
   324  			if err != nil {
   325  				t.Fatal(err)
   326  			}
   327  			sort.Sort(ByHashRangeKey(have))
   328  			sort.Sort(ByHashRangeKey(tc.want))
   329  			if !reflect.DeepEqual(tc.want, have) {
   330  				t.Fatalf("wrong hash buckets - %s", test.Diff(tc.want, have))
   331  			}
   332  
   333  			// Test we can parse the resulting range keys
   334  			for _, entry := range have {
   335  				rangeValueType, err := parseRangeValueType(entry.RangeValue)
   336  				require.NoError(t, err)
   337  
   338  				switch rangeValueType {
   339  				case MetricNameRangeValue:
   340  					_, err := parseMetricNameRangeValue(entry.RangeValue, entry.Value)
   341  					require.NoError(t, err)
   342  				case ChunkTimeRangeValue:
   343  					_, _, err := parseChunkTimeRangeValue(entry.RangeValue, entry.Value)
   344  					require.NoError(t, err)
   345  				case SeriesRangeValue:
   346  					_, err := parseSeriesRangeValue(entry.RangeValue, entry.Value)
   347  					require.NoError(t, err)
   348  				}
   349  			}
   350  		})
   351  	}
   352  }
   353  
   354  func BenchmarkEncodeLabelsJson(b *testing.B) {
   355  	decoded := &labels.Labels{}
   356  	lbs := labels.FromMap(map[string]string{
   357  		"foo":      "bar",
   358  		"fuzz":     "buzz",
   359  		"cluster":  "test",
   360  		"test":     "test1",
   361  		"instance": "cortex-01",
   362  		"bar":      "foo",
   363  		"version":  "0.1",
   364  	})
   365  	json := jsoniter.ConfigFastest
   366  	var data []byte
   367  	var err error
   368  	for n := 0; n < b.N; n++ {
   369  		data, err = json.Marshal(lbs)
   370  		if err != nil {
   371  			panic(err)
   372  		}
   373  		err = json.Unmarshal(data, decoded)
   374  		if err != nil {
   375  			panic(err)
   376  		}
   377  	}
   378  	b.Log("data size", len(data))
   379  	b.Log("decode", decoded)
   380  }
   381  
   382  func BenchmarkEncodeLabelsString(b *testing.B) {
   383  	var decoded labels.Labels
   384  	lbs := labels.FromMap(map[string]string{
   385  		"foo":      "bar",
   386  		"fuzz":     "buzz",
   387  		"cluster":  "test",
   388  		"test":     "test1",
   389  		"instance": "cortex-01",
   390  		"bar":      "foo",
   391  		"version":  "0.1",
   392  	})
   393  	var data []byte
   394  	var err error
   395  	for n := 0; n < b.N; n++ {
   396  		data = []byte(lbs.String())
   397  		decoded, err = parser.ParseMetric(string(data))
   398  		if err != nil {
   399  			panic(err)
   400  		}
   401  	}
   402  	b.Log("data size", len(data))
   403  	b.Log("decode", decoded)
   404  }
   405  
   406  func TestV10IndexQueries(t *testing.T) {
   407  	fromShards := func(n int) (res []IndexQuery) {
   408  		for i := 0; i < n; i++ {
   409  			res = append(res, IndexQuery{
   410  				TableName:       "tbl",
   411  				HashValue:       fmt.Sprintf("%02d:%s:%s:%s", i, "hash", "metric", "label"),
   412  				RangeValueStart: []byte(fmt.Sprint(i)),
   413  				ValueEqual:      []byte(fmt.Sprint(i)),
   414  			})
   415  		}
   416  		return res
   417  	}
   418  
   419  	var testExprs = []struct {
   420  		name     string
   421  		queries  []IndexQuery
   422  		shard    *astmapper.ShardAnnotation
   423  		expected []IndexQuery
   424  	}{
   425  		{
   426  			name:     "passthrough when no shard specified",
   427  			queries:  fromShards(2),
   428  			shard:    nil,
   429  			expected: fromShards(2),
   430  		},
   431  		{
   432  			name:    "out of bounds shard returns 0 matches",
   433  			queries: fromShards(2),
   434  			shard: &astmapper.ShardAnnotation{
   435  				Shard: 3,
   436  			},
   437  			expected: nil,
   438  		},
   439  		{
   440  			name:    "return correct shard",
   441  			queries: fromShards(3),
   442  			shard: &astmapper.ShardAnnotation{
   443  				Shard: 1,
   444  			},
   445  			expected: []IndexQuery{fromShards(2)[1]},
   446  		},
   447  	}
   448  
   449  	for _, c := range testExprs {
   450  		t.Run(c.name, func(t *testing.T) {
   451  			s := v10Entries{}
   452  			filtered := s.FilterReadQueries(c.queries, c.shard)
   453  			require.Equal(t, c.expected, filtered)
   454  		})
   455  	}
   456  }