github.com/thanos-io/thanos@v0.32.5/pkg/block/indexheader/header_test.go (about)

     1  // Copyright (c) The Thanos Authors.
     2  // Licensed under the Apache License 2.0.
     3  
     4  package indexheader
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"math"
    10  	"path/filepath"
    11  	"strconv"
    12  	"testing"
    13  
    14  	"github.com/go-kit/log"
    15  	"github.com/oklog/ulid"
    16  	"github.com/pkg/errors"
    17  	"github.com/prometheus/prometheus/model/labels"
    18  	"github.com/prometheus/prometheus/tsdb/encoding"
    19  	"github.com/prometheus/prometheus/tsdb/fileutil"
    20  	"github.com/prometheus/prometheus/tsdb/index"
    21  	"github.com/thanos-io/objstore"
    22  	"github.com/thanos-io/objstore/providers/filesystem"
    23  
    24  	"github.com/efficientgo/core/testutil"
    25  	"github.com/thanos-io/thanos/pkg/block"
    26  	"github.com/thanos-io/thanos/pkg/block/metadata"
    27  	"github.com/thanos-io/thanos/pkg/testutil/e2eutil"
    28  )
    29  
    30  func TestReaders(t *testing.T) {
    31  	ctx := context.Background()
    32  
    33  	tmpDir := t.TempDir()
    34  
    35  	bkt, err := filesystem.NewBucket(filepath.Join(tmpDir, "bkt"))
    36  	testutil.Ok(t, err)
    37  	defer func() { testutil.Ok(t, bkt.Close()) }()
    38  
    39  	// Create block index version 2.
    40  	id1, err := e2eutil.CreateBlock(ctx, tmpDir, []labels.Labels{
    41  		{{Name: "a", Value: "1"}},
    42  		{{Name: "a", Value: "2"}},
    43  		{{Name: "a", Value: "3"}},
    44  		{{Name: "a", Value: "4"}},
    45  		{{Name: "a", Value: "5"}},
    46  		{{Name: "a", Value: "6"}},
    47  		{{Name: "a", Value: "7"}},
    48  		{{Name: "a", Value: "8"}},
    49  		{{Name: "a", Value: "9"}},
    50  		// Missing 10 on purpose.
    51  		{{Name: "a", Value: "11"}},
    52  		{{Name: "a", Value: "12"}},
    53  		{{Name: "a", Value: "13"}},
    54  		{{Name: "a", Value: "1"}, {Name: "longer-string", Value: "1"}},
    55  		{{Name: "a", Value: "1"}, {Name: "longer-string", Value: "2"}},
    56  	}, 100, 0, 1000, labels.Labels{{Name: "ext1", Value: "1"}}, 124, metadata.NoneFunc)
    57  	testutil.Ok(t, err)
    58  
    59  	testutil.Ok(t, block.Upload(ctx, log.NewNopLogger(), bkt, filepath.Join(tmpDir, id1.String()), metadata.NoneFunc))
    60  
    61  	// Copy block index version 1 for backward compatibility.
    62  	/* The block here was produced at the commit
    63  	    706602daed1487f7849990678b4ece4599745905 used in 2.0.0 with:
    64  	   db, _ := Open("v1db", nil, nil, nil)
    65  	   app := db.Appender()
    66  	   app.Add(labels.FromStrings("foo", "bar"), 1, 2)
    67  	   app.Add(labels.FromStrings("foo", "baz"), 3, 4)
    68  	   app.Add(labels.FromStrings("foo", "meh"), 1000*3600*4, 4) // Not in the block.
    69  	   // Make sure we've enough values for the lack of sorting of postings offsets to show up.
    70  	   for i := 0; i < 100; i++ {
    71  	     app.Add(labels.FromStrings("bar", strconv.FormatInt(int64(i), 10)), 0, 0)
    72  	   }
    73  	   app.Commit()
    74  	   db.compact()
    75  	   db.Close()
    76  	*/
    77  
    78  	m, err := metadata.ReadFromDir("./testdata/index_format_v1")
    79  	testutil.Ok(t, err)
    80  	e2eutil.Copy(t, "./testdata/index_format_v1", filepath.Join(tmpDir, m.ULID.String()))
    81  
    82  	_, err = metadata.InjectThanos(log.NewNopLogger(), filepath.Join(tmpDir, m.ULID.String()), metadata.Thanos{
    83  		Labels:     labels.Labels{{Name: "ext1", Value: "1"}}.Map(),
    84  		Downsample: metadata.ThanosDownsample{Resolution: 0},
    85  		Source:     metadata.TestSource,
    86  	}, &m.BlockMeta)
    87  	testutil.Ok(t, err)
    88  	testutil.Ok(t, block.Upload(ctx, log.NewNopLogger(), bkt, filepath.Join(tmpDir, m.ULID.String()), metadata.NoneFunc))
    89  
    90  	for _, id := range []ulid.ULID{id1, m.ULID} {
    91  		t.Run(id.String(), func(t *testing.T) {
    92  			indexFile, err := fileutil.OpenMmapFile(filepath.Join(tmpDir, id.String(), block.IndexFilename))
    93  			testutil.Ok(t, err)
    94  			defer func() { _ = indexFile.Close() }()
    95  
    96  			b := realByteSlice(indexFile.Bytes())
    97  
    98  			t.Run("binary reader", func(t *testing.T) {
    99  				fn := filepath.Join(tmpDir, id.String(), block.IndexHeaderFilename)
   100  				_, err := WriteBinary(ctx, bkt, id, fn)
   101  				testutil.Ok(t, err)
   102  
   103  				br, err := NewBinaryReader(ctx, log.NewNopLogger(), nil, tmpDir, id, 3)
   104  				testutil.Ok(t, err)
   105  
   106  				defer func() { testutil.Ok(t, br.Close()) }()
   107  
   108  				if id == id1 {
   109  					testutil.Equals(t, 1, br.version)
   110  					testutil.Equals(t, 2, br.indexVersion)
   111  					testutil.Equals(t, &BinaryTOC{Symbols: headerLen, PostingsOffsetTable: 70}, br.toc)
   112  					testutil.Equals(t, int64(710), br.indexLastPostingEnd)
   113  					testutil.Equals(t, 8, br.symbols.Size())
   114  					testutil.Equals(t, 0, len(br.postingsV1))
   115  					testutil.Equals(t, 2, len(br.nameSymbols))
   116  					testutil.Equals(t, map[string]*postingValueOffsets{
   117  						"": {
   118  							offsets:       []postingOffset{{value: "", tableOff: 4}},
   119  							lastValOffset: 440,
   120  						},
   121  						"a": {
   122  							offsets: []postingOffset{
   123  								{value: "1", tableOff: 9},
   124  								{value: "13", tableOff: 32},
   125  								{value: "4", tableOff: 54},
   126  								{value: "7", tableOff: 75},
   127  								{value: "9", tableOff: 89},
   128  							},
   129  							lastValOffset: 640,
   130  						},
   131  						"longer-string": {
   132  							offsets: []postingOffset{
   133  								{value: "1", tableOff: 96},
   134  								{value: "2", tableOff: 115},
   135  							},
   136  							lastValOffset: 706,
   137  						},
   138  					}, br.postings)
   139  
   140  					vals, err := br.LabelValues("not-existing")
   141  					testutil.Ok(t, err)
   142  					testutil.Equals(t, []string(nil), vals)
   143  
   144  					// Regression tests for https://github.com/thanos-io/thanos/issues/2213.
   145  					// Most of not existing value was working despite bug, except in certain unlucky cases
   146  					// it was causing "invalid size" errors.
   147  					_, err = br.PostingsOffset("not-existing", "1")
   148  					testutil.Equals(t, NotFoundRangeErr, err)
   149  					_, err = br.PostingsOffset("a", "0")
   150  					testutil.Equals(t, NotFoundRangeErr, err)
   151  					// Unlucky case, because the bug was causing unnecessary read & decode requiring more bytes than
   152  					// available. For rest cases read was noop wrong, but at least not failing.
   153  					_, err = br.PostingsOffset("a", "10")
   154  					testutil.Equals(t, NotFoundRangeErr, err)
   155  					_, err = br.PostingsOffset("a", "121")
   156  					testutil.Equals(t, NotFoundRangeErr, err)
   157  					_, err = br.PostingsOffset("a", "131")
   158  					testutil.Equals(t, NotFoundRangeErr, err)
   159  					_, err = br.PostingsOffset("a", "91")
   160  					testutil.Equals(t, NotFoundRangeErr, err)
   161  					_, err = br.PostingsOffset("longer-string", "0")
   162  					testutil.Equals(t, NotFoundRangeErr, err)
   163  					_, err = br.PostingsOffset("longer-string", "11")
   164  					testutil.Equals(t, NotFoundRangeErr, err)
   165  					_, err = br.PostingsOffset("longer-string", "21")
   166  					testutil.Equals(t, NotFoundRangeErr, err)
   167  				}
   168  
   169  				compareIndexToHeader(t, b, br)
   170  			})
   171  
   172  			t.Run("lazy binary reader", func(t *testing.T) {
   173  				fn := filepath.Join(tmpDir, id.String(), block.IndexHeaderFilename)
   174  				_, err := WriteBinary(ctx, bkt, id, fn)
   175  				testutil.Ok(t, err)
   176  
   177  				br, err := NewLazyBinaryReader(ctx, log.NewNopLogger(), nil, tmpDir, id, 3, NewLazyBinaryReaderMetrics(nil), nil)
   178  				testutil.Ok(t, err)
   179  
   180  				defer func() { testutil.Ok(t, br.Close()) }()
   181  
   182  				compareIndexToHeader(t, b, br)
   183  			})
   184  		})
   185  	}
   186  
   187  }
   188  
   189  func compareIndexToHeader(t *testing.T, indexByteSlice index.ByteSlice, headerReader Reader) {
   190  	indexReader, err := index.NewReader(indexByteSlice)
   191  	testutil.Ok(t, err)
   192  	defer func() { _ = indexReader.Close() }()
   193  
   194  	actVersion, err := headerReader.IndexVersion()
   195  	testutil.Ok(t, err)
   196  	testutil.Equals(t, indexReader.Version(), actVersion)
   197  
   198  	if indexReader.Version() == index.FormatV2 {
   199  		// For v2 symbols ref sequential integers 0, 1, 2 etc.
   200  		iter := indexReader.Symbols()
   201  		i := 0
   202  		for iter.Next() {
   203  			r, err := headerReader.LookupSymbol(uint32(i))
   204  			testutil.Ok(t, err)
   205  			testutil.Equals(t, iter.At(), r)
   206  
   207  			i++
   208  		}
   209  		testutil.Ok(t, iter.Err())
   210  		_, err := headerReader.LookupSymbol(uint32(i))
   211  		testutil.NotOk(t, err)
   212  
   213  	} else {
   214  		// For v1 symbols refs are actual offsets in the index.
   215  		symbols, err := getSymbolTable(indexByteSlice)
   216  		testutil.Ok(t, err)
   217  
   218  		for refs, sym := range symbols {
   219  			r, err := headerReader.LookupSymbol(refs)
   220  			testutil.Ok(t, err)
   221  			testutil.Equals(t, sym, r)
   222  		}
   223  		_, err = headerReader.LookupSymbol(200000)
   224  		testutil.NotOk(t, err)
   225  	}
   226  
   227  	expLabelNames, err := indexReader.LabelNames()
   228  	testutil.Ok(t, err)
   229  	actualLabelNames, err := headerReader.LabelNames()
   230  	testutil.Ok(t, err)
   231  	testutil.Equals(t, expLabelNames, actualLabelNames)
   232  
   233  	expRanges, err := indexReader.PostingsRanges()
   234  	testutil.Ok(t, err)
   235  
   236  	minStart := int64(math.MaxInt64)
   237  	maxEnd := int64(math.MinInt64)
   238  	for il, lname := range expLabelNames {
   239  		expectedLabelVals, err := indexReader.SortedLabelValues(lname)
   240  		testutil.Ok(t, err)
   241  
   242  		vals, err := headerReader.LabelValues(lname)
   243  		testutil.Ok(t, err)
   244  		testutil.Equals(t, expectedLabelVals, vals)
   245  
   246  		for iv, v := range vals {
   247  			if minStart > expRanges[labels.Label{Name: lname, Value: v}].Start {
   248  				minStart = expRanges[labels.Label{Name: lname, Value: v}].Start
   249  			}
   250  			if maxEnd < expRanges[labels.Label{Name: lname, Value: v}].End {
   251  				maxEnd = expRanges[labels.Label{Name: lname, Value: v}].End
   252  			}
   253  
   254  			ptr, err := headerReader.PostingsOffset(lname, v)
   255  			testutil.Ok(t, err)
   256  
   257  			// For index-cache those values are exact.
   258  			//
   259  			// For binary they are exact except last item posting offset. It's good enough if the value is larger than exact posting ending.
   260  			if indexReader.Version() == index.FormatV2 {
   261  				if iv == len(vals)-1 && il == len(expLabelNames)-1 {
   262  					testutil.Equals(t, expRanges[labels.Label{Name: lname, Value: v}].Start, ptr.Start)
   263  					testutil.Assert(t, expRanges[labels.Label{Name: lname, Value: v}].End <= ptr.End, "got offset %v earlier than actual posting end %v ", ptr.End, expRanges[labels.Label{Name: lname, Value: v}].End)
   264  					continue
   265  				}
   266  			} else {
   267  				// For index formatV1 the last one does not mean literally last value, as postings were not sorted.
   268  				// Account for that. We know it's 40 label value.
   269  				if v == "40" {
   270  					testutil.Equals(t, expRanges[labels.Label{Name: lname, Value: v}].Start, ptr.Start)
   271  					testutil.Assert(t, expRanges[labels.Label{Name: lname, Value: v}].End <= ptr.End, "got offset %v earlier than actual posting end %v ", ptr.End, expRanges[labels.Label{Name: lname, Value: v}].End)
   272  					continue
   273  				}
   274  			}
   275  			testutil.Equals(t, expRanges[labels.Label{Name: lname, Value: v}], ptr)
   276  		}
   277  	}
   278  
   279  	ptr, err := headerReader.PostingsOffset(index.AllPostingsKey())
   280  	testutil.Ok(t, err)
   281  	testutil.Equals(t, expRanges[labels.Label{Name: "", Value: ""}].Start, ptr.Start)
   282  	testutil.Equals(t, expRanges[labels.Label{Name: "", Value: ""}].End, ptr.End)
   283  }
   284  
   285  func prepareIndexV2Block(t testing.TB, tmpDir string, bkt objstore.Bucket) *metadata.Meta {
   286  	/* Copy index 6MB block index version 2. It was generated via thanosbench. Meta.json:
   287  		{
   288  		"ulid": "01DRBP4RNVZ94135ZA6B10EMRR",
   289  		"minTime": 1570766415000,
   290  		"maxTime": 1570939215001,
   291  		"stats": {
   292  			"numSamples": 115210000,
   293  			"numSeries": 10000,
   294  			"numChunks": 990000
   295  		},
   296  		"compaction": {
   297  			"level": 1,
   298  			"sources": [
   299  				"01DRBP4RNVZ94135ZA6B10EMRR"
   300  			]
   301  		},
   302  		"version": 1,
   303  		"thanos": {
   304  			"labels": {
   305  				"cluster": "one",
   306  				"dataset": "continuous"
   307  			},
   308  			"downsample": {
   309  				"resolution": 0
   310  			},
   311  			"source": "blockgen"
   312  		}
   313  	}
   314  	*/
   315  
   316  	m, err := metadata.ReadFromDir("./testdata/index_format_v2")
   317  	testutil.Ok(t, err)
   318  	e2eutil.Copy(t, "./testdata/index_format_v2", filepath.Join(tmpDir, m.ULID.String()))
   319  
   320  	_, err = metadata.InjectThanos(log.NewNopLogger(), filepath.Join(tmpDir, m.ULID.String()), metadata.Thanos{
   321  		Labels:     labels.Labels{{Name: "ext1", Value: "1"}}.Map(),
   322  		Downsample: metadata.ThanosDownsample{Resolution: 0},
   323  		Source:     metadata.TestSource,
   324  	}, &m.BlockMeta)
   325  	testutil.Ok(t, err)
   326  	testutil.Ok(t, block.Upload(context.Background(), log.NewNopLogger(), bkt, filepath.Join(tmpDir, m.ULID.String()), metadata.NoneFunc))
   327  
   328  	return m
   329  }
   330  
   331  func BenchmarkBinaryWrite(t *testing.B) {
   332  	ctx := context.Background()
   333  
   334  	tmpDir := t.TempDir()
   335  
   336  	bkt, err := filesystem.NewBucket(filepath.Join(tmpDir, "bkt"))
   337  	testutil.Ok(t, err)
   338  	defer func() { testutil.Ok(t, bkt.Close()) }()
   339  
   340  	m := prepareIndexV2Block(t, tmpDir, bkt)
   341  	fn := filepath.Join(tmpDir, m.ULID.String(), block.IndexHeaderFilename)
   342  
   343  	t.ResetTimer()
   344  	for i := 0; i < t.N; i++ {
   345  		_, err := WriteBinary(ctx, bkt, m.ULID, fn)
   346  		testutil.Ok(t, err)
   347  	}
   348  }
   349  
   350  func BenchmarkBinaryReader(t *testing.B) {
   351  	ctx := context.Background()
   352  	tmpDir := t.TempDir()
   353  
   354  	bkt, err := filesystem.NewBucket(filepath.Join(tmpDir, "bkt"))
   355  	testutil.Ok(t, err)
   356  
   357  	m := prepareIndexV2Block(t, tmpDir, bkt)
   358  	fn := filepath.Join(tmpDir, m.ULID.String(), block.IndexHeaderFilename)
   359  	_, err = WriteBinary(ctx, bkt, m.ULID, fn)
   360  	testutil.Ok(t, err)
   361  
   362  	t.ResetTimer()
   363  	for i := 0; i < t.N; i++ {
   364  		br, err := newFileBinaryReader(fn, 32)
   365  		testutil.Ok(t, err)
   366  		testutil.Ok(t, br.Close())
   367  	}
   368  }
   369  
   370  func BenchmarkBinaryReader_LookupSymbol(b *testing.B) {
   371  	for _, numSeries := range []int{valueSymbolsCacheSize, valueSymbolsCacheSize * 10} {
   372  		b.Run(fmt.Sprintf("num series = %d", numSeries), func(b *testing.B) {
   373  			benchmarkBinaryReaderLookupSymbol(b, numSeries)
   374  		})
   375  	}
   376  }
   377  
   378  func benchmarkBinaryReaderLookupSymbol(b *testing.B, numSeries int) {
   379  	const postingOffsetsInMemSampling = 32
   380  
   381  	ctx := context.Background()
   382  	logger := log.NewNopLogger()
   383  
   384  	tmpDir := b.TempDir()
   385  
   386  	bkt, err := filesystem.NewBucket(filepath.Join(tmpDir, "bkt"))
   387  	testutil.Ok(b, err)
   388  	defer func() { testutil.Ok(b, bkt.Close()) }()
   389  
   390  	// Generate series labels.
   391  	seriesLabels := make([]labels.Labels, 0, numSeries)
   392  	for i := 0; i < numSeries; i++ {
   393  		seriesLabels = append(seriesLabels, labels.Labels{{Name: "a", Value: strconv.Itoa(i)}})
   394  	}
   395  
   396  	// Create a block.
   397  	id1, err := e2eutil.CreateBlock(ctx, tmpDir, seriesLabels, 100, 0, 1000, labels.Labels{{Name: "ext1", Value: "1"}}, 124, metadata.NoneFunc)
   398  	testutil.Ok(b, err)
   399  	testutil.Ok(b, block.Upload(ctx, logger, bkt, filepath.Join(tmpDir, id1.String()), metadata.NoneFunc))
   400  
   401  	// Create an index reader.
   402  	reader, err := NewBinaryReader(ctx, logger, bkt, tmpDir, id1, postingOffsetsInMemSampling)
   403  	testutil.Ok(b, err)
   404  
   405  	// Get the offset of each label value symbol.
   406  	symbolsOffsets := make([]uint32, numSeries)
   407  	for i := 0; i < numSeries; i++ {
   408  		o, err := reader.symbols.ReverseLookup(strconv.Itoa(i))
   409  		testutil.Ok(b, err)
   410  
   411  		symbolsOffsets[i] = o
   412  	}
   413  
   414  	b.ResetTimer()
   415  
   416  	for n := 0; n < b.N; n++ {
   417  		for i := 0; i < len(symbolsOffsets); i++ {
   418  			if _, err := reader.LookupSymbol(symbolsOffsets[i]); err != nil {
   419  				b.Fail()
   420  			}
   421  		}
   422  	}
   423  }
   424  
   425  func getSymbolTable(b index.ByteSlice) (map[uint32]string, error) {
   426  	version := int(b.Range(4, 5)[0])
   427  
   428  	if version != 1 && version != 2 {
   429  		return nil, errors.Errorf("unknown index file version %d", version)
   430  	}
   431  
   432  	toc, err := index.NewTOCFromByteSlice(b)
   433  	if err != nil {
   434  		return nil, errors.Wrap(err, "read TOC")
   435  	}
   436  
   437  	symbolsV2, symbolsV1, err := readSymbols(b, version, int(toc.Symbols))
   438  	if err != nil {
   439  		return nil, errors.Wrap(err, "read symbols")
   440  	}
   441  
   442  	symbolsTable := make(map[uint32]string, len(symbolsV1)+len(symbolsV2))
   443  	for o, s := range symbolsV1 {
   444  		symbolsTable[o] = s
   445  	}
   446  	for o, s := range symbolsV2 {
   447  		symbolsTable[uint32(o)] = s
   448  	}
   449  	return symbolsTable, nil
   450  }
   451  
   452  // readSymbols reads the symbol table fully into memory and allocates proper strings for them.
   453  // Strings backed by the mmap'd memory would cause memory faults if applications keep using them
   454  // after the reader is closed.
   455  func readSymbols(bs index.ByteSlice, version, off int) ([]string, map[uint32]string, error) {
   456  	if off == 0 {
   457  		return nil, nil, nil
   458  	}
   459  	d := encoding.NewDecbufAt(bs, off, castagnoliTable)
   460  
   461  	var (
   462  		origLen     = d.Len()
   463  		cnt         = d.Be32int()
   464  		basePos     = uint32(off) + 4
   465  		nextPos     = basePos + uint32(origLen-d.Len())
   466  		symbolSlice []string
   467  		symbols     = map[uint32]string{}
   468  	)
   469  	if version == index.FormatV2 {
   470  		symbolSlice = make([]string, 0, cnt)
   471  	}
   472  
   473  	for d.Err() == nil && d.Len() > 0 && cnt > 0 {
   474  		s := d.UvarintStr()
   475  
   476  		if version == index.FormatV2 {
   477  			symbolSlice = append(symbolSlice, s)
   478  		} else {
   479  			symbols[nextPos] = s
   480  			nextPos = basePos + uint32(origLen-d.Len())
   481  		}
   482  		cnt--
   483  	}
   484  	return symbolSlice, symbols, errors.Wrap(d.Err(), "read symbols")
   485  }