github.com/grafana/pyroscope@v1.18.0/pkg/segmentwriter/memdb/index/index_test.go (about)

     1  // Copyright 2017 The Prometheus Authors
     2  // Licensed under the Apache License, Version 2.0 (the "License");
     3  // you may not use this file except in compliance with the License.
     4  // You may obtain a copy of the License at
     5  //
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package index
    15  
    16  import (
    17  	"context"
    18  	"fmt"
    19  	"hash/crc32"
    20  	"math/rand"
    21  	"os"
    22  	"path/filepath"
    23  	"sort"
    24  	"testing"
    25  
    26  	"github.com/grafana/pyroscope/pkg/phlaredb/tsdb/index"
    27  
    28  	"github.com/pkg/errors"
    29  	"github.com/stretchr/testify/require"
    30  	"go.uber.org/goleak"
    31  
    32  	"github.com/prometheus/common/model"
    33  	"github.com/prometheus/prometheus/model/labels"
    34  	"github.com/prometheus/prometheus/storage"
    35  	"github.com/prometheus/prometheus/tsdb/encoding"
    36  	"github.com/prometheus/prometheus/util/testutil"
    37  
    38  	typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1"
    39  	"github.com/grafana/pyroscope/pkg/iter"
    40  	phlaremodel "github.com/grafana/pyroscope/pkg/model"
    41  )
    42  
    43  func TestMain(m *testing.M) {
    44  	goleak.VerifyTestMain(m,
    45  		goleak.IgnoreTopFunction("github.com/golang/glog.(*fileSink).flushDaemon"),
    46  		goleak.IgnoreTopFunction("github.com/dgraph-io/ristretto.(*defaultPolicy).processItems"),
    47  		goleak.IgnoreTopFunction("github.com/dgraph-io/ristretto.(*Cache).processItems"),
    48  	)
    49  }
    50  
// series is the record mockIndex keeps for a single series reference.
type series struct {
	// l is the label set the series was added with.
	l      phlaremodel.Labels
	// chunks holds the chunk metas passed to AddSeries for this series.
	chunks []index.ChunkMeta
}
    55  
// mockIndex is a map-backed index used by the tests in this file as the
// reference to compare the real index reader against.
type mockIndex struct {
	// series maps a series reference to its labels and chunk metas.
	series map[storage.SeriesRef]series
	// postings maps a label name/value pair to the references of the series
	// carrying it, in the order the series were added.
	// we're forced to use an anonymous struct here because we can't use typesv1.LabelPair as it's not comparable.
	postings map[struct{ Name, Value string }][]storage.SeriesRef
	// symbols is the set of every label name and value seen by AddSeries.
	symbols  map[string]struct{}
}
    62  
    63  func newMockIndex() mockIndex {
    64  	allPostingsKeyName, allPostingsKeyValue := index.AllPostingsKey()
    65  	ix := mockIndex{
    66  		series:   make(map[storage.SeriesRef]series),
    67  		postings: make(map[struct{ Name, Value string }][]storage.SeriesRef),
    68  		symbols:  make(map[string]struct{}),
    69  	}
    70  	ix.postings[struct {
    71  		Name  string
    72  		Value string
    73  	}{allPostingsKeyName, allPostingsKeyValue}] = []storage.SeriesRef{}
    74  	return ix
    75  }
    76  
// Symbols returns the set of label names and values recorded by AddSeries.
// The returned map is the mock's own map, not a copy; the error is always nil.
func (m mockIndex) Symbols() (map[string]struct{}, error) {
	return m.symbols, nil
}
    80  
    81  func (m mockIndex) AddSeries(ref storage.SeriesRef, l phlaremodel.Labels, chunks ...index.ChunkMeta) error {
    82  	allPostingsKeyName, allPostingsKeyValue := index.AllPostingsKey()
    83  
    84  	if _, ok := m.series[ref]; ok {
    85  		return errors.Errorf("series with reference %d already added", ref)
    86  	}
    87  	for _, lbl := range l {
    88  		m.symbols[lbl.Name] = struct{}{}
    89  		m.symbols[lbl.Value] = struct{}{}
    90  		if _, ok := m.postings[struct {
    91  			Name  string
    92  			Value string
    93  		}{lbl.Name, lbl.Value}]; !ok {
    94  			m.postings[struct {
    95  				Name  string
    96  				Value string
    97  			}{lbl.Name, lbl.Value}] = []storage.SeriesRef{}
    98  		}
    99  		m.postings[struct {
   100  			Name  string
   101  			Value string
   102  		}{lbl.Name, lbl.Value}] = append(m.postings[struct {
   103  			Name  string
   104  			Value string
   105  		}{lbl.Name, lbl.Value}], ref)
   106  	}
   107  	m.postings[struct {
   108  		Name  string
   109  		Value string
   110  	}{allPostingsKeyName, allPostingsKeyValue}] = append(m.postings[struct {
   111  		Name  string
   112  		Value string
   113  	}{allPostingsKeyName, allPostingsKeyValue}], ref)
   114  
   115  	s := series{l: l}
   116  	// Actual chunk data is not stored in the index.
   117  	s.chunks = append(s.chunks, chunks...)
   118  	m.series[ref] = s
   119  
   120  	return nil
   121  }
   122  
// Close is a no-op: the mock holds no resources. It always returns nil.
func (m mockIndex) Close() error {
	return nil
}
   126  
   127  func (m mockIndex) LabelValues(name string) ([]string, error) {
   128  	values := []string{}
   129  	for l := range m.postings {
   130  		if l.Name == name {
   131  			values = append(values, l.Value)
   132  		}
   133  	}
   134  	return values, nil
   135  }
   136  
   137  func (m mockIndex) Postings(name string, values ...string) (index.Postings, error) {
   138  	p := []index.Postings{}
   139  	for _, value := range values {
   140  		p = append(p, iter.NewSliceSeekIterator(m.postings[struct {
   141  			Name  string
   142  			Value string
   143  		}{Name: name, Value: value}]))
   144  	}
   145  	return index.Merge(p...), nil
   146  }
   147  
   148  func (m mockIndex) Series(ref storage.SeriesRef, lset *phlaremodel.Labels, chks *[]index.ChunkMeta) error {
   149  	s, ok := m.series[ref]
   150  	if !ok {
   151  		return errors.New("not found")
   152  	}
   153  	*lset = append((*lset)[:0], s.l...)
   154  	*chks = append((*chks)[:0], s.chunks...)
   155  
   156  	return nil
   157  }
   158  
   159  func TestIndexRW_Create_Open(t *testing.T) {
   160  
   161  	// An empty index must still result in a readable file.
   162  	iw, err := NewWriter(context.Background(), BlocksIndexWriterBufSize)
   163  	require.NoError(t, err)
   164  	require.NoError(t, iw.Close())
   165  
   166  	bytes := iw.ReleaseIndexBuffer().buf.Bytes()
   167  	ir, err := NewReader(RealByteSlice(bytes))
   168  	require.NoError(t, err)
   169  	require.NoError(t, ir.Close())
   170  
   171  	// Modify magic header must cause open to fail.
   172  	//f, err := os.OpenFile(fn, os.O_WRONLY, 0o666)
   173  	//require.NoError(t, err)
   174  	//err = iw.f.WriteAt([]byte{0, 0}, 0)
   175  	bytes[0] = 0
   176  	require.NoError(t, err)
   177  	//f.Close()
   178  
   179  	//_, err = NewFileReader(dir)
   180  	//require.Error(t, err)
   181  }
   182  
   183  func TestIndexRW_Postings(t *testing.T) {
   184  
   185  	iw, err := NewWriter(context.Background(), BlocksIndexWriterBufSize)
   186  	require.NoError(t, err)
   187  
   188  	series := []phlaremodel.Labels{
   189  		phlaremodel.LabelsFromStrings("a", "1", "b", "1"),
   190  		phlaremodel.LabelsFromStrings("a", "1", "b", "2"),
   191  		phlaremodel.LabelsFromStrings("a", "1", "b", "3"),
   192  		phlaremodel.LabelsFromStrings("a", "1", "b", "4"),
   193  	}
   194  
   195  	require.NoError(t, iw.AddSymbol("1"))
   196  	require.NoError(t, iw.AddSymbol("2"))
   197  	require.NoError(t, iw.AddSymbol("3"))
   198  	require.NoError(t, iw.AddSymbol("4"))
   199  	require.NoError(t, iw.AddSymbol("a"))
   200  	require.NoError(t, iw.AddSymbol("b"))
   201  
   202  	// Postings lists are only written if a series with the respective
   203  	// reference was added before.
   204  	require.NoError(t, iw.AddSeries(1, series[0], model.Fingerprint(series[0].Hash())))
   205  	require.NoError(t, iw.AddSeries(2, series[1], model.Fingerprint(series[1].Hash())))
   206  	require.NoError(t, iw.AddSeries(3, series[2], model.Fingerprint(series[2].Hash())))
   207  	require.NoError(t, iw.AddSeries(4, series[3], model.Fingerprint(series[3].Hash())))
   208  
   209  	require.NoError(t, iw.Close())
   210  
   211  	ir, err := NewReader(RealByteSlice(iw.ReleaseIndexBuffer().buf.Bytes()))
   212  	require.NoError(t, err)
   213  
   214  	p, err := ir.Postings("a", nil, "1")
   215  	require.NoError(t, err)
   216  
   217  	var l phlaremodel.Labels
   218  	var c []index.ChunkMeta
   219  
   220  	for i := 0; p.Next(); i++ {
   221  		_, err := ir.Series(p.At(), &l, &c)
   222  
   223  		require.NoError(t, err)
   224  		require.Equal(t, 0, len(c))
   225  		require.Equal(t, series[i], l)
   226  	}
   227  	require.NoError(t, p.Err())
   228  
   229  	// The label indices are no longer used, so test them by hand here.
   230  	labelIndices := map[string][]string{}
   231  	require.NoError(t, ReadOffsetTable(ir.b, ir.toc.LabelIndicesTable, func(key []string, off uint64, _ int) error {
   232  		if len(key) != 1 {
   233  			return errors.Errorf("unexpected key length for label indices table %d", len(key))
   234  		}
   235  
   236  		d := encoding.NewDecbufAt(ir.b, int(off), castagnoliTable)
   237  		vals := []string{}
   238  		nc := d.Be32int()
   239  		if nc != 1 {
   240  			return errors.Errorf("unexpected number of label indices table names %d", nc)
   241  		}
   242  		for i := d.Be32(); i > 0; i-- {
   243  			v, err := ir.lookupSymbol(d.Be32())
   244  			if err != nil {
   245  				return err
   246  			}
   247  			vals = append(vals, v)
   248  		}
   249  		labelIndices[key[0]] = vals
   250  		return d.Err()
   251  	}))
   252  	require.Equal(t, map[string][]string{
   253  		"a": {"1"},
   254  		"b": {"1", "2", "3", "4"},
   255  	}, labelIndices)
   256  
   257  	require.NoError(t, ir.Close())
   258  }
   259  
   260  func TestPostingsMany(t *testing.T) {
   261  
   262  	iw, err := NewWriter(context.Background(), BlocksIndexWriterBufSize)
   263  	require.NoError(t, err)
   264  
   265  	// Create a label in the index which has 999 values.
   266  	symbols := map[string]struct{}{}
   267  	series := []phlaremodel.Labels{}
   268  	for i := 1; i < 1000; i++ {
   269  		v := fmt.Sprintf("%03d", i)
   270  		series = append(series, phlaremodel.LabelsFromStrings("i", v, "foo", "bar"))
   271  		symbols[v] = struct{}{}
   272  	}
   273  	symbols["i"] = struct{}{}
   274  	symbols["foo"] = struct{}{}
   275  	symbols["bar"] = struct{}{}
   276  	syms := []string{}
   277  	for s := range symbols {
   278  		syms = append(syms, s)
   279  	}
   280  	sort.Strings(syms)
   281  	for _, s := range syms {
   282  		require.NoError(t, iw.AddSymbol(s))
   283  	}
   284  
   285  	sort.Slice(series, func(i, j int) bool {
   286  		return series[i].Hash() < series[j].Hash()
   287  	})
   288  
   289  	for i, s := range series {
   290  		require.NoError(t, iw.AddSeries(storage.SeriesRef(i), s, model.Fingerprint(s.Hash())))
   291  	}
   292  	require.NoError(t, iw.Close())
   293  
   294  	ir, err := NewReader(RealByteSlice(iw.ReleaseIndexBuffer().buf.Bytes()))
   295  	require.NoError(t, err)
   296  	defer func() { require.NoError(t, ir.Close()) }()
   297  
   298  	cases := []struct {
   299  		in []string
   300  	}{
   301  		// Simple cases, everything is present.
   302  		{in: []string{"002"}},
   303  		{in: []string{"031", "032", "033"}},
   304  		{in: []string{"032", "033"}},
   305  		{in: []string{"127", "128"}},
   306  		{in: []string{"127", "128", "129"}},
   307  		{in: []string{"127", "129"}},
   308  		{in: []string{"128", "129"}},
   309  		{in: []string{"998", "999"}},
   310  		{in: []string{"999"}},
   311  		// Before actual values.
   312  		{in: []string{"000"}},
   313  		{in: []string{"000", "001"}},
   314  		{in: []string{"000", "002"}},
   315  		// After actual values.
   316  		{in: []string{"999a"}},
   317  		{in: []string{"999", "999a"}},
   318  		{in: []string{"998", "999", "999a"}},
   319  		// In the middle of actual values.
   320  		{in: []string{"126a", "127", "128"}},
   321  		{in: []string{"127", "127a", "128"}},
   322  		{in: []string{"127", "127a", "128", "128a", "129"}},
   323  		{in: []string{"127", "128a", "129"}},
   324  		{in: []string{"128", "128a", "129"}},
   325  		{in: []string{"128", "129", "129a"}},
   326  		{in: []string{"126a", "126b", "127", "127a", "127b", "128", "128a", "128b", "129", "129a", "129b"}},
   327  	}
   328  
   329  	for _, c := range cases {
   330  		it, err := ir.Postings("i", nil, c.in...)
   331  		require.NoError(t, err)
   332  
   333  		got := []string{}
   334  		var lbls phlaremodel.Labels
   335  		var metas []index.ChunkMeta
   336  		for it.Next() {
   337  			_, err := ir.Series(it.At(), &lbls, &metas)
   338  			require.NoError(t, err)
   339  			got = append(got, lbls.Get("i"))
   340  		}
   341  		require.NoError(t, it.Err())
   342  		exp := []string{}
   343  		for _, e := range c.in {
   344  			if _, ok := symbols[e]; ok && e != "l" {
   345  				exp = append(exp, e)
   346  			}
   347  		}
   348  
   349  		// sort expected values by label hash instead of lexicographically by labelset
   350  		sort.Slice(exp, func(i, j int) bool {
   351  			return labels.StableHash(labels.FromStrings("i", exp[i], "foo", "bar")) < labels.StableHash(labels.FromStrings("i", exp[j], "foo", "bar"))
   352  		})
   353  
   354  		require.Equal(t, exp, got, fmt.Sprintf("input: %v", c.in))
   355  	}
   356  }
   357  
   358  func TestPersistence_index_e2e(t *testing.T) {
   359  	lbls, err := labels.ReadLabels("../../../phlaredb/tsdb/testdata/20kseries.json", 20000)
   360  	require.NoError(t, err)
   361  
   362  	flbls := make([]phlaremodel.Labels, len(lbls))
   363  	for i, ls := range lbls {
   364  		flbls[i] = make(phlaremodel.Labels, 0, ls.Len())
   365  		ls.Range(func(l labels.Label) {
   366  			flbls[i] = append(flbls[i], &typesv1.LabelPair{Name: l.Name, Value: l.Value})
   367  		})
   368  	}
   369  
   370  	// Sort labels as the index writer expects series in sorted order by fingerprint.
   371  	sort.Slice(flbls, func(i, j int) bool {
   372  		return flbls[i].Hash() < flbls[j].Hash()
   373  	})
   374  
   375  	symbols := map[string]struct{}{}
   376  	for _, lset := range lbls {
   377  		lset.Range(func(l labels.Label) {
   378  			symbols[l.Name] = struct{}{}
   379  			symbols[l.Value] = struct{}{}
   380  		})
   381  	}
   382  
   383  	var input index.IndexWriterSeriesSlice
   384  
   385  	// Generate ChunkMetas for every label set.
   386  	for i, lset := range flbls {
   387  		var metas []index.ChunkMeta
   388  
   389  		for j := 0; j <= (i % 20); j++ {
   390  			metas = append(metas, index.ChunkMeta{
   391  				MinTime:  int64(j * 10000),
   392  				MaxTime:  int64((j + 1) * 10000),
   393  				Checksum: rand.Uint32(),
   394  			})
   395  		}
   396  		input = append(input, &index.IndexWriterSeries{
   397  			Labels: lset,
   398  			Chunks: metas,
   399  		})
   400  	}
   401  
   402  	iw, err := NewWriter(context.Background(), BlocksIndexWriterBufSize)
   403  	require.NoError(t, err)
   404  
   405  	syms := []string{}
   406  	for s := range symbols {
   407  		syms = append(syms, s)
   408  	}
   409  	sort.Strings(syms)
   410  	for _, s := range syms {
   411  		require.NoError(t, iw.AddSymbol(s))
   412  	}
   413  
   414  	// Population procedure as done by compaction.
   415  	var (
   416  		postings = index.NewMemPostings()
   417  		values   = map[string]map[string]struct{}{}
   418  	)
   419  
   420  	mi := newMockIndex()
   421  
   422  	for i, s := range input {
   423  		err = iw.AddSeries(storage.SeriesRef(i), s.Labels, model.Fingerprint(s.Labels.Hash()), s.Chunks...)
   424  		require.NoError(t, err)
   425  		require.NoError(t, mi.AddSeries(storage.SeriesRef(i), s.Labels, s.Chunks...))
   426  
   427  		for _, l := range s.Labels {
   428  			valset, ok := values[l.Name]
   429  			if !ok {
   430  				valset = map[string]struct{}{}
   431  				values[l.Name] = valset
   432  			}
   433  			valset[l.Value] = struct{}{}
   434  		}
   435  		postings.Add(storage.SeriesRef(i), s.Labels)
   436  	}
   437  
   438  	err = iw.Close()
   439  	require.NoError(t, err)
   440  
   441  	ir, err := NewReader(RealByteSlice(iw.ReleaseIndexBuffer().buf.Bytes()))
   442  	require.NoError(t, err)
   443  
   444  	for p := range mi.postings {
   445  		gotp, err := ir.Postings(p.Name, nil, p.Value)
   446  		require.NoError(t, err)
   447  
   448  		expp, err := mi.Postings(p.Name, p.Value)
   449  		require.NoError(t, err)
   450  
   451  		var lset, explset phlaremodel.Labels
   452  		var chks, expchks []index.ChunkMeta
   453  
   454  		for gotp.Next() {
   455  			require.True(t, expp.Next())
   456  
   457  			ref := gotp.At()
   458  
   459  			_, err := ir.Series(ref, &lset, &chks)
   460  			require.NoError(t, err)
   461  
   462  			err = mi.Series(expp.At(), &explset, &expchks)
   463  			require.NoError(t, err)
   464  			require.Equal(t, explset, lset)
   465  			require.Equal(t, expchks, chks)
   466  		}
   467  		require.False(t, expp.Next(), "Expected no more postings for %q=%q", p.Name, p.Value)
   468  		require.NoError(t, gotp.Err())
   469  	}
   470  
   471  	labelPairs := map[string][]string{}
   472  	for l := range mi.postings {
   473  		labelPairs[l.Name] = append(labelPairs[l.Name], l.Value)
   474  	}
   475  	for k, v := range labelPairs {
   476  		sort.Strings(v)
   477  
   478  		res, err := ir.SortedLabelValues(k)
   479  		require.NoError(t, err)
   480  
   481  		require.Equal(t, len(v), len(res))
   482  		for i := 0; i < len(v); i++ {
   483  			require.Equal(t, v[i], res[i])
   484  		}
   485  	}
   486  
   487  	gotSymbols := []string{}
   488  	it := ir.Symbols()
   489  	for it.Next() {
   490  		gotSymbols = append(gotSymbols, it.At())
   491  	}
   492  	require.NoError(t, it.Err())
   493  	expSymbols := []string{}
   494  	for s := range mi.symbols {
   495  		expSymbols = append(expSymbols, s)
   496  	}
   497  	sort.Strings(expSymbols)
   498  	require.Equal(t, expSymbols, gotSymbols)
   499  
   500  	require.NoError(t, ir.Close())
   501  }
   502  
   503  func TestDecbufUvarintWithInvalidBuffer(t *testing.T) {
   504  	b := RealByteSlice([]byte{0x81, 0x81, 0x81, 0x81, 0x81, 0x81})
   505  
   506  	db := encoding.NewDecbufUvarintAt(b, 0, castagnoliTable)
   507  	require.Error(t, db.Err())
   508  }
   509  
   510  func TestReaderWithInvalidBuffer(t *testing.T) {
   511  	b := RealByteSlice([]byte{0x81, 0x81, 0x81, 0x81, 0x81, 0x81})
   512  
   513  	_, err := NewReader(b)
   514  	require.Error(t, err)
   515  }
   516  
   517  // TestNewFileReaderErrorNoOpenFiles ensures that in case of an error no file remains open.
   518  func TestNewFileReaderErrorNoOpenFiles(t *testing.T) {
   519  	dir := testutil.NewTemporaryDirectory("block", t)
   520  
   521  	idxName := filepath.Join(dir.Path(), "index")
   522  	err := os.WriteFile(idxName, []byte("corrupted contents"), 0o666)
   523  	require.NoError(t, err)
   524  
   525  	_, err = NewFileReader(idxName)
   526  	require.Error(t, err)
   527  
   528  	// dir.Close will fail on Win if idxName fd is not closed on error path.
   529  	dir.Close()
   530  }
   531  
   532  func TestSymbols(t *testing.T) {
   533  	buf := encoding.Encbuf{}
   534  
   535  	// Add prefix to the buffer to simulate symbols as part of larger buffer.
   536  	buf.PutUvarintStr("something")
   537  
   538  	symbolsStart := buf.Len()
   539  	buf.PutBE32int(204) // Length of symbols table.
   540  	buf.PutBE32int(100) // Number of symbols.
   541  	for i := 0; i < 100; i++ {
   542  		// i represents index in unicode characters table.
   543  		buf.PutUvarintStr(string(rune(i))) // Symbol.
   544  	}
   545  	checksum := crc32.Checksum(buf.Get()[symbolsStart+4:], castagnoliTable)
   546  	buf.PutBE32(checksum) // Check sum at the end.
   547  
   548  	s, err := NewSymbols(RealByteSlice(buf.Get()), FormatV2, symbolsStart)
   549  	require.NoError(t, err)
   550  
   551  	// We store only 4 offsets to symbols.
   552  	require.Equal(t, 32, s.Size())
   553  
   554  	for i := 99; i >= 0; i-- {
   555  		s, err := s.Lookup(uint32(i))
   556  		require.NoError(t, err)
   557  		require.Equal(t, string(rune(i)), s)
   558  	}
   559  	_, err = s.Lookup(100)
   560  	require.Error(t, err)
   561  
   562  	for i := 99; i >= 0; i-- {
   563  		r, err := s.ReverseLookup(string(rune(i)))
   564  		require.NoError(t, err)
   565  		require.Equal(t, uint32(i), r)
   566  	}
   567  	_, err = s.ReverseLookup(string(rune(100)))
   568  	require.Error(t, err)
   569  
   570  	iter := s.Iter()
   571  	i := 0
   572  	for iter.Next() {
   573  		require.Equal(t, string(rune(i)), iter.At())
   574  		i++
   575  	}
   576  	require.NoError(t, iter.Err())
   577  }
   578  
   579  func TestDecoder_Postings_WrongInput(t *testing.T) {
   580  	_, _, err := (&Decoder{}).Postings([]byte("the cake is a lie"))
   581  	require.Error(t, err)
   582  }
   583  
   584  func TestWriter_ShouldReturnErrorOnSeriesWithDuplicatedLabelNames(t *testing.T) {
   585  	w, err := NewWriter(context.Background(), BlocksIndexWriterBufSize)
   586  	require.NoError(t, err)
   587  
   588  	require.NoError(t, w.AddSymbol("__name__"))
   589  	require.NoError(t, w.AddSymbol("metric_1"))
   590  	require.NoError(t, w.AddSymbol("metric_2"))
   591  
   592  	require.NoError(t, w.AddSeries(0, phlaremodel.LabelsFromStrings("__name__", "metric_1", "__name__", "metric_2"), 0))
   593  
   594  	err = w.Close()
   595  	require.Error(t, err)
   596  	require.ErrorContains(t, err, "corruption detected when writing postings to index")
   597  }