github.com/thanos-io/thanos@v0.32.5/pkg/store/postings_codec_test.go

// Copyright (c) The Thanos Authors.
// Licensed under the Apache License 2.0.

package store

import (
	"bytes"
	"context"
	crand "crypto/rand"
	"io"
	"math"
	"math/rand"
	"os"
	"sort"
	"strconv"
	"testing"

	"github.com/klauspost/compress/s2"
	"github.com/prometheus/prometheus/model/labels"
	"github.com/prometheus/prometheus/storage"
	"github.com/prometheus/prometheus/tsdb"
	"github.com/prometheus/prometheus/tsdb/index"

	"github.com/efficientgo/core/testutil"
	storetestutil "github.com/thanos-io/thanos/pkg/store/storepb/testutil"
)

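// TestStreamedSnappyMaximumDecodedLen checks that maximumDecodedLenSnappyStreamed
// reports a sane upper bound for the decoded size of a snappy-compatible stream:
// exact for a small, highly compressible input, and within expected bounds for
// incompressible random inputs.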
func TestStreamedSnappyMaximumDecodedLen(t *testing.T) {
	t.Run("compressed", func(t *testing.T) {
		b := make([]byte, 100)
		for i := 0; i < 100; i++ {
			b[i] = 0x42
		}

		snappyEncoded := &bytes.Buffer{}

		sw := s2.NewWriter(snappyEncoded, s2.WriterSnappyCompat(), s2.WriterBestCompression())

		_, err := sw.Write(b)
		testutil.Ok(t, err)

		testutil.Ok(t, sw.Close())

		maxLen, err := maximumDecodedLenSnappyStreamed(snappyEncoded.Bytes())
		testutil.Ok(t, err)
		t.Log(maxLen)
		testutil.Assert(t, maxLen == 100)
	})
	t.Run("random", func(t *testing.T) {
		for i := 10000; i < 30000; i++ {
			b := make([]byte, i)
			_, err := crand.Read(b)
			testutil.Ok(t, err)

			snappyEncoded := &bytes.Buffer{}

			sw := s2.NewWriter(snappyEncoded, s2.WriterSnappyCompat())

			_, err = sw.Write(b)
			testutil.Ok(t, err)

			testutil.Ok(t, sw.Close())

			maxLen, err := maximumDecodedLenSnappyStreamed(snappyEncoded.Bytes())
			testutil.Ok(t, err)
			testutil.Assert(t, maxLen > 100)
			testutil.Assert(t, maxLen < 30000)
		}
	})
}

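// TestDiffVarintCodec builds an in-memory TSDB head with generated test data,
// extracts postings for a variety of matchers, and verifies that each codec
// (raw diff+varint, snappy, streamed snappy) round-trips the postings without
// losing or reordering entries.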
func TestDiffVarintCodec(t *testing.T) {
	chunksDir := t.TempDir()

	headOpts := tsdb.DefaultHeadOptions()
	headOpts.ChunkDirRoot = chunksDir
	headOpts.ChunkRange = 1000
	h, err := tsdb.NewHead(nil, nil, nil, nil, headOpts, nil)
	testutil.Ok(t, err)
	defer func() {
		testutil.Ok(t, h.Close())
	}()

	appendTestData(t, h.Appender(context.Background()), 1e6)

	idx, err := h.Index()
	testutil.Ok(t, err)
	defer func() {
		testutil.Ok(t, idx.Close())
	}()

	postingsMap := map[string]index.Postings{
		"all":      allPostings(t, idx),
		`n="1"`:    matchPostings(t, idx, labels.MustNewMatcher(labels.MatchEqual, "n", "1"+storetestutil.LabelLongSuffix)),
		`j="foo"`:  matchPostings(t, idx, labels.MustNewMatcher(labels.MatchEqual, "j", "foo")),
		`j!="foo"`: matchPostings(t, idx, labels.MustNewMatcher(labels.MatchNotEqual, "j", "foo")),
		`i=~".*"`:  matchPostings(t, idx, labels.MustNewMatcher(labels.MatchRegexp, "i", ".*")),
		`i=~".+"`:  matchPostings(t, idx, labels.MustNewMatcher(labels.MatchRegexp, "i", ".+")),
		`i=~"1.+"`: matchPostings(t, idx, labels.MustNewMatcher(labels.MatchRegexp, "i", "1.+")),
		`i=~"^$"`:  matchPostings(t, idx, labels.MustNewMatcher(labels.MatchRegexp, "i", "^$")),
		`i!~""`:    matchPostings(t, idx, labels.MustNewMatcher(labels.MatchNotEqual, "i", "")),
		`n!="2"`:   matchPostings(t, idx, labels.MustNewMatcher(labels.MatchNotEqual, "n", "2"+storetestutil.LabelLongSuffix)),
		`i!~"2.*"`: matchPostings(t, idx, labels.MustNewMatcher(labels.MatchNotRegexp, "i", "^2.*$")),
	}

	codecs := map[string]struct {
		codingFunction   func(index.Postings, int) ([]byte, error)
		decodingFunction func([]byte, bool) (closeablePostings, error)
	}{
		"raw": {codingFunction: diffVarintEncodeNoHeader, decodingFunction: func(bytes []byte, disablePooling bool) (closeablePostings, error) {
			return newDiffVarintPostings(bytes, nil), nil
		}},
		"snappy":         {codingFunction: diffVarintSnappyEncode, decodingFunction: diffVarintSnappyDecode},
		"snappyStreamed": {codingFunction: diffVarintSnappyStreamedEncode, decodingFunction: diffVarintSnappyStreamedDecode},
	}

	for postingName, postings := range postingsMap {
		p, err := toUint64Postings(postings)
		testutil.Ok(t, err)

		for cname, codec := range codecs {
			name := cname + "/" + postingName

			t.Run(name, func(t *testing.T) {
				t.Log("postings entries:", p.len())
				t.Log("original size (4*entries):", 4*p.len(), "bytes")
				p.reset() // We reuse postings between runs, so we need to reset the iterator.
				data, err := codec.codingFunction(p, p.len())
				testutil.Ok(t, err)

				t.Log("encoded size", len(data), "bytes")
				t.Logf("ratio: %0.3f", float64(len(data))/float64(4*p.len()))

				decodedPostings, err := codec.decodingFunction(data, false)
				testutil.Ok(t, err)

				p.reset()
				comparePostings(t, p, decodedPostings)
			})
		}
	}
}

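// comparePostings fails the test if the two postings iterators do not yield
// exactly the same sequence of series references.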
func comparePostings(t *testing.T, p1, p2 index.Postings) {
	for p1.Next() {
		if !p2.Next() {
			t.Log("p1 has more values")
			t.Fail()
			return
		}

		if p1.At() != p2.At() {
			t.Logf("values differ: %d, %d", p1.At(), p2.At())
			t.Fail()
			return
		}
	}

	if p2.Next() {
		t.Log("p2 has more values")
		t.Fail()
		return
	}

	testutil.Ok(t, p1.Err())
	testutil.Ok(t, p2.Err())
}

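// allPostings returns postings for all series in the index reader.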
func allPostings(t testing.TB, ix tsdb.IndexReader) index.Postings {
	k, v := index.AllPostingsKey()
	p, err := ix.Postings(k, v)
	testutil.Ok(t, err)
	return p
}

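// matchPostings returns postings for every series whose value of the matcher's
// label name satisfies the matcher.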
func matchPostings(t testing.TB, ix tsdb.IndexReader, m *labels.Matcher) index.Postings {
	vals, err := ix.LabelValues(m.Name)
	testutil.Ok(t, err)

	matching := []string(nil)
	for _, v := range vals {
		if m.Matches(v) {
			matching = append(matching, v)
		}
	}

	p, err := ix.Postings(m.Name, matching...)
	testutil.Ok(t, err)
	return p
}

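// toUint64Postings drains a postings iterator into an in-memory uint64Postings
// so it can be reset and replayed multiple times.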
func toUint64Postings(p index.Postings) (*uint64Postings, error) {
	var vals []storage.SeriesRef
	for p.Next() {
		vals = append(vals, p.At())
	}
	return &uint64Postings{vals: vals, ix: -1}, p.Err()
}

// Postings with no decoding step.
type uint64Postings struct {
	vals []storage.SeriesRef
	ix   int
}

func (p *uint64Postings) At() storage.SeriesRef {
	if p.ix < 0 || p.ix >= len(p.vals) {
		return 0
	}
	return p.vals[p.ix]
}

func (p *uint64Postings) Next() bool {
	if p.ix < len(p.vals)-1 {
		p.ix++
		return true
	}
	return false
}

func (p *uint64Postings) Seek(x storage.SeriesRef) bool {
	if p.At() >= x {
		return true
	}

	// We cannot do any search due to how values are stored,
	// so we simply advance until we find the right value.
	for p.Next() {
		if p.At() >= x {
			return true
		}
	}

	return false
}

func (p *uint64Postings) Err() error {
	return nil
}

func (p *uint64Postings) reset() {
	p.ix = -1
}

func (p *uint64Postings) len() int {
	return len(p.vals)
}

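// BenchmarkPostingsEncodingDecoding measures encode and decode throughput of
// each codec on synthetic postings lists of 10k, 100k, and 1M entries with
// roughly realistic gaps between series references.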
func BenchmarkPostingsEncodingDecoding(b *testing.B) {
	const max = 1000000
	r := rand.New(rand.NewSource(0))

	p := make([]storage.SeriesRef, max)

	for ix := 1; ix < len(p); ix++ {
		// Use normal distribution, with stddev=64 (i.e. most values are < 64).
		// This is a very rough approximation of experiments with real blocks.
		d := math.Abs(r.NormFloat64()*64) + 1

		p[ix] = p[ix-1] + storage.SeriesRef(d)
	}

	codecs := map[string]struct {
		codingFunction   func(index.Postings, int) ([]byte, error)
		decodingFunction func([]byte, bool) (closeablePostings, error)
	}{
		"raw": {codingFunction: diffVarintEncodeNoHeader, decodingFunction: func(bytes []byte, disablePooling bool) (closeablePostings, error) {
			return newDiffVarintPostings(bytes, nil), nil
		}},
		"snappy":         {codingFunction: diffVarintSnappyEncode, decodingFunction: diffVarintSnappyDecode},
		"snappyStreamed": {codingFunction: diffVarintSnappyStreamedEncode, decodingFunction: diffVarintSnappyStreamedDecode},
	}
	b.ReportAllocs()

	for _, count := range []int{10000, 100000, 1000000} {
		b.Run(strconv.Itoa(count), func(b *testing.B) {
			for codecName, codecFns := range codecs {
				b.Run(codecName, func(b *testing.B) {
					b.Run("encode", func(b *testing.B) {
						for i := 0; i < b.N; i++ {
							ps := &uint64Postings{vals: p[:count]}

							_, err := codecFns.codingFunction(ps, ps.len())
							if err != nil {
								b.Fatal(err)
							}
						}
					})
					b.Run("decode", func(b *testing.B) {
						ps := &uint64Postings{vals: p[:count]}

						encoded, err := codecFns.codingFunction(ps, ps.len())
						if err != nil {
							b.Fatal(err)
						}
						b.ResetTimer()

						for i := 0; i < b.N; i++ {
							decoded, err := codecFns.decodingFunction(encoded, true)
							if err != nil {
								b.Fatal(err)
							}

							for decoded.Next() {
								_ = decoded.At()
							}
							testutil.Ok(b, decoded.Err())
						}
					})
				})
			}
		})
	}
}

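// FuzzSnappyStreamEncoding checks that streamed snappy encoding succeeds on
// sorted postings lists of fuzz-chosen length with randomly sized gaps.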
func FuzzSnappyStreamEncoding(f *testing.F) {
	f.Add(10, 123)

	f.Fuzz(func(t *testing.T, postingsCount, seedInit int) {
		if postingsCount <= 0 {
			return
		}
		r := rand.New(rand.NewSource(int64(seedInit)))
		p := make([]storage.SeriesRef, postingsCount)

		for ix := 1; ix < len(p); ix++ {
			d := math.Abs(r.NormFloat64()*math.MaxUint64) + 1

			p[ix] = p[ix-1] + storage.SeriesRef(d)
		}

		sort.Slice(p, func(i, j int) bool {
			return p[i] < p[j]
		})

		ps := &uint64Postings{vals: p}

		_, err := diffVarintSnappyStreamedEncode(ps, ps.len())
		testutil.Ok(t, err)
	})
}

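// TestRegressionIssue6545 is a regression test for Thanos issue 6545: it loads
// a captured diff+varint postings payload, re-encodes it with the streamed
// snappy codec, and verifies that both the raw s2 reader and decodePostings
// still return all 114024 postings unchanged.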
func TestRegressionIssue6545(t *testing.T) {
	diffVarintPostings, err := os.ReadFile("6545postingsrepro")
	testutil.Ok(t, err)

	gotPostings := 0
	dvp := newDiffVarintPostings(diffVarintPostings, nil)
	decodedPostings := []storage.SeriesRef{}
	for dvp.Next() {
		decodedPostings = append(decodedPostings, dvp.At())
		gotPostings++
	}
	testutil.Ok(t, dvp.Err())
	testutil.Equals(t, 114024, gotPostings)

	dataToCache, err := snappyStreamedEncode(114024, diffVarintPostings)
	testutil.Ok(t, err)

	// Check that the original decompressor works well.
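	// dataToCache[3:] skips the codec header that snappyStreamedEncode prepends;
	// the remainder is a plain snappy-compatible stream.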
	sr := s2.NewReader(bytes.NewBuffer(dataToCache[3:]))
	readBytes, err := io.ReadAll(sr)
	testutil.Ok(t, err)
	testutil.Equals(t, readBytes, diffVarintPostings)

	dvp = newDiffVarintPostings(readBytes, nil)
	gotPostings = 0
	for dvp.Next() {
		gotPostings++
	}
	testutil.Equals(t, 114024, gotPostings)

	p, err := decodePostings(dataToCache)
	testutil.Ok(t, err)

	i := 0
	for p.Next() {
		post := p.At()
		testutil.Equals(t, uint64(decodedPostings[i]), uint64(post))
		i++
	}

	testutil.Ok(t, p.Err())
	testutil.Equals(t, 114024, i)
}