github.com/thanos-io/thanos@v0.32.5/pkg/store/postings_codec_test.go

// Copyright (c) The Thanos Authors.
// Licensed under the Apache License 2.0.

package store

import (
	"bytes"
	"context"
	crand "crypto/rand"
	"io"
	"math"
	"math/rand"
	"os"
	"sort"
	"strconv"
	"testing"

	"github.com/klauspost/compress/s2"
	"github.com/prometheus/prometheus/model/labels"
	"github.com/prometheus/prometheus/storage"
	"github.com/prometheus/prometheus/tsdb"
	"github.com/prometheus/prometheus/tsdb/index"

	"github.com/efficientgo/core/testutil"
	storetestutil "github.com/thanos-io/thanos/pkg/store/storepb/testutil"
)

// TestStreamedSnappyMaximumDecodedLen checks that maximumDecodedLenSnappyStreamed returns the
// exact length for a small constant payload and stays within sane bounds for random payloads.
func TestStreamedSnappyMaximumDecodedLen(t *testing.T) {
	t.Run("compressed", func(t *testing.T) {
		b := make([]byte, 100)
		for i := 0; i < 100; i++ {
			b[i] = 0x42
		}

		snappyEncoded := &bytes.Buffer{}

		sw := s2.NewWriter(snappyEncoded, s2.WriterSnappyCompat(), s2.WriterBestCompression())

		_, err := sw.Write(b)
		testutil.Ok(t, err)

		testutil.Ok(t, sw.Close())

		maxLen, err := maximumDecodedLenSnappyStreamed(snappyEncoded.Bytes())
		testutil.Ok(t, err)
		t.Log(maxLen)
		testutil.Assert(t, maxLen == 100)
	})
	t.Run("random", func(t *testing.T) {
		for i := 10000; i < 30000; i++ {
			b := make([]byte, i)
			_, err := crand.Read(b)
			testutil.Ok(t, err)

			snappyEncoded := &bytes.Buffer{}

			sw := s2.NewWriter(snappyEncoded, s2.WriterSnappyCompat())

			_, err = sw.Write(b)
			testutil.Ok(t, err)

			testutil.Ok(t, sw.Close())

			maxLen, err := maximumDecodedLenSnappyStreamed(snappyEncoded.Bytes())
			testutil.Ok(t, err)
			testutil.Assert(t, maxLen > 100)
			testutil.Assert(t, maxLen < 30000)
		}
	})
}

func TestDiffVarintCodec(t *testing.T) {
	chunksDir := t.TempDir()

	headOpts := tsdb.DefaultHeadOptions()
	headOpts.ChunkDirRoot = chunksDir
	headOpts.ChunkRange = 1000
	h, err := tsdb.NewHead(nil, nil, nil, nil, headOpts, nil)
	testutil.Ok(t, err)
	defer func() {
		testutil.Ok(t, h.Close())
	}()

	appendTestData(t, h.Appender(context.Background()), 1e6)

	idx, err := h.Index()
	testutil.Ok(t, err)
	defer func() {
		testutil.Ok(t, idx.Close())
	}()

	postingsMap := map[string]index.Postings{
		"all":      allPostings(t, idx),
		`n="1"`:    matchPostings(t, idx, labels.MustNewMatcher(labels.MatchEqual, "n", "1"+storetestutil.LabelLongSuffix)),
		`j="foo"`:  matchPostings(t, idx, labels.MustNewMatcher(labels.MatchEqual, "j", "foo")),
		`j!="foo"`: matchPostings(t, idx, labels.MustNewMatcher(labels.MatchNotEqual, "j", "foo")),
		`i=~".*"`:  matchPostings(t, idx, labels.MustNewMatcher(labels.MatchRegexp, "i", ".*")),
		`i=~".+"`:  matchPostings(t, idx, labels.MustNewMatcher(labels.MatchRegexp, "i", ".+")),
		`i=~"1.+"`: matchPostings(t, idx, labels.MustNewMatcher(labels.MatchRegexp, "i", "1.+")),
		`i=~"^$"`:  matchPostings(t, idx, labels.MustNewMatcher(labels.MatchRegexp, "i", "^$")),
		`i!~""`:    matchPostings(t, idx, labels.MustNewMatcher(labels.MatchNotEqual, "i", "")),
		`n!="2"`:   matchPostings(t, idx, labels.MustNewMatcher(labels.MatchNotEqual, "n", "2"+storetestutil.LabelLongSuffix)),
		`i!~"2.*"`: matchPostings(t, idx, labels.MustNewMatcher(labels.MatchNotRegexp, "i", "^2.*$")),
	}

	codecs := map[string]struct {
		codingFunction   func(index.Postings, int) ([]byte, error)
		decodingFunction func([]byte, bool) (closeablePostings, error)
	}{
		"raw": {codingFunction: diffVarintEncodeNoHeader, decodingFunction: func(bytes []byte, disablePooling bool) (closeablePostings, error) {
			return newDiffVarintPostings(bytes, nil), nil
		}},
		"snappy":         {codingFunction: diffVarintSnappyEncode, decodingFunction: diffVarintSnappyDecode},
		"snappyStreamed": {codingFunction: diffVarintSnappyStreamedEncode, decodingFunction: diffVarintSnappyStreamedDecode},
	}

	for postingName, postings := range postingsMap {
		p, err := toUint64Postings(postings)
		testutil.Ok(t, err)

		for cname, codec := range codecs {
			name := cname + "/" + postingName

			t.Run(name, func(t *testing.T) {
				t.Log("postings entries:", p.len())
				t.Log("original size (4*entries):", 4*p.len(), "bytes")
				p.reset() // We reuse postings between runs, so we need to reset the iterator.

				data, err := codec.codingFunction(p, p.len())
				testutil.Ok(t, err)

				t.Log("encoded size", len(data), "bytes")
				t.Logf("ratio: %0.3f", float64(len(data))/float64(4*p.len()))

				decodedPostings, err := codec.decodingFunction(data, false)
				testutil.Ok(t, err)

				p.reset()
				comparePostings(t, p, decodedPostings)
			})
		}
	}
}

// comparePostings fails the test if the two postings iterators do not yield exactly the same series references.
func comparePostings(t *testing.T, p1, p2 index.Postings) {
	for p1.Next() {
		if !p2.Next() {
			t.Log("p1 has more values")
			t.Fail()
			return
		}

		if p1.At() != p2.At() {
			t.Logf("values differ: %d, %d", p1.At(), p2.At())
			t.Fail()
			return
		}
	}

	if p2.Next() {
		t.Log("p2 has more values")
		t.Fail()
		return
	}

	testutil.Ok(t, p1.Err())
	testutil.Ok(t, p2.Err())
}

func allPostings(t testing.TB, ix tsdb.IndexReader) index.Postings {
	k, v := index.AllPostingsKey()
	p, err := ix.Postings(k, v)
	testutil.Ok(t, err)
	return p
}

func matchPostings(t testing.TB, ix tsdb.IndexReader, m *labels.Matcher) index.Postings {
	vals, err := ix.LabelValues(m.Name)
	testutil.Ok(t, err)

	matching := []string(nil)
	for _, v := range vals {
		if m.Matches(v) {
			matching = append(matching, v)
		}
	}

	p, err := ix.Postings(m.Name, matching...)
	testutil.Ok(t, err)
	return p
}

// toUint64Postings materializes a postings iterator into an in-memory uint64Postings.
func toUint64Postings(p index.Postings) (*uint64Postings, error) {
	var vals []storage.SeriesRef
	for p.Next() {
		vals = append(vals, p.At())
	}
	return &uint64Postings{vals: vals, ix: -1}, p.Err()
}

// Postings with no decoding step.
type uint64Postings struct {
	vals []storage.SeriesRef
	ix   int
}

func (p *uint64Postings) At() storage.SeriesRef {
	if p.ix < 0 || p.ix >= len(p.vals) {
		return 0
	}
	return p.vals[p.ix]
}

func (p *uint64Postings) Next() bool {
	if p.ix < len(p.vals)-1 {
		p.ix++
		return true
	}
	return false
}

func (p *uint64Postings) Seek(x storage.SeriesRef) bool {
	if p.At() >= x {
		return true
	}

	// We cannot do any search due to how values are stored,
	// so we simply advance until we find the right value.
	for p.Next() {
		if p.At() >= x {
			return true
		}
	}

	return false
}

func (p *uint64Postings) Err() error {
	return nil
}

func (p *uint64Postings) reset() {
	p.ix = -1
}

func (p *uint64Postings) len() int {
	return len(p.vals)
}

// BenchmarkPostingsEncodingDecoding measures encoding and decoding throughput of each codec
// on synthetic, sorted series references.
func BenchmarkPostingsEncodingDecoding(b *testing.B) {
	const max = 1000000
	r := rand.New(rand.NewSource(0))

	p := make([]storage.SeriesRef, max)

	for ix := 1; ix < len(p); ix++ {
		// Use a normal distribution with stddev=64 (i.e. most values are < 64).
		// This is a very rough approximation of experiments with real blocks.
		d := math.Abs(r.NormFloat64()*64) + 1

		p[ix] = p[ix-1] + storage.SeriesRef(d)
	}

	codecs := map[string]struct {
		codingFunction   func(index.Postings, int) ([]byte, error)
		decodingFunction func([]byte, bool) (closeablePostings, error)
	}{
		"raw": {codingFunction: diffVarintEncodeNoHeader, decodingFunction: func(bytes []byte, disablePooling bool) (closeablePostings, error) {
			return newDiffVarintPostings(bytes, nil), nil
		}},
		"snappy":         {codingFunction: diffVarintSnappyEncode, decodingFunction: diffVarintSnappyDecode},
		"snappyStreamed": {codingFunction: diffVarintSnappyStreamedEncode, decodingFunction: diffVarintSnappyStreamedDecode},
	}
	b.ReportAllocs()

	for _, count := range []int{10000, 100000, 1000000} {
		b.Run(strconv.Itoa(count), func(b *testing.B) {
			for codecName, codecFns := range codecs {
				b.Run(codecName, func(b *testing.B) {
					b.Run("encode", func(b *testing.B) {
						for i := 0; i < b.N; i++ {
							ps := &uint64Postings{vals: p[:count]}

							_, err := codecFns.codingFunction(ps, ps.len())
							if err != nil {
								b.Fatal(err)
							}
						}
					})
					b.Run("decode", func(b *testing.B) {
						ps := &uint64Postings{vals: p[:count]}

						encoded, err := codecFns.codingFunction(ps, ps.len())
						if err != nil {
							b.Fatal(err)
						}
						b.ResetTimer()

						for i := 0; i < b.N; i++ {
							decoded, err := codecFns.decodingFunction(encoded, true)
							if err != nil {
								b.Fatal(err)
							}

							for decoded.Next() {
								var _ = decoded.At()
							}
							testutil.Ok(b, decoded.Err())
						}
					})
				})
			}
		})
	}
}

// FuzzSnappyStreamEncoding checks that the streamed snappy encoder handles arbitrary sorted
// postings without returning an error.
func FuzzSnappyStreamEncoding(f *testing.F) {
	f.Add(10, 123)

	f.Fuzz(func(t *testing.T, postingsCount, seedInit int) {
		if postingsCount <= 0 {
			return
		}
		r := rand.New(rand.NewSource(int64(seedInit)))
		p := make([]storage.SeriesRef, postingsCount)

		for ix := 1; ix < len(p); ix++ {
			d := math.Abs(r.NormFloat64()*math.MaxUint64) + 1

			p[ix] = p[ix-1] + storage.SeriesRef(d)
		}

		sort.Slice(p, func(i, j int) bool {
			return p[i] < p[j]
		})

		ps := &uint64Postings{vals: p}

		_, err := diffVarintSnappyStreamedEncode(ps, ps.len())
		testutil.Ok(t, err)
	})
}

// TestRegressionIssue6545 round-trips the postings captured for issue 6545 through the streamed
// snappy codec and verifies that all 114024 entries survive.
func TestRegressionIssue6545(t *testing.T) {
	diffVarintPostings, err := os.ReadFile("6545postingsrepro")
	testutil.Ok(t, err)

	gotPostings := 0
	dvp := newDiffVarintPostings(diffVarintPostings, nil)
	decodedPostings := []storage.SeriesRef{}
	for dvp.Next() {
		decodedPostings = append(decodedPostings, dvp.At())
		gotPostings++
	}
	testutil.Ok(t, dvp.Err())
	testutil.Equals(t, 114024, gotPostings)

	dataToCache, err := snappyStreamedEncode(114024, diffVarintPostings)
	testutil.Ok(t, err)

	// Check that the original decompressor works well.
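	// Assumption (based on the 3-byte offset used below): snappyStreamedEncode prepends a short
	// codec header, so dataToCache[3:] hands only the raw snappy stream to s2.NewReader.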
	sr := s2.NewReader(bytes.NewBuffer(dataToCache[3:]))
	readBytes, err := io.ReadAll(sr)
	testutil.Ok(t, err)
	testutil.Equals(t, readBytes, diffVarintPostings)

	dvp = newDiffVarintPostings(readBytes, nil)
	gotPostings = 0
	for dvp.Next() {
		gotPostings++
	}
	testutil.Equals(t, 114024, gotPostings)

	p, err := decodePostings(dataToCache)
	testutil.Ok(t, err)

	i := 0
	for p.Next() {
		post := p.At()
		testutil.Equals(t, uint64(decodedPostings[i]), uint64(post))
		i++
	}

	testutil.Ok(t, p.Err())
	testutil.Equals(t, 114024, i)
}