github.com/grafana/pyroscope@v1.18.0/pkg/segmentwriter/memdb/index/index_test.go (about) 1 // Copyright 2017 The Prometheus Authors 2 // Licensed under the Apache License, Version 2.0 (the "License"); 3 // you may not use this file except in compliance with the License. 4 // You may obtain a copy of the License at 5 // 6 // http://www.apache.org/licenses/LICENSE-2.0 7 // 8 // Unless required by applicable law or agreed to in writing, software 9 // distributed under the License is distributed on an "AS IS" BASIS, 10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package index 15 16 import ( 17 "context" 18 "fmt" 19 "hash/crc32" 20 "math/rand" 21 "os" 22 "path/filepath" 23 "sort" 24 "testing" 25 26 "github.com/grafana/pyroscope/pkg/phlaredb/tsdb/index" 27 28 "github.com/pkg/errors" 29 "github.com/stretchr/testify/require" 30 "go.uber.org/goleak" 31 32 "github.com/prometheus/common/model" 33 "github.com/prometheus/prometheus/model/labels" 34 "github.com/prometheus/prometheus/storage" 35 "github.com/prometheus/prometheus/tsdb/encoding" 36 "github.com/prometheus/prometheus/util/testutil" 37 38 typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1" 39 "github.com/grafana/pyroscope/pkg/iter" 40 phlaremodel "github.com/grafana/pyroscope/pkg/model" 41 ) 42 43 func TestMain(m *testing.M) { 44 goleak.VerifyTestMain(m, 45 goleak.IgnoreTopFunction("github.com/golang/glog.(*fileSink).flushDaemon"), 46 goleak.IgnoreTopFunction("github.com/dgraph-io/ristretto.(*defaultPolicy).processItems"), 47 goleak.IgnoreTopFunction("github.com/dgraph-io/ristretto.(*Cache).processItems"), 48 ) 49 } 50 51 type series struct { 52 l phlaremodel.Labels 53 chunks []index.ChunkMeta 54 } 55 56 type mockIndex struct { 57 series map[storage.SeriesRef]series 58 // we're forced to use a anonymous struct here because we can't use typesv1.LabelPair as it's not comparable. 59 postings map[struct{ Name, Value string }][]storage.SeriesRef 60 symbols map[string]struct{} 61 } 62 63 func newMockIndex() mockIndex { 64 allPostingsKeyName, allPostingsKeyValue := index.AllPostingsKey() 65 ix := mockIndex{ 66 series: make(map[storage.SeriesRef]series), 67 postings: make(map[struct{ Name, Value string }][]storage.SeriesRef), 68 symbols: make(map[string]struct{}), 69 } 70 ix.postings[struct { 71 Name string 72 Value string 73 }{allPostingsKeyName, allPostingsKeyValue}] = []storage.SeriesRef{} 74 return ix 75 } 76 77 func (m mockIndex) Symbols() (map[string]struct{}, error) { 78 return m.symbols, nil 79 } 80 81 func (m mockIndex) AddSeries(ref storage.SeriesRef, l phlaremodel.Labels, chunks ...index.ChunkMeta) error { 82 allPostingsKeyName, allPostingsKeyValue := index.AllPostingsKey() 83 84 if _, ok := m.series[ref]; ok { 85 return errors.Errorf("series with reference %d already added", ref) 86 } 87 for _, lbl := range l { 88 m.symbols[lbl.Name] = struct{}{} 89 m.symbols[lbl.Value] = struct{}{} 90 if _, ok := m.postings[struct { 91 Name string 92 Value string 93 }{lbl.Name, lbl.Value}]; !ok { 94 m.postings[struct { 95 Name string 96 Value string 97 }{lbl.Name, lbl.Value}] = []storage.SeriesRef{} 98 } 99 m.postings[struct { 100 Name string 101 Value string 102 }{lbl.Name, lbl.Value}] = append(m.postings[struct { 103 Name string 104 Value string 105 }{lbl.Name, lbl.Value}], ref) 106 } 107 m.postings[struct { 108 Name string 109 Value string 110 }{allPostingsKeyName, allPostingsKeyValue}] = append(m.postings[struct { 111 Name string 112 Value string 113 }{allPostingsKeyName, allPostingsKeyValue}], ref) 114 115 s := series{l: l} 116 // Actual chunk data is not stored in the index. 117 s.chunks = append(s.chunks, chunks...) 118 m.series[ref] = s 119 120 return nil 121 } 122 123 func (m mockIndex) Close() error { 124 return nil 125 } 126 127 func (m mockIndex) LabelValues(name string) ([]string, error) { 128 values := []string{} 129 for l := range m.postings { 130 if l.Name == name { 131 values = append(values, l.Value) 132 } 133 } 134 return values, nil 135 } 136 137 func (m mockIndex) Postings(name string, values ...string) (index.Postings, error) { 138 p := []index.Postings{} 139 for _, value := range values { 140 p = append(p, iter.NewSliceSeekIterator(m.postings[struct { 141 Name string 142 Value string 143 }{Name: name, Value: value}])) 144 } 145 return index.Merge(p...), nil 146 } 147 148 func (m mockIndex) Series(ref storage.SeriesRef, lset *phlaremodel.Labels, chks *[]index.ChunkMeta) error { 149 s, ok := m.series[ref] 150 if !ok { 151 return errors.New("not found") 152 } 153 *lset = append((*lset)[:0], s.l...) 154 *chks = append((*chks)[:0], s.chunks...) 155 156 return nil 157 } 158 159 func TestIndexRW_Create_Open(t *testing.T) { 160 161 // An empty index must still result in a readable file. 162 iw, err := NewWriter(context.Background(), BlocksIndexWriterBufSize) 163 require.NoError(t, err) 164 require.NoError(t, iw.Close()) 165 166 bytes := iw.ReleaseIndexBuffer().buf.Bytes() 167 ir, err := NewReader(RealByteSlice(bytes)) 168 require.NoError(t, err) 169 require.NoError(t, ir.Close()) 170 171 // Modify magic header must cause open to fail. 172 //f, err := os.OpenFile(fn, os.O_WRONLY, 0o666) 173 //require.NoError(t, err) 174 //err = iw.f.WriteAt([]byte{0, 0}, 0) 175 bytes[0] = 0 176 require.NoError(t, err) 177 //f.Close() 178 179 //_, err = NewFileReader(dir) 180 //require.Error(t, err) 181 } 182 183 func TestIndexRW_Postings(t *testing.T) { 184 185 iw, err := NewWriter(context.Background(), BlocksIndexWriterBufSize) 186 require.NoError(t, err) 187 188 series := []phlaremodel.Labels{ 189 phlaremodel.LabelsFromStrings("a", "1", "b", "1"), 190 phlaremodel.LabelsFromStrings("a", "1", "b", "2"), 191 phlaremodel.LabelsFromStrings("a", "1", "b", "3"), 192 phlaremodel.LabelsFromStrings("a", "1", "b", "4"), 193 } 194 195 require.NoError(t, iw.AddSymbol("1")) 196 require.NoError(t, iw.AddSymbol("2")) 197 require.NoError(t, iw.AddSymbol("3")) 198 require.NoError(t, iw.AddSymbol("4")) 199 require.NoError(t, iw.AddSymbol("a")) 200 require.NoError(t, iw.AddSymbol("b")) 201 202 // Postings lists are only written if a series with the respective 203 // reference was added before. 204 require.NoError(t, iw.AddSeries(1, series[0], model.Fingerprint(series[0].Hash()))) 205 require.NoError(t, iw.AddSeries(2, series[1], model.Fingerprint(series[1].Hash()))) 206 require.NoError(t, iw.AddSeries(3, series[2], model.Fingerprint(series[2].Hash()))) 207 require.NoError(t, iw.AddSeries(4, series[3], model.Fingerprint(series[3].Hash()))) 208 209 require.NoError(t, iw.Close()) 210 211 ir, err := NewReader(RealByteSlice(iw.ReleaseIndexBuffer().buf.Bytes())) 212 require.NoError(t, err) 213 214 p, err := ir.Postings("a", nil, "1") 215 require.NoError(t, err) 216 217 var l phlaremodel.Labels 218 var c []index.ChunkMeta 219 220 for i := 0; p.Next(); i++ { 221 _, err := ir.Series(p.At(), &l, &c) 222 223 require.NoError(t, err) 224 require.Equal(t, 0, len(c)) 225 require.Equal(t, series[i], l) 226 } 227 require.NoError(t, p.Err()) 228 229 // The label indices are no longer used, so test them by hand here. 230 labelIndices := map[string][]string{} 231 require.NoError(t, ReadOffsetTable(ir.b, ir.toc.LabelIndicesTable, func(key []string, off uint64, _ int) error { 232 if len(key) != 1 { 233 return errors.Errorf("unexpected key length for label indices table %d", len(key)) 234 } 235 236 d := encoding.NewDecbufAt(ir.b, int(off), castagnoliTable) 237 vals := []string{} 238 nc := d.Be32int() 239 if nc != 1 { 240 return errors.Errorf("unexpected number of label indices table names %d", nc) 241 } 242 for i := d.Be32(); i > 0; i-- { 243 v, err := ir.lookupSymbol(d.Be32()) 244 if err != nil { 245 return err 246 } 247 vals = append(vals, v) 248 } 249 labelIndices[key[0]] = vals 250 return d.Err() 251 })) 252 require.Equal(t, map[string][]string{ 253 "a": {"1"}, 254 "b": {"1", "2", "3", "4"}, 255 }, labelIndices) 256 257 require.NoError(t, ir.Close()) 258 } 259 260 func TestPostingsMany(t *testing.T) { 261 262 iw, err := NewWriter(context.Background(), BlocksIndexWriterBufSize) 263 require.NoError(t, err) 264 265 // Create a label in the index which has 999 values. 266 symbols := map[string]struct{}{} 267 series := []phlaremodel.Labels{} 268 for i := 1; i < 1000; i++ { 269 v := fmt.Sprintf("%03d", i) 270 series = append(series, phlaremodel.LabelsFromStrings("i", v, "foo", "bar")) 271 symbols[v] = struct{}{} 272 } 273 symbols["i"] = struct{}{} 274 symbols["foo"] = struct{}{} 275 symbols["bar"] = struct{}{} 276 syms := []string{} 277 for s := range symbols { 278 syms = append(syms, s) 279 } 280 sort.Strings(syms) 281 for _, s := range syms { 282 require.NoError(t, iw.AddSymbol(s)) 283 } 284 285 sort.Slice(series, func(i, j int) bool { 286 return series[i].Hash() < series[j].Hash() 287 }) 288 289 for i, s := range series { 290 require.NoError(t, iw.AddSeries(storage.SeriesRef(i), s, model.Fingerprint(s.Hash()))) 291 } 292 require.NoError(t, iw.Close()) 293 294 ir, err := NewReader(RealByteSlice(iw.ReleaseIndexBuffer().buf.Bytes())) 295 require.NoError(t, err) 296 defer func() { require.NoError(t, ir.Close()) }() 297 298 cases := []struct { 299 in []string 300 }{ 301 // Simple cases, everything is present. 302 {in: []string{"002"}}, 303 {in: []string{"031", "032", "033"}}, 304 {in: []string{"032", "033"}}, 305 {in: []string{"127", "128"}}, 306 {in: []string{"127", "128", "129"}}, 307 {in: []string{"127", "129"}}, 308 {in: []string{"128", "129"}}, 309 {in: []string{"998", "999"}}, 310 {in: []string{"999"}}, 311 // Before actual values. 312 {in: []string{"000"}}, 313 {in: []string{"000", "001"}}, 314 {in: []string{"000", "002"}}, 315 // After actual values. 316 {in: []string{"999a"}}, 317 {in: []string{"999", "999a"}}, 318 {in: []string{"998", "999", "999a"}}, 319 // In the middle of actual values. 320 {in: []string{"126a", "127", "128"}}, 321 {in: []string{"127", "127a", "128"}}, 322 {in: []string{"127", "127a", "128", "128a", "129"}}, 323 {in: []string{"127", "128a", "129"}}, 324 {in: []string{"128", "128a", "129"}}, 325 {in: []string{"128", "129", "129a"}}, 326 {in: []string{"126a", "126b", "127", "127a", "127b", "128", "128a", "128b", "129", "129a", "129b"}}, 327 } 328 329 for _, c := range cases { 330 it, err := ir.Postings("i", nil, c.in...) 331 require.NoError(t, err) 332 333 got := []string{} 334 var lbls phlaremodel.Labels 335 var metas []index.ChunkMeta 336 for it.Next() { 337 _, err := ir.Series(it.At(), &lbls, &metas) 338 require.NoError(t, err) 339 got = append(got, lbls.Get("i")) 340 } 341 require.NoError(t, it.Err()) 342 exp := []string{} 343 for _, e := range c.in { 344 if _, ok := symbols[e]; ok && e != "l" { 345 exp = append(exp, e) 346 } 347 } 348 349 // sort expected values by label hash instead of lexicographically by labelset 350 sort.Slice(exp, func(i, j int) bool { 351 return labels.StableHash(labels.FromStrings("i", exp[i], "foo", "bar")) < labels.StableHash(labels.FromStrings("i", exp[j], "foo", "bar")) 352 }) 353 354 require.Equal(t, exp, got, fmt.Sprintf("input: %v", c.in)) 355 } 356 } 357 358 func TestPersistence_index_e2e(t *testing.T) { 359 lbls, err := labels.ReadLabels("../../../phlaredb/tsdb/testdata/20kseries.json", 20000) 360 require.NoError(t, err) 361 362 flbls := make([]phlaremodel.Labels, len(lbls)) 363 for i, ls := range lbls { 364 flbls[i] = make(phlaremodel.Labels, 0, ls.Len()) 365 ls.Range(func(l labels.Label) { 366 flbls[i] = append(flbls[i], &typesv1.LabelPair{Name: l.Name, Value: l.Value}) 367 }) 368 } 369 370 // Sort labels as the index writer expects series in sorted order by fingerprint. 371 sort.Slice(flbls, func(i, j int) bool { 372 return flbls[i].Hash() < flbls[j].Hash() 373 }) 374 375 symbols := map[string]struct{}{} 376 for _, lset := range lbls { 377 lset.Range(func(l labels.Label) { 378 symbols[l.Name] = struct{}{} 379 symbols[l.Value] = struct{}{} 380 }) 381 } 382 383 var input index.IndexWriterSeriesSlice 384 385 // Generate ChunkMetas for every label set. 386 for i, lset := range flbls { 387 var metas []index.ChunkMeta 388 389 for j := 0; j <= (i % 20); j++ { 390 metas = append(metas, index.ChunkMeta{ 391 MinTime: int64(j * 10000), 392 MaxTime: int64((j + 1) * 10000), 393 Checksum: rand.Uint32(), 394 }) 395 } 396 input = append(input, &index.IndexWriterSeries{ 397 Labels: lset, 398 Chunks: metas, 399 }) 400 } 401 402 iw, err := NewWriter(context.Background(), BlocksIndexWriterBufSize) 403 require.NoError(t, err) 404 405 syms := []string{} 406 for s := range symbols { 407 syms = append(syms, s) 408 } 409 sort.Strings(syms) 410 for _, s := range syms { 411 require.NoError(t, iw.AddSymbol(s)) 412 } 413 414 // Population procedure as done by compaction. 415 var ( 416 postings = index.NewMemPostings() 417 values = map[string]map[string]struct{}{} 418 ) 419 420 mi := newMockIndex() 421 422 for i, s := range input { 423 err = iw.AddSeries(storage.SeriesRef(i), s.Labels, model.Fingerprint(s.Labels.Hash()), s.Chunks...) 424 require.NoError(t, err) 425 require.NoError(t, mi.AddSeries(storage.SeriesRef(i), s.Labels, s.Chunks...)) 426 427 for _, l := range s.Labels { 428 valset, ok := values[l.Name] 429 if !ok { 430 valset = map[string]struct{}{} 431 values[l.Name] = valset 432 } 433 valset[l.Value] = struct{}{} 434 } 435 postings.Add(storage.SeriesRef(i), s.Labels) 436 } 437 438 err = iw.Close() 439 require.NoError(t, err) 440 441 ir, err := NewReader(RealByteSlice(iw.ReleaseIndexBuffer().buf.Bytes())) 442 require.NoError(t, err) 443 444 for p := range mi.postings { 445 gotp, err := ir.Postings(p.Name, nil, p.Value) 446 require.NoError(t, err) 447 448 expp, err := mi.Postings(p.Name, p.Value) 449 require.NoError(t, err) 450 451 var lset, explset phlaremodel.Labels 452 var chks, expchks []index.ChunkMeta 453 454 for gotp.Next() { 455 require.True(t, expp.Next()) 456 457 ref := gotp.At() 458 459 _, err := ir.Series(ref, &lset, &chks) 460 require.NoError(t, err) 461 462 err = mi.Series(expp.At(), &explset, &expchks) 463 require.NoError(t, err) 464 require.Equal(t, explset, lset) 465 require.Equal(t, expchks, chks) 466 } 467 require.False(t, expp.Next(), "Expected no more postings for %q=%q", p.Name, p.Value) 468 require.NoError(t, gotp.Err()) 469 } 470 471 labelPairs := map[string][]string{} 472 for l := range mi.postings { 473 labelPairs[l.Name] = append(labelPairs[l.Name], l.Value) 474 } 475 for k, v := range labelPairs { 476 sort.Strings(v) 477 478 res, err := ir.SortedLabelValues(k) 479 require.NoError(t, err) 480 481 require.Equal(t, len(v), len(res)) 482 for i := 0; i < len(v); i++ { 483 require.Equal(t, v[i], res[i]) 484 } 485 } 486 487 gotSymbols := []string{} 488 it := ir.Symbols() 489 for it.Next() { 490 gotSymbols = append(gotSymbols, it.At()) 491 } 492 require.NoError(t, it.Err()) 493 expSymbols := []string{} 494 for s := range mi.symbols { 495 expSymbols = append(expSymbols, s) 496 } 497 sort.Strings(expSymbols) 498 require.Equal(t, expSymbols, gotSymbols) 499 500 require.NoError(t, ir.Close()) 501 } 502 503 func TestDecbufUvarintWithInvalidBuffer(t *testing.T) { 504 b := RealByteSlice([]byte{0x81, 0x81, 0x81, 0x81, 0x81, 0x81}) 505 506 db := encoding.NewDecbufUvarintAt(b, 0, castagnoliTable) 507 require.Error(t, db.Err()) 508 } 509 510 func TestReaderWithInvalidBuffer(t *testing.T) { 511 b := RealByteSlice([]byte{0x81, 0x81, 0x81, 0x81, 0x81, 0x81}) 512 513 _, err := NewReader(b) 514 require.Error(t, err) 515 } 516 517 // TestNewFileReaderErrorNoOpenFiles ensures that in case of an error no file remains open. 518 func TestNewFileReaderErrorNoOpenFiles(t *testing.T) { 519 dir := testutil.NewTemporaryDirectory("block", t) 520 521 idxName := filepath.Join(dir.Path(), "index") 522 err := os.WriteFile(idxName, []byte("corrupted contents"), 0o666) 523 require.NoError(t, err) 524 525 _, err = NewFileReader(idxName) 526 require.Error(t, err) 527 528 // dir.Close will fail on Win if idxName fd is not closed on error path. 529 dir.Close() 530 } 531 532 func TestSymbols(t *testing.T) { 533 buf := encoding.Encbuf{} 534 535 // Add prefix to the buffer to simulate symbols as part of larger buffer. 536 buf.PutUvarintStr("something") 537 538 symbolsStart := buf.Len() 539 buf.PutBE32int(204) // Length of symbols table. 540 buf.PutBE32int(100) // Number of symbols. 541 for i := 0; i < 100; i++ { 542 // i represents index in unicode characters table. 543 buf.PutUvarintStr(string(rune(i))) // Symbol. 544 } 545 checksum := crc32.Checksum(buf.Get()[symbolsStart+4:], castagnoliTable) 546 buf.PutBE32(checksum) // Check sum at the end. 547 548 s, err := NewSymbols(RealByteSlice(buf.Get()), FormatV2, symbolsStart) 549 require.NoError(t, err) 550 551 // We store only 4 offsets to symbols. 552 require.Equal(t, 32, s.Size()) 553 554 for i := 99; i >= 0; i-- { 555 s, err := s.Lookup(uint32(i)) 556 require.NoError(t, err) 557 require.Equal(t, string(rune(i)), s) 558 } 559 _, err = s.Lookup(100) 560 require.Error(t, err) 561 562 for i := 99; i >= 0; i-- { 563 r, err := s.ReverseLookup(string(rune(i))) 564 require.NoError(t, err) 565 require.Equal(t, uint32(i), r) 566 } 567 _, err = s.ReverseLookup(string(rune(100))) 568 require.Error(t, err) 569 570 iter := s.Iter() 571 i := 0 572 for iter.Next() { 573 require.Equal(t, string(rune(i)), iter.At()) 574 i++ 575 } 576 require.NoError(t, iter.Err()) 577 } 578 579 func TestDecoder_Postings_WrongInput(t *testing.T) { 580 _, _, err := (&Decoder{}).Postings([]byte("the cake is a lie")) 581 require.Error(t, err) 582 } 583 584 func TestWriter_ShouldReturnErrorOnSeriesWithDuplicatedLabelNames(t *testing.T) { 585 w, err := NewWriter(context.Background(), BlocksIndexWriterBufSize) 586 require.NoError(t, err) 587 588 require.NoError(t, w.AddSymbol("__name__")) 589 require.NoError(t, w.AddSymbol("metric_1")) 590 require.NoError(t, w.AddSymbol("metric_2")) 591 592 require.NoError(t, w.AddSeries(0, phlaremodel.LabelsFromStrings("__name__", "metric_1", "__name__", "metric_2"), 0)) 593 594 err = w.Close() 595 require.Error(t, err) 596 require.ErrorContains(t, err, "corruption detected when writing postings to index") 597 }