github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/tsdb/index/index_test.go (about) 1 // Copyright 2017 The Prometheus Authors 2 // Licensed under the Apache License, Version 2.0 (the "License"); 3 // you may not use this file except in compliance with the License. 4 // You may obtain a copy of the License at 5 // 6 // http://www.apache.org/licenses/LICENSE-2.0 7 // 8 // Unless required by applicable law or agreed to in writing, software 9 // distributed under the License is distributed on an "AS IS" BASIS, 10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package index 15 16 import ( 17 "context" 18 "fmt" 19 "hash/crc32" 20 "math/rand" 21 "os" 22 "path/filepath" 23 "sort" 24 "testing" 25 26 "github.com/pkg/errors" 27 "github.com/stretchr/testify/require" 28 "go.uber.org/goleak" 29 30 "github.com/prometheus/common/model" 31 "github.com/prometheus/prometheus/model/labels" 32 "github.com/prometheus/prometheus/storage" 33 "github.com/prometheus/prometheus/tsdb/encoding" 34 "github.com/prometheus/prometheus/util/testutil" 35 36 typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1" 37 "github.com/grafana/pyroscope/pkg/iter" 38 phlaremodel "github.com/grafana/pyroscope/pkg/model" 39 ) 40 41 func TestMain(m *testing.M) { 42 goleak.VerifyTestMain(m, 43 goleak.IgnoreTopFunction("github.com/golang/glog.(*fileSink).flushDaemon"), 44 goleak.IgnoreTopFunction("github.com/dgraph-io/ristretto.(*defaultPolicy).processItems"), 45 goleak.IgnoreTopFunction("github.com/dgraph-io/ristretto.(*Cache).processItems"), 46 ) 47 } 48 49 type series struct { 50 l phlaremodel.Labels 51 chunks []ChunkMeta 52 } 53 54 type mockIndex struct { 55 series map[storage.SeriesRef]series 56 // we're forced to use a anonymous struct here because we can't use typesv1.LabelPair as it's not comparable. 57 postings map[struct{ Name, Value string }][]storage.SeriesRef 58 symbols map[string]struct{} 59 } 60 61 func newMockIndex() mockIndex { 62 ix := mockIndex{ 63 series: make(map[storage.SeriesRef]series), 64 postings: make(map[struct{ Name, Value string }][]storage.SeriesRef), 65 symbols: make(map[string]struct{}), 66 } 67 ix.postings[struct { 68 Name string 69 Value string 70 }{allPostingsKey.Name, allPostingsKey.Value}] = []storage.SeriesRef{} 71 return ix 72 } 73 74 func (m mockIndex) Symbols() (map[string]struct{}, error) { 75 return m.symbols, nil 76 } 77 78 func (m mockIndex) AddSeries(ref storage.SeriesRef, l phlaremodel.Labels, chunks ...ChunkMeta) error { 79 if _, ok := m.series[ref]; ok { 80 return errors.Errorf("series with reference %d already added", ref) 81 } 82 for _, lbl := range l { 83 m.symbols[lbl.Name] = struct{}{} 84 m.symbols[lbl.Value] = struct{}{} 85 if _, ok := m.postings[struct { 86 Name string 87 Value string 88 }{lbl.Name, lbl.Value}]; !ok { 89 m.postings[struct { 90 Name string 91 Value string 92 }{lbl.Name, lbl.Value}] = []storage.SeriesRef{} 93 } 94 m.postings[struct { 95 Name string 96 Value string 97 }{lbl.Name, lbl.Value}] = append(m.postings[struct { 98 Name string 99 Value string 100 }{lbl.Name, lbl.Value}], ref) 101 } 102 m.postings[struct { 103 Name string 104 Value string 105 }{allPostingsKey.Name, allPostingsKey.Value}] = append(m.postings[struct { 106 Name string 107 Value string 108 }{allPostingsKey.Name, allPostingsKey.Value}], ref) 109 110 s := series{l: l} 111 // Actual chunk data is not stored in the index. 112 s.chunks = append(s.chunks, chunks...) 113 m.series[ref] = s 114 115 return nil 116 } 117 118 func (m mockIndex) Close() error { 119 return nil 120 } 121 122 func (m mockIndex) LabelValues(name string) ([]string, error) { 123 values := []string{} 124 for l := range m.postings { 125 if l.Name == name { 126 values = append(values, l.Value) 127 } 128 } 129 return values, nil 130 } 131 132 func (m mockIndex) Postings(name string, values ...string) (Postings, error) { 133 p := []Postings{} 134 for _, value := range values { 135 p = append(p, iter.NewSliceSeekIterator(m.postings[struct { 136 Name string 137 Value string 138 }{Name: name, Value: value}])) 139 } 140 return Merge(p...), nil 141 } 142 143 func (m mockIndex) Series(ref storage.SeriesRef, lset *phlaremodel.Labels, chks *[]ChunkMeta) error { 144 s, ok := m.series[ref] 145 if !ok { 146 return errors.New("not found") 147 } 148 *lset = append((*lset)[:0], s.l...) 149 *chks = append((*chks)[:0], s.chunks...) 150 151 return nil 152 } 153 154 func TestIndexRW_Create_Open(t *testing.T) { 155 dir := t.TempDir() 156 157 fn := filepath.Join(dir, IndexFilename) 158 159 // An empty index must still result in a readable file. 160 iw, err := NewWriter(context.Background(), fn) 161 require.NoError(t, err) 162 require.NoError(t, iw.Close()) 163 164 ir, err := NewFileReader(fn) 165 require.NoError(t, err) 166 require.NoError(t, ir.Close()) 167 168 // Modify magic header must cause open to fail. 169 f, err := os.OpenFile(fn, os.O_WRONLY, 0o666) 170 require.NoError(t, err) 171 _, err = f.WriteAt([]byte{0, 0}, 0) 172 require.NoError(t, err) 173 f.Close() 174 175 _, err = NewFileReader(dir) 176 require.Error(t, err) 177 } 178 179 func TestIndexRW_Postings(t *testing.T) { 180 dir := t.TempDir() 181 182 fn := filepath.Join(dir, IndexFilename) 183 184 iw, err := NewWriter(context.Background(), fn) 185 require.NoError(t, err) 186 187 series := []phlaremodel.Labels{ 188 phlaremodel.LabelsFromStrings("a", "1", "b", "1"), 189 phlaremodel.LabelsFromStrings("a", "1", "b", "2"), 190 phlaremodel.LabelsFromStrings("a", "1", "b", "3"), 191 phlaremodel.LabelsFromStrings("a", "1", "b", "4"), 192 } 193 194 require.NoError(t, iw.AddSymbol("1")) 195 require.NoError(t, iw.AddSymbol("2")) 196 require.NoError(t, iw.AddSymbol("3")) 197 require.NoError(t, iw.AddSymbol("4")) 198 require.NoError(t, iw.AddSymbol("a")) 199 require.NoError(t, iw.AddSymbol("b")) 200 201 // Postings lists are only written if a series with the respective 202 // reference was added before. 203 require.NoError(t, iw.AddSeries(1, series[0], model.Fingerprint(series[0].Hash()))) 204 require.NoError(t, iw.AddSeries(2, series[1], model.Fingerprint(series[1].Hash()))) 205 require.NoError(t, iw.AddSeries(3, series[2], model.Fingerprint(series[2].Hash()))) 206 require.NoError(t, iw.AddSeries(4, series[3], model.Fingerprint(series[3].Hash()))) 207 208 require.NoError(t, iw.Close()) 209 210 ir, err := NewFileReader(fn) 211 require.NoError(t, err) 212 213 p, err := ir.Postings("a", nil, "1") 214 require.NoError(t, err) 215 216 var l phlaremodel.Labels 217 var c []ChunkMeta 218 219 for i := 0; p.Next(); i++ { 220 _, err := ir.Series(p.At(), &l, &c) 221 222 require.NoError(t, err) 223 require.Equal(t, 0, len(c)) 224 require.Equal(t, series[i], l) 225 } 226 require.NoError(t, p.Err()) 227 228 // The label indices are no longer used, so test them by hand here. 229 labelIndices := map[string][]string{} 230 require.NoError(t, ReadOffsetTable(ir.b, ir.toc.LabelIndicesTable, func(key []string, off uint64, _ int) error { 231 if len(key) != 1 { 232 return errors.Errorf("unexpected key length for label indices table %d", len(key)) 233 } 234 235 d := encoding.NewDecbufAt(ir.b, int(off), castagnoliTable) 236 vals := []string{} 237 nc := d.Be32int() 238 if nc != 1 { 239 return errors.Errorf("unexpected number of label indices table names %d", nc) 240 } 241 for i := d.Be32(); i > 0; i-- { 242 v, err := ir.lookupSymbol(d.Be32()) 243 if err != nil { 244 return err 245 } 246 vals = append(vals, v) 247 } 248 labelIndices[key[0]] = vals 249 return d.Err() 250 })) 251 require.Equal(t, map[string][]string{ 252 "a": {"1"}, 253 "b": {"1", "2", "3", "4"}, 254 }, labelIndices) 255 256 require.NoError(t, ir.Close()) 257 } 258 259 func TestPostingsMany(t *testing.T) { 260 dir := t.TempDir() 261 262 fn := filepath.Join(dir, IndexFilename) 263 264 iw, err := NewWriter(context.Background(), fn) 265 require.NoError(t, err) 266 267 // Create a label in the index which has 999 values. 268 symbols := map[string]struct{}{} 269 series := []phlaremodel.Labels{} 270 for i := 1; i < 1000; i++ { 271 v := fmt.Sprintf("%03d", i) 272 series = append(series, phlaremodel.LabelsFromStrings("i", v, "foo", "bar")) 273 symbols[v] = struct{}{} 274 } 275 symbols["i"] = struct{}{} 276 symbols["foo"] = struct{}{} 277 symbols["bar"] = struct{}{} 278 syms := []string{} 279 for s := range symbols { 280 syms = append(syms, s) 281 } 282 sort.Strings(syms) 283 for _, s := range syms { 284 require.NoError(t, iw.AddSymbol(s)) 285 } 286 287 sort.Slice(series, func(i, j int) bool { 288 return series[i].Hash() < series[j].Hash() 289 }) 290 291 for i, s := range series { 292 require.NoError(t, iw.AddSeries(storage.SeriesRef(i), s, model.Fingerprint(s.Hash()))) 293 } 294 require.NoError(t, iw.Close()) 295 296 ir, err := NewFileReader(fn) 297 require.NoError(t, err) 298 defer func() { require.NoError(t, ir.Close()) }() 299 300 cases := []struct { 301 in []string 302 }{ 303 // Simple cases, everything is present. 304 {in: []string{"002"}}, 305 {in: []string{"031", "032", "033"}}, 306 {in: []string{"032", "033"}}, 307 {in: []string{"127", "128"}}, 308 {in: []string{"127", "128", "129"}}, 309 {in: []string{"127", "129"}}, 310 {in: []string{"128", "129"}}, 311 {in: []string{"998", "999"}}, 312 {in: []string{"999"}}, 313 // Before actual values. 314 {in: []string{"000"}}, 315 {in: []string{"000", "001"}}, 316 {in: []string{"000", "002"}}, 317 // After actual values. 318 {in: []string{"999a"}}, 319 {in: []string{"999", "999a"}}, 320 {in: []string{"998", "999", "999a"}}, 321 // In the middle of actual values. 322 {in: []string{"126a", "127", "128"}}, 323 {in: []string{"127", "127a", "128"}}, 324 {in: []string{"127", "127a", "128", "128a", "129"}}, 325 {in: []string{"127", "128a", "129"}}, 326 {in: []string{"128", "128a", "129"}}, 327 {in: []string{"128", "129", "129a"}}, 328 {in: []string{"126a", "126b", "127", "127a", "127b", "128", "128a", "128b", "129", "129a", "129b"}}, 329 } 330 331 for _, c := range cases { 332 it, err := ir.Postings("i", nil, c.in...) 333 require.NoError(t, err) 334 335 got := []string{} 336 var lbls phlaremodel.Labels 337 var metas []ChunkMeta 338 for it.Next() { 339 _, err := ir.Series(it.At(), &lbls, &metas) 340 require.NoError(t, err) 341 got = append(got, lbls.Get("i")) 342 } 343 require.NoError(t, it.Err()) 344 exp := []string{} 345 for _, e := range c.in { 346 if _, ok := symbols[e]; ok && e != "l" { 347 exp = append(exp, e) 348 } 349 } 350 351 // sort expected values by label hash instead of lexicographically by labelset 352 sort.Slice(exp, func(i, j int) bool { 353 return labels.StableHash(labels.FromStrings("i", exp[i], "foo", "bar")) < labels.StableHash(labels.FromStrings("i", exp[j], "foo", "bar")) 354 }) 355 356 require.Equal(t, exp, got, fmt.Sprintf("input: %v", c.in)) 357 } 358 } 359 360 func TestPersistence_index_e2e(t *testing.T) { 361 dir := t.TempDir() 362 363 lbls, err := labels.ReadLabels(filepath.Join("..", "testdata", "20kseries.json"), 20000) 364 require.NoError(t, err) 365 366 flbls := make([]phlaremodel.Labels, len(lbls)) 367 for i, ls := range lbls { 368 flbls[i] = make(phlaremodel.Labels, 0, ls.Len()) 369 ls.Range(func(l labels.Label) { 370 flbls[i] = append(flbls[i], &typesv1.LabelPair{Name: l.Name, Value: l.Value}) 371 }) 372 } 373 374 // Sort labels as the index writer expects series in sorted order by fingerprint. 375 sort.Slice(flbls, func(i, j int) bool { 376 return flbls[i].Hash() < flbls[j].Hash() 377 }) 378 379 symbols := map[string]struct{}{} 380 for _, lset := range lbls { 381 lset.Range(func(l labels.Label) { 382 symbols[l.Name] = struct{}{} 383 symbols[l.Value] = struct{}{} 384 }) 385 } 386 387 var input IndexWriterSeriesSlice 388 389 // Generate ChunkMetas for every label set. 390 for i, lset := range flbls { 391 var metas []ChunkMeta 392 393 for j := 0; j <= (i % 20); j++ { 394 metas = append(metas, ChunkMeta{ 395 MinTime: int64(j * 10000), 396 MaxTime: int64((j + 1) * 10000), 397 Checksum: rand.Uint32(), 398 }) 399 } 400 input = append(input, &IndexWriterSeries{ 401 Labels: lset, 402 Chunks: metas, 403 }) 404 } 405 406 iw, err := NewWriter(context.Background(), filepath.Join(dir, IndexFilename)) 407 require.NoError(t, err) 408 409 syms := []string{} 410 for s := range symbols { 411 syms = append(syms, s) 412 } 413 sort.Strings(syms) 414 for _, s := range syms { 415 require.NoError(t, iw.AddSymbol(s)) 416 } 417 418 // Population procedure as done by compaction. 419 var ( 420 postings = NewMemPostings() 421 values = map[string]map[string]struct{}{} 422 ) 423 424 mi := newMockIndex() 425 426 for i, s := range input { 427 err = iw.AddSeries(storage.SeriesRef(i), s.Labels, model.Fingerprint(s.Labels.Hash()), s.Chunks...) 428 require.NoError(t, err) 429 require.NoError(t, mi.AddSeries(storage.SeriesRef(i), s.Labels, s.Chunks...)) 430 431 for _, l := range s.Labels { 432 valset, ok := values[l.Name] 433 if !ok { 434 valset = map[string]struct{}{} 435 values[l.Name] = valset 436 } 437 valset[l.Value] = struct{}{} 438 } 439 postings.Add(storage.SeriesRef(i), s.Labels) 440 } 441 442 err = iw.Close() 443 require.NoError(t, err) 444 445 ir, err := NewFileReader(filepath.Join(dir, IndexFilename)) 446 require.NoError(t, err) 447 448 for p := range mi.postings { 449 gotp, err := ir.Postings(p.Name, nil, p.Value) 450 require.NoError(t, err) 451 452 expp, err := mi.Postings(p.Name, p.Value) 453 require.NoError(t, err) 454 455 var lset, explset phlaremodel.Labels 456 var chks, expchks []ChunkMeta 457 458 for gotp.Next() { 459 require.True(t, expp.Next()) 460 461 ref := gotp.At() 462 463 _, err := ir.Series(ref, &lset, &chks) 464 require.NoError(t, err) 465 466 err = mi.Series(expp.At(), &explset, &expchks) 467 require.NoError(t, err) 468 require.Equal(t, explset, lset) 469 require.Equal(t, expchks, chks) 470 } 471 require.False(t, expp.Next(), "Expected no more postings for %q=%q", p.Name, p.Value) 472 require.NoError(t, gotp.Err()) 473 } 474 475 labelPairs := map[string][]string{} 476 for l := range mi.postings { 477 labelPairs[l.Name] = append(labelPairs[l.Name], l.Value) 478 } 479 for k, v := range labelPairs { 480 sort.Strings(v) 481 482 res, err := ir.SortedLabelValues(k) 483 require.NoError(t, err) 484 485 require.Equal(t, len(v), len(res)) 486 for i := 0; i < len(v); i++ { 487 require.Equal(t, v[i], res[i]) 488 } 489 } 490 491 gotSymbols := []string{} 492 it := ir.Symbols() 493 for it.Next() { 494 gotSymbols = append(gotSymbols, it.At()) 495 } 496 require.NoError(t, it.Err()) 497 expSymbols := []string{} 498 for s := range mi.symbols { 499 expSymbols = append(expSymbols, s) 500 } 501 sort.Strings(expSymbols) 502 require.Equal(t, expSymbols, gotSymbols) 503 504 require.NoError(t, ir.Close()) 505 } 506 507 func TestDecbufUvarintWithInvalidBuffer(t *testing.T) { 508 b := RealByteSlice([]byte{0x81, 0x81, 0x81, 0x81, 0x81, 0x81}) 509 510 db := encoding.NewDecbufUvarintAt(b, 0, castagnoliTable) 511 require.Error(t, db.Err()) 512 } 513 514 func TestReaderWithInvalidBuffer(t *testing.T) { 515 b := RealByteSlice([]byte{0x81, 0x81, 0x81, 0x81, 0x81, 0x81}) 516 517 _, err := NewReader(b) 518 require.Error(t, err) 519 } 520 521 // TestNewFileReaderErrorNoOpenFiles ensures that in case of an error no file remains open. 522 func TestNewFileReaderErrorNoOpenFiles(t *testing.T) { 523 dir := testutil.NewTemporaryDirectory("block", t) 524 525 idxName := filepath.Join(dir.Path(), "index") 526 err := os.WriteFile(idxName, []byte("corrupted contents"), 0o666) 527 require.NoError(t, err) 528 529 _, err = NewFileReader(idxName) 530 require.Error(t, err) 531 532 // dir.Close will fail on Win if idxName fd is not closed on error path. 533 dir.Close() 534 } 535 536 func TestSymbols(t *testing.T) { 537 buf := encoding.Encbuf{} 538 539 // Add prefix to the buffer to simulate symbols as part of larger buffer. 540 buf.PutUvarintStr("something") 541 542 symbolsStart := buf.Len() 543 buf.PutBE32int(204) // Length of symbols table. 544 buf.PutBE32int(100) // Number of symbols. 545 for i := 0; i < 100; i++ { 546 // i represents index in unicode characters table. 547 buf.PutUvarintStr(string(rune(i))) // Symbol. 548 } 549 checksum := crc32.Checksum(buf.Get()[symbolsStart+4:], castagnoliTable) 550 buf.PutBE32(checksum) // Check sum at the end. 551 552 s, err := NewSymbols(RealByteSlice(buf.Get()), FormatV2, symbolsStart) 553 require.NoError(t, err) 554 555 // We store only 4 offsets to symbols. 556 require.Equal(t, 32, s.Size()) 557 558 for i := 99; i >= 0; i-- { 559 s, err := s.Lookup(uint32(i)) 560 require.NoError(t, err) 561 require.Equal(t, string(rune(i)), s) 562 } 563 _, err = s.Lookup(100) 564 require.Error(t, err) 565 566 for i := 99; i >= 0; i-- { 567 r, err := s.ReverseLookup(string(rune(i))) 568 require.NoError(t, err) 569 require.Equal(t, uint32(i), r) 570 } 571 _, err = s.ReverseLookup(string(rune(100))) 572 require.Error(t, err) 573 574 iter := s.Iter() 575 i := 0 576 for iter.Next() { 577 require.Equal(t, string(rune(i)), iter.At()) 578 i++ 579 } 580 require.NoError(t, iter.Err()) 581 } 582 583 func TestDecoder_Postings_WrongInput(t *testing.T) { 584 _, _, err := (&Decoder{}).Postings([]byte("the cake is a lie")) 585 require.Error(t, err) 586 } 587 588 func TestWriter_ShouldReturnErrorOnSeriesWithDuplicatedLabelNames(t *testing.T) { 589 w, err := NewWriter(context.Background(), filepath.Join(t.TempDir(), "index")) 590 require.NoError(t, err) 591 592 require.NoError(t, w.AddSymbol("__name__")) 593 require.NoError(t, w.AddSymbol("metric_1")) 594 require.NoError(t, w.AddSymbol("metric_2")) 595 596 require.NoError(t, w.AddSeries(0, phlaremodel.LabelsFromStrings("__name__", "metric_1", "__name__", "metric_2"), 0)) 597 598 err = w.Close() 599 require.Error(t, err) 600 require.ErrorContains(t, err, "corruption detected when writing postings to index") 601 }