github.com/grafana/pyroscope@v1.18.0/pkg/metastore/index/query_test.go (about) 1 package index 2 3 import ( 4 "context" 5 "crypto/rand" 6 "runtime" 7 "sync" 8 "testing" 9 "time" 10 11 "github.com/oklog/ulid/v2" 12 "github.com/stretchr/testify/assert" 13 "github.com/stretchr/testify/require" 14 "go.etcd.io/bbolt" 15 "go.uber.org/atomic" 16 17 metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1" 18 typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1" 19 "github.com/grafana/pyroscope/pkg/model" 20 "github.com/grafana/pyroscope/pkg/test" 21 "github.com/grafana/pyroscope/pkg/util" 22 ) 23 24 func TestIndex_Query(t *testing.T) { 25 db := test.BoltDB(t) 26 ctx := context.Background() 27 28 minT := test.UnixMilli("2024-09-23T08:00:00.000Z") 29 maxT := test.UnixMilli("2024-09-23T09:00:00.000Z") 30 31 md := &metastorev1.BlockMeta{ 32 Id: test.ULID("2024-09-23T08:00:00.001Z"), 33 Tenant: 0, 34 MinTime: minT, 35 MaxTime: maxT, 36 CreatedBy: 1, 37 Datasets: []*metastorev1.Dataset{ 38 {Tenant: 2, Name: 3, MinTime: minT, MaxTime: maxT, Labels: []int32{2, 4, 3, 5, 6}}, 39 {Tenant: 7, Name: 8, MinTime: minT, MaxTime: maxT, Labels: []int32{2, 4, 8, 5, 9}}, 40 }, 41 StringTable: []string{ 42 "", "ingester", 43 "tenant-a", "dataset-a", "service_name", "__profile_type__", "1", 44 "tenant-b", "dataset-b", "4", 45 }, 46 } 47 48 md2 := &metastorev1.BlockMeta{ 49 Id: test.ULID("2024-09-23T08:00:00.002Z"), 50 Tenant: 1, 51 Shard: 1, 52 MinTime: minT, 53 MaxTime: maxT, 54 CreatedBy: 2, 55 Datasets: []*metastorev1.Dataset{ 56 {Tenant: 1, Name: 3, MinTime: minT, MaxTime: maxT, Labels: []int32{2, 4, 3, 5, 6}}, 57 }, 58 StringTable: []string{ 59 "", "tenant-a", "ingester", "dataset-a", "service_name", "__profile_type__", "1", 60 }, 61 } 62 63 md3 := &metastorev1.BlockMeta{ 64 Id: test.ULID("2024-09-23T08:30:00.003Z"), 65 Tenant: 1, 66 Shard: 1, 67 MinTime: minT, 68 MaxTime: maxT, 69 CreatedBy: 2, 70 Datasets: []*metastorev1.Dataset{ 71 {Tenant: 1, Name: 3, MinTime: minT, MaxTime: maxT, Labels: []int32{2, 4, 3, 5, 6}}, 72 }, 73 StringTable: []string{ 74 "", "tenant-a", "ingester", "dataset-a", "service_name", "__profile_type__", "1", 75 }, 76 } 77 78 query := func(t *testing.T, tx *bbolt.Tx, index *Index) { 79 t.Run("GetBlocks", func(t *testing.T) { 80 found, err := index.GetBlocks(tx, &metastorev1.BlockList{Blocks: []string{md.Id}}) 81 require.NoError(t, err) 82 require.NotEmpty(t, found) 83 require.Equal(t, md, found[0]) 84 85 found, err = index.GetBlocks(tx, &metastorev1.BlockList{ 86 Tenant: "tenant-a", 87 Shard: 1, 88 Blocks: []string{md2.Id, md3.Id}, 89 }) 90 require.NoError(t, err) 91 require.NotEmpty(t, found) 92 require.Equal(t, md2, found[0]) 93 require.Equal(t, md3, found[1]) 94 95 found, err = index.GetBlocks(tx, &metastorev1.BlockList{ 96 Tenant: "tenant-b", 97 Shard: 1, 98 Blocks: []string{md.Id}, 99 }) 100 require.NoError(t, err) 101 require.Empty(t, found) 102 103 found, err = index.GetBlocks(tx, &metastorev1.BlockList{ 104 Shard: 1, 105 Blocks: []string{md.Id}, 106 }) 107 require.NoError(t, err) 108 require.Empty(t, found) 109 }) 110 111 t.Run("DatasetFilter", func(t *testing.T) { 112 expected := []*metastorev1.BlockMeta{ 113 { 114 Id: md.Id, 115 Tenant: 0, 116 MinTime: minT, 117 MaxTime: maxT, 118 CreatedBy: 1, 119 Datasets: []*metastorev1.Dataset{{Tenant: 2, Name: 3, MinTime: minT, MaxTime: maxT}}, 120 StringTable: []string{"", "ingester", "tenant-a", "dataset-a"}, 121 }, 122 { 123 Id: md2.Id, 124 Tenant: 1, 125 Shard: 1, 126 MinTime: minT, 127 MaxTime: maxT, 128 CreatedBy: 2, 129 Datasets: []*metastorev1.Dataset{{Tenant: 1, Name: 3, MinTime: minT, MaxTime: maxT}}, 130 StringTable: []string{"", "tenant-a", "ingester", "dataset-a"}, 131 }, 132 { 133 Id: md3.Id, 134 Tenant: 1, 135 Shard: 1, 136 MinTime: minT, 137 MaxTime: maxT, 138 CreatedBy: 2, 139 Datasets: []*metastorev1.Dataset{{Tenant: 1, Name: 3, MinTime: minT, MaxTime: maxT}}, 140 StringTable: []string{"", "tenant-a", "ingester", "dataset-a"}, 141 }, 142 } 143 144 found, err := index.QueryMetadata(tx, ctx, MetadataQuery{ 145 Expr: `{service_name=~"dataset-a"}`, 146 StartTime: time.UnixMilli(minT), 147 EndTime: time.UnixMilli(maxT), 148 Tenant: []string{"tenant-a", "tenant-b"}, 149 }) 150 require.NoError(t, err) 151 require.Equal(t, expected, found) 152 }) 153 154 t.Run("DatasetTenantFilter", func(t *testing.T) { 155 expected := []*metastorev1.BlockMeta{ 156 { 157 Id: md.Id, 158 Tenant: 0, 159 MinTime: minT, 160 MaxTime: maxT, 161 CreatedBy: 1, 162 Datasets: []*metastorev1.Dataset{{Tenant: 2, Name: 3, MinTime: minT, MaxTime: maxT}}, 163 StringTable: []string{"", "ingester", "tenant-b", "dataset-b"}, 164 }, 165 } 166 167 found, err := index.QueryMetadata(tx, ctx, MetadataQuery{ 168 Expr: `{}`, 169 StartTime: time.UnixMilli(minT), 170 EndTime: time.UnixMilli(maxT + 1), 171 Tenant: []string{"tenant-b"}, 172 }) 173 require.NoError(t, err) 174 require.Equal(t, expected, found) 175 }) 176 177 t.Run("DatasetTenantFilterNotExisting", func(t *testing.T) { 178 found, err := index.QueryMetadata(tx, ctx, MetadataQuery{ 179 Expr: `{}`, 180 StartTime: time.UnixMilli(minT), 181 EndTime: time.UnixMilli(maxT + 1), 182 Tenant: []string{"tenant-not-found"}, 183 }) 184 require.NoError(t, err) 185 require.Empty(t, found) 186 }) 187 188 t.Run("DatasetFilter_KeepLabels", func(t *testing.T) { 189 expected := []*metastorev1.BlockMeta{ 190 { 191 Id: md.Id, 192 Tenant: 0, 193 MinTime: minT, 194 MaxTime: maxT, 195 CreatedBy: 1, 196 Datasets: []*metastorev1.Dataset{{ 197 Tenant: 2, 198 Name: 3, 199 MinTime: minT, 200 MaxTime: maxT, 201 Labels: []int32{1, 4, 3}, 202 }}, 203 StringTable: []string{"", "ingester", "tenant-a", "dataset-a", "service_name"}, 204 }, 205 { 206 Id: md2.Id, 207 Tenant: 1, 208 Shard: 1, 209 MinTime: minT, 210 MaxTime: maxT, 211 CreatedBy: 2, 212 Datasets: []*metastorev1.Dataset{{ 213 Tenant: 1, 214 Name: 3, 215 MinTime: minT, 216 MaxTime: maxT, 217 Labels: []int32{1, 4, 3}, 218 }}, 219 StringTable: []string{"", "tenant-a", "ingester", "dataset-a", "service_name"}, 220 }, 221 { 222 Id: md3.Id, 223 Tenant: 1, 224 Shard: 1, 225 MinTime: minT, 226 MaxTime: maxT, 227 CreatedBy: 2, 228 Datasets: []*metastorev1.Dataset{{ 229 Tenant: 1, 230 Name: 3, 231 MinTime: minT, 232 MaxTime: maxT, 233 Labels: []int32{1, 4, 3}, 234 }}, 235 StringTable: []string{"", "tenant-a", "ingester", "dataset-a", "service_name"}, 236 }, 237 } 238 239 found, err := index.QueryMetadata(tx, ctx, MetadataQuery{ 240 Expr: `{service_name=~"dataset-a"}`, 241 StartTime: time.UnixMilli(minT), 242 EndTime: time.UnixMilli(maxT), 243 Tenant: []string{"tenant-a", "tenant-b"}, 244 Labels: []string{"service_name"}, 245 }) 246 require.NoError(t, err) 247 require.Equal(t, expected, found) 248 }) 249 250 t.Run("TimeRangeFilter", func(t *testing.T) { 251 found, err := index.QueryMetadata(tx, ctx, MetadataQuery{ 252 Expr: `{service_name=~"dataset-b"}`, 253 StartTime: time.UnixMilli(minT - 3), 254 EndTime: time.UnixMilli(minT - 1), // dataset-b starts at minT 255 Tenant: []string{"tenant-b"}, 256 }) 257 require.NoError(t, err) 258 require.Empty(t, found) 259 }) 260 261 t.Run("Labels", func(t *testing.T) { 262 labels, err := index.QueryMetadataLabels(tx, ctx, MetadataQuery{ 263 Expr: `{service_name=~"dataset.*"}`, 264 StartTime: time.UnixMilli(minT), 265 EndTime: time.UnixMilli(maxT), 266 Tenant: []string{"tenant-a"}, 267 Labels: []string{ 268 model.LabelNameProfileType, 269 model.LabelNameServiceName, 270 }, 271 }) 272 require.NoError(t, err) 273 require.NotEmpty(t, labels) 274 assert.Equal(t, []*typesv1.Labels{{Labels: []*typesv1.LabelPair{ 275 {Name: model.LabelNameProfileType, Value: "1"}, 276 {Name: model.LabelNameServiceName, Value: "dataset-a"}, 277 }}}, labels) 278 }) 279 280 t.Run("LabelsTenantFilter", func(t *testing.T) { 281 labels, err := index.QueryMetadataLabels(tx, ctx, MetadataQuery{ 282 Expr: "{}", 283 StartTime: time.UnixMilli(minT), 284 EndTime: time.UnixMilli(maxT), 285 Tenant: []string{"tenant-b"}, 286 Labels: []string{ 287 model.LabelNameProfileType, 288 model.LabelNameServiceName, 289 }, 290 }) 291 require.NoError(t, err) 292 require.NotEmpty(t, labels) 293 assert.Equal(t, []*typesv1.Labels{{Labels: []*typesv1.LabelPair{ 294 {Name: model.LabelNameProfileType, Value: "4"}, 295 {Name: model.LabelNameServiceName, Value: "dataset-b"}, 296 }}}, labels) 297 }) 298 } 299 300 idx := NewIndex(util.Logger, NewStore(), DefaultConfig) 301 tx, err := db.Begin(true) 302 require.NoError(t, err) 303 require.NoError(t, idx.Init(tx)) 304 require.NoError(t, idx.InsertBlock(tx, md.CloneVT())) 305 require.NoError(t, idx.InsertBlock(tx, md2.CloneVT())) 306 require.NoError(t, idx.InsertBlock(tx, md3.CloneVT())) 307 require.NoError(t, tx.Commit()) 308 309 t.Run("BeforeRestore", func(t *testing.T) { 310 tx, err := db.Begin(false) 311 require.NoError(t, err) 312 query(t, tx, idx) 313 require.NoError(t, tx.Rollback()) 314 }) 315 316 t.Run("Restored", func(t *testing.T) { 317 idx = NewIndex(util.Logger, NewStore(), DefaultConfig) 318 tx, err = db.Begin(false) 319 defer func() { 320 require.NoError(t, tx.Rollback()) 321 }() 322 require.NoError(t, err) 323 require.NoError(t, idx.Restore(tx)) 324 query(t, tx, idx) 325 }) 326 } 327 328 func TestIndex_QueryConcurrency(t *testing.T) { 329 const N = 10 330 for i := 0; i < N && !t.Failed(); i++ { 331 q := new(queryTestSuite) 332 q.run(t) 333 } 334 } 335 336 type queryTestSuite struct { 337 db *bbolt.DB 338 idx *Index 339 blocks atomic.Pointer[metastorev1.BlockList] 340 341 from string 342 tenant string 343 shard uint32 344 345 wg sync.WaitGroup 346 stop chan struct{} 347 doStop func() 348 349 writes atomic.Int32 350 queries atomic.Int32 351 } 352 353 // Possible invariants: 354 // 1. (001) No blocks are found. 355 // 2. (010) Only source blocks are found (1-10). 356 // 3. (100) Only compacted blocks are found (always 4). 357 358 const ( 359 noBlocks = 1 << iota 360 sourceBlocks 361 compactedBlocks 362 363 all = noBlocks | sourceBlocks | compactedBlocks 364 ) 365 366 func (s *queryTestSuite) setup(t *testing.T) { 367 var once sync.Once 368 s.stop = make(chan struct{}) 369 s.doStop = func() { 370 once.Do(func() { 371 close(s.stop) 372 }) 373 } 374 375 s.from = "2024-09-23T08:00:00.000Z" 376 s.tenant = "tenant" 377 s.shard = 1 378 s.blocks.Store(&metastorev1.BlockList{}) 379 380 s.db = test.BoltDB(t) 381 s.idx = NewIndex(util.Logger, NewStore(), DefaultConfig) 382 // Enforce aggressive cache evictions: 383 s.idx.config.partitionDuration = time.Minute * 30 384 s.idx.config.ShardCacheSize = 3 385 s.idx.config.BlockReadCacheSize = 3 386 s.idx.config.BlockWriteCacheSize = 3 387 require.NoError(t, s.db.Update(s.idx.Init)) 388 } 389 390 func (s *queryTestSuite) teardown(t *testing.T) { 391 require.NoError(t, s.db.Close()) 392 } 393 394 func (s *queryTestSuite) run(t *testing.T) { 395 s.setup(t) 396 defer s.teardown(t) 397 398 done := make(chan struct{}) 399 go func() { 400 defer close(done) 401 for { 402 select { 403 case <-s.stop: 404 return 405 default: 406 s.writeBlocks(t) 407 } 408 } 409 }() 410 411 ctx := context.Background() 412 s.runQuery(t, ctx, s.queryBlocks) 413 s.runQuery(t, ctx, s.queryLabels) 414 s.runQuery(t, ctx, s.getBlocks) 415 416 go func() { 417 select { 418 case <-s.stop: 419 case <-time.After(30 * time.Second): 420 t.Error("test time out: query consistency not confirmed") 421 s.doStop() 422 } 423 }() 424 425 s.wg.Wait() 426 // If we haven't failed the test, we can conclude that 427 // no races, no deadlocks, no inconsistencies were found. 428 s.doStop() 429 // Wait for the write goroutine to finish, so we can 430 // safely tear down the test. 431 <-done 432 t.Logf("writes: %d, queries: %d", s.writes.Load(), s.queries.Load()) 433 } 434 435 func (s *queryTestSuite) createBlock(id ulid.ULID, dur time.Duration, tenant string, shard, level uint32) *metastorev1.BlockMeta { 436 minT := ulid.Time(id.Time()).UnixMilli() 437 maxT := minT + dur.Milliseconds() 438 tid := int32(0) 439 if level > 0 { 440 tid = 1 441 } 442 return &metastorev1.BlockMeta{ 443 Id: id.String(), 444 Tenant: tid, 445 Shard: shard, 446 MinTime: minT, 447 MaxTime: maxT, 448 CompactionLevel: level, 449 Datasets: []*metastorev1.Dataset{{Tenant: 1, MinTime: minT, MaxTime: maxT, Labels: []int32{1, 2, 3}}}, 450 StringTable: []string{"", tenant, "service_name", "service"}, 451 } 452 } 453 454 func (s *queryTestSuite) createBlocks(from time.Time, dur time.Duration, n int, tenant string, shard, level uint32) (blocks []*metastorev1.BlockMeta) { 455 cur := from 456 for i := 0; i < n; i++ { 457 b := s.createBlock(ulid.MustNew(ulid.Timestamp(cur), rand.Reader), dur, tenant, shard, level) 458 blocks = append(blocks, b) 459 cur = cur.Add(dur) 460 } 461 return blocks 462 } 463 464 func (s *queryTestSuite) writeBlocks(t *testing.T) { 465 t.Helper() 466 467 // Create source blocks. 468 source := s.createBlocks(test.Time(s.from), time.Minute*10, 10, s.tenant, s.shard, 0) 469 sourceList := &metastorev1.BlockList{ 470 // Tenant: s.tenant, // O level blocks are anonymous. 471 Shard: s.shard, 472 Blocks: make([]string, len(source)), 473 } 474 for i, b := range source { 475 sourceList.Blocks[i] = b.Id 476 } 477 // Blocks are inserted one by one, each within its own transaction. 478 for i := range source { 479 require.NoError(t, s.db.Update(func(tx *bbolt.Tx) error { 480 return s.idx.InsertBlock(tx, source[i]) 481 })) 482 s.writes.Inc() 483 } 484 485 // We make the blocks visible to our test queries. 486 s.blocks.Store(sourceList) 487 // Give other goroutines a chance. 488 runtime.Gosched() 489 490 // Replace with compacted. 491 compacted := s.createBlocks(test.Time(s.from), time.Minute*15, 4, s.tenant, s.shard, 1) 492 compactedList := &metastorev1.BlockList{ 493 Tenant: s.tenant, 494 Shard: s.shard, 495 Blocks: make([]string, len(compacted)), 496 } 497 for i, b := range compacted { 498 compactedList.Blocks[i] = b.Id 499 } 500 require.NoError(t, s.db.Update(func(tx *bbolt.Tx) error { 501 return s.idx.ReplaceBlocks(tx, &metastorev1.CompactedBlocks{ 502 SourceBlocks: sourceList, 503 NewBlocks: compacted, 504 }) 505 })) 506 s.writes.Inc() 507 508 // After we replaced the source blocks with compacted blocks, 509 // we want our test queries to check them. 510 s.blocks.Store(compactedList) 511 runtime.Gosched() 512 513 // Delete all blocks. 514 require.NoError(t, s.db.Update(func(tx *bbolt.Tx) error { 515 return s.idx.ReplaceBlocks(tx, &metastorev1.CompactedBlocks{ 516 SourceBlocks: compactedList, 517 }) 518 })) 519 s.writes.Inc() 520 521 s.blocks.Store(&metastorev1.BlockList{}) 522 runtime.Gosched() 523 } 524 525 func (s *queryTestSuite) runQuery(t *testing.T, ctx context.Context, q func(*testing.T, context.Context) int32) { 526 t.Helper() 527 528 var ret int32 529 s.wg.Add(1) 530 531 go func() { 532 defer s.wg.Done() 533 for { 534 select { 535 case <-s.stop: 536 return 537 538 default: 539 s.queries.Inc() 540 x := q(t, ctx) 541 if x < 0 { 542 s.doStop() 543 return 544 } 545 if ret |= x; ret == all { 546 return 547 } 548 } 549 } 550 }() 551 } 552 553 func (s *queryTestSuite) queryBlocks(t *testing.T, ctx context.Context) (ret int32) { 554 var x []*metastorev1.BlockMeta 555 var err error 556 require.NoError(t, s.db.View(func(tx *bbolt.Tx) error { 557 x, err = s.idx.QueryMetadata(tx, ctx, MetadataQuery{ 558 Expr: `{service_name="service"}`, 559 StartTime: test.Time(s.from), 560 EndTime: test.Time(s.from).Add(2 * time.Hour), 561 Tenant: []string{s.tenant}, 562 Labels: []string{"service_name"}, 563 }) 564 return err 565 })) 566 567 // It's expected that we may query the data before 568 // any blocks are written. 569 if len(x) == 0 { 570 return noBlocks 571 } 572 var c uint32 573 for i := range x { 574 c += x[i].CompactionLevel 575 } 576 577 if len(x) <= 10 && c == 0 { 578 // All of level 0: note that the source blocks 579 // may be seen while they are being inserted. 580 return sourceBlocks 581 } 582 583 if len(x) == int(c) && c == 4 { 584 // All of level 1: note that compacted blocks 585 // should be added atomically. 586 return compactedBlocks 587 } 588 589 t.Error("query blocks: inconsistent results") 590 for i := range x { 591 t.Log("\t", x[i]) 592 } 593 594 return -1 595 } 596 597 func (s *queryTestSuite) queryLabels(t *testing.T, ctx context.Context) (ret int32) { 598 var x []*typesv1.Labels 599 var err error 600 require.NoError(t, s.db.View(func(tx *bbolt.Tx) error { 601 x, err = s.idx.QueryMetadataLabels(tx, ctx, MetadataQuery{ 602 Expr: `{service_name="service"}`, 603 StartTime: test.Time(s.from), 604 EndTime: test.Time(s.from).Add(2 * time.Hour), 605 Tenant: []string{s.tenant}, 606 Labels: []string{"service_name"}, 607 }) 608 return err 609 })) 610 611 if len(x) == 0 { 612 return noBlocks 613 } 614 615 // Inconsistent labels/strings. 616 assert.EqualValues(t, 1, len(x)) 617 assert.EqualValues(t, 1, len(x[0].Labels)) 618 assert.Equal(t, x[0].Labels[0].Name, "service_name") 619 assert.Equal(t, x[0].Labels[0].Value, "service") 620 621 // We can't distinguish between source 622 // and compacted blocks here. 623 return sourceBlocks | compactedBlocks 624 } 625 626 func (s *queryTestSuite) getBlocks(t *testing.T, _ context.Context) (ret int32) { 627 var x []*metastorev1.BlockMeta 628 var err error 629 // The writer ensures that the list is set after it finished writes. 630 // If we get the list within the transaction, we may observe partial 631 // source blocks [0-9]: this means the read transaction was open while 632 // not all the blocks were written. 633 blocks := s.blocks.Load() 634 require.NoError(t, s.db.View(func(tx *bbolt.Tx) error { 635 x, err = s.idx.GetBlocks(tx, blocks) 636 return err 637 })) 638 639 // Same as queryBlocks except that we do not expect 640 // to see partial source blocks. 641 if len(x) == 0 { 642 return noBlocks 643 } 644 var c uint32 645 for i := range x { 646 c += x[i].CompactionLevel 647 } 648 649 if len(x) == 10 && c == 0 { 650 return sourceBlocks 651 } 652 653 if len(x) == int(c) && c == 4 { 654 return compactedBlocks 655 } 656 657 t.Error("find blocks: inconsistent results") 658 for i := range x { 659 t.Log("\t", x[i]) 660 } 661 662 return -1 663 }