github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/block_querier.go (about) 1 package phlaredb 2 3 import ( 4 "bytes" 5 "context" 6 "fmt" 7 "io" 8 "math" 9 "path/filepath" 10 "slices" 11 "sort" 12 "strings" 13 "sync" 14 "time" 15 16 "connectrpc.com/connect" 17 "github.com/go-kit/log" 18 "github.com/go-kit/log/level" 19 "github.com/gogo/status" 20 "github.com/grafana/dskit/multierror" 21 "github.com/grafana/dskit/runutil" 22 "github.com/oklog/ulid/v2" 23 "github.com/opentracing/opentracing-go" 24 otlog "github.com/opentracing/opentracing-go/log" 25 "github.com/parquet-go/parquet-go" 26 "github.com/pkg/errors" 27 "github.com/prometheus/common/model" 28 "github.com/prometheus/prometheus/promql/parser" 29 "github.com/prometheus/prometheus/storage" 30 "github.com/samber/lo" 31 "golang.org/x/sync/errgroup" 32 "google.golang.org/grpc/codes" 33 34 profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" 35 ingestv1 "github.com/grafana/pyroscope/api/gen/proto/go/ingester/v1" 36 typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1" 37 "github.com/grafana/pyroscope/pkg/iter" 38 phlaremodel "github.com/grafana/pyroscope/pkg/model" 39 phlareobj "github.com/grafana/pyroscope/pkg/objstore" 40 parquetobj "github.com/grafana/pyroscope/pkg/objstore/parquet" 41 "github.com/grafana/pyroscope/pkg/phlaredb/block" 42 "github.com/grafana/pyroscope/pkg/phlaredb/query" 43 schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" 44 "github.com/grafana/pyroscope/pkg/phlaredb/symdb" 45 "github.com/grafana/pyroscope/pkg/phlaredb/tsdb/index" 46 "github.com/grafana/pyroscope/pkg/pprof" 47 phlarecontext "github.com/grafana/pyroscope/pkg/pyroscope/context" 48 "github.com/grafana/pyroscope/pkg/util" 49 ) 50 51 const ( 52 defaultBatchSize = 64 << 10 53 54 // This controls the buffer size for reads to a parquet io.Reader. This value should be small for memory or 55 // disk backed readers, but when the reader is backed by network storage a larger size will be advantageous. 
	//
	// The chosen value should be larger than the page size. Page sizes depend on the write buffer size as well as
	// on how well the data is encoded. In practice, they tend to be around 1MB.
	parquetReadBufferSize = 2 << 20
)

// tableReader is the common contract of the per-table readers of a block:
// open against a bucket, and io.Closer to release resources.
type tableReader interface {
	open(ctx context.Context, bucketReader phlareobj.BucketReader) error
	io.Closer
}

// BlockQuerier maintains the set of per-block queriers for the blocks
// currently present in the bucket (kept up to date by Sync).
type BlockQuerier struct {
	phlarectx context.Context
	logger    log.Logger

	bkt phlareobj.Bucket

	queriers     []*singleBlockQuerier // sorted by meta.MinTime
	queriersLock sync.RWMutex          // guards queriers
}

// NewBlockQuerier creates a BlockQuerier reading blocks from bucketReader.
// Block metrics are registered with the registry carried by phlarectx.
func NewBlockQuerier(phlarectx context.Context, bucketReader phlareobj.Bucket) *BlockQuerier {
	return &BlockQuerier{
		phlarectx: ContextWithBlockMetrics(phlarectx,
			NewBlocksMetrics(
				phlarecontext.Registry(phlarectx),
			),
		),
		logger: phlarecontext.Logger(phlarectx),
		bkt:    bucketReader,
	}
}

// Queriers returns a snapshot copy of the current set of block queriers.
func (b *BlockQuerier) Queriers() Queriers {
	b.queriersLock.RLock()
	defer b.queriersLock.RUnlock()

	res := make([]Querier, 0, len(b.queriers))
	for _, q := range b.queriers {
		res = append(res, q)
	}
	return res
}

// BlockMetas lists block directories in the bucket and reads their meta
// files concurrently (at most 16 in flight). Blocks whose meta cannot be
// read or parsed are logged and skipped rather than failing the call.
// The result is sorted by MinTime and contains no nil entries.
func (b *BlockQuerier) BlockMetas(ctx context.Context) (metas []*block.Meta, _ error) {
	var names []ulid.ULID
	if err := b.bkt.Iter(ctx, "", func(n string) error {
		ulid, ok := block.IsBlockDir(n)
		if !ok {
			return nil
		}
		names = append(names, ulid)
		return nil
	}); err != nil {
		return nil, err
	}

	g, ctx := errgroup.WithContext(ctx)
	g.SetLimit(16)
	metas = make([]*block.Meta, len(names))
	for pos := range names {
		func(pos int) {
			g.Go(util.RecoverPanic(func() error {
				path := filepath.Join(names[pos].String(), block.MetaFilename)
				metaReader, err := b.bkt.Get(ctx, path)
				if err != nil {
					level.Error(b.logger).Log("msg", "error reading block meta", "block", path, "err", err)
					return nil
				}

				// NOTE(review): metaReader is not explicitly closed here —
				// presumably block.Read consumes/closes it; verify.
				metas[pos], err = block.Read(metaReader)
				if err != nil {
					level.Error(b.logger).Log("msg", "error parsing block meta", "block", path, "err", err)
					return nil
				}
				return nil
			}))
		}(pos)
	}

	if err := g.Wait(); err != nil {
		return nil, err
	}

	// sort slice and make sure nils are last
	sort.Slice(metas, func(i, j int) bool {
		if metas[i] == nil {
			return false
		}
		if metas[j] == nil {
			return true
		}
		return metas[i].MinTime < metas[j].MinTime
	})

	// iterate from the end and cut off everything after the last non-nil
	var pos int
	for pos = len(metas) - 1; pos >= 0; pos-- {
		if metas[pos] != nil {
			break
		}
	}

	return metas[0 : pos+1], nil
}

// BlockMeta reads and parses the meta file of a single named block.
// Unlike BlockMetas, failures here are returned to the caller.
func (b *BlockQuerier) BlockMeta(ctx context.Context, name string) (meta *block.Meta, _ error) {
	path := filepath.Join(name, block.MetaFilename)
	metaReader, err := b.bkt.Get(ctx, path)
	if err != nil {
		level.Error(b.logger).Log("msg", "error reading block meta", "block", path, "err", err)
		return nil, err
	}

	meta, err = block.Read(metaReader)
	if err != nil {
		level.Error(b.logger).Log("msg", "error parsing block meta", "block", path, "err", err)
		return nil, err
	}

	return meta, nil
}

// Sync gradually scans the available blocks. If there are any changes to the
// last run it will Open/Close new/no longer existing ones.
181 func (b *BlockQuerier) Sync(ctx context.Context) error { 182 observedMetas, err := b.BlockMetas(ctx) 183 if err != nil { 184 return err 185 } 186 187 // hold write lock to queriers 188 b.queriersLock.Lock() 189 190 // build lookup map 191 192 querierByULID := make(map[ulid.ULID]*singleBlockQuerier) 193 194 for pos := range b.queriers { 195 querierByULID[b.queriers[pos].meta.ULID] = b.queriers[pos] 196 } 197 198 // ensure queries has the right length 199 lenQueriers := len(observedMetas) 200 if cap(b.queriers) < lenQueriers { 201 b.queriers = make([]*singleBlockQuerier, lenQueriers) 202 } else { 203 b.queriers = b.queriers[:lenQueriers] 204 } 205 206 for pos, m := range observedMetas { 207 208 q, ok := querierByULID[m.ULID] 209 if ok { 210 b.queriers[pos] = q 211 delete(querierByULID, m.ULID) 212 continue 213 } 214 215 b.queriers[pos] = NewSingleBlockQuerierFromMeta(b.phlarectx, b.bkt, m) 216 } 217 // ensure queriers are in ascending order. 218 sort.Slice(b.queriers, func(i, j int) bool { 219 return b.queriers[i].meta.MinTime < b.queriers[j].meta.MinTime 220 }) 221 b.queriersLock.Unlock() 222 223 // now close no longer available queries 224 for _, q := range querierByULID { 225 if err := q.Close(); err != nil { 226 return err 227 } 228 } 229 230 return nil 231 } 232 233 func (b *BlockQuerier) AddBlockQuerierByMeta(m *block.Meta) { 234 q := NewSingleBlockQuerierFromMeta(b.phlarectx, b.bkt, m) 235 b.queriersLock.Lock() 236 defer b.queriersLock.Unlock() 237 i := sort.Search(len(b.queriers), func(i int) bool { 238 return b.queriers[i].meta.MinTime >= m.MinTime 239 }) 240 if i < len(b.queriers) && b.queriers[i].meta.ULID == m.ULID { 241 // Block with this meta is already present, skipping. 242 return 243 } 244 b.queriers = append(b.queriers, q) // Ensure we have enough capacity. 245 copy(b.queriers[i+1:], b.queriers[i:]) 246 b.queriers[i] = q 247 } 248 249 // evict removes the block with the given ULID from the querier. 
func (b *BlockQuerier) evict(blockID ulid.ULID) (bool, error) {
	b.queriersLock.Lock()
	// N.B: queriers are sorted by meta.MinTime.
	j := -1
	for i, q := range b.queriers {
		if q.meta.ULID.Compare(blockID) == 0 {
			j = i
			break
		}
	}
	if j < 0 {
		// Block not found.
		b.queriersLock.Unlock()
		return false, nil
	}
	blockQuerier := b.queriers[j]
	// Delete the querier from the slice and make it eligible for GC.
	copy(b.queriers[j:], b.queriers[j+1:])
	b.queriers[len(b.queriers)-1] = nil
	b.queriers = b.queriers[:len(b.queriers)-1]
	b.queriersLock.Unlock()
	// Close outside the lock; the bool reports whether the block was found.
	return true, blockQuerier.Close()
}

// Close closes all block queriers, collecting every error rather than
// stopping at the first one.
func (b *BlockQuerier) Close() error {
	b.queriersLock.Lock()
	defer b.queriersLock.Unlock()

	errs := multierror.New()
	for pos := range b.queriers {
		if err := b.queriers[pos].Close(); err != nil {
			errs.Add(err)
		}
	}
	return errs.Err()
}

// TableInfo summarizes size statistics of a single table of a block.
type TableInfo struct {
	Rows      uint64
	RowGroups uint64
	Bytes     uint64
}

// BlockInfo summarizes a block: its ID, time range, per-table statistics
// and number of series.
type BlockInfo struct {
	ID          ulid.ULID
	MinTime     model.Time
	MaxTime     model.Time
	Profiles    TableInfo
	Stacktraces TableInfo
	Locations   TableInfo
	Functions   TableInfo
	Mappings    TableInfo
	Strings     TableInfo
	Series      uint64
}

// singleBlockQuerier serves queries against a single block. Its readers
// (index, profile tables, symbols) are opened via Open and guarded by
// openLock/opened; in-flight queries are tracked by the queries WaitGroup
// so Close can wait for them.
type singleBlockQuerier struct {
	logger  log.Logger
	metrics *BlocksMetrics

	bucket phlareobj.Bucket
	meta   *block.Meta

	tables []tableReader

	queries  sync.WaitGroup
	openLock sync.Mutex
	opened   bool
	index    *index.Reader
	profiles map[profileTableKey]*parquetReader[*schemav1.ProfilePersister]
	symbols  symbolsResolver
}

// profileTableKey identifies a profile table by its resolution and
// aggregation, parsed from the table file name.
type profileTableKey struct {
	resolution  time.Duration
	aggregation string
}

// NewSingleBlockQuerierFromMeta builds a querier for the block described by
// meta. Profile tables listed in meta.Files are registered but not opened.
func NewSingleBlockQuerierFromMeta(phlarectx context.Context, bucketReader phlareobj.Bucket, meta *block.Meta) *singleBlockQuerier {
	q := &singleBlockQuerier{
		logger:   phlarecontext.Logger(phlarectx),
		metrics:  blockMetricsFromContext(phlarectx),
		profiles: make(map[profileTableKey]*parquetReader[*schemav1.ProfilePersister], 3),
		bucket:   phlareobj.NewPrefixedBucket(bucketReader, meta.ULID.String()),
		meta:     meta,
	}
	for _, f := range meta.Files {
		k, ok := parseProfileTableName(f.RelPath)
		if ok {
			r := &parquetReader[*schemav1.ProfilePersister]{meta: f}
			q.profiles[k] = r
			q.tables = append(q.tables, r)
		}
	}
	return q
}

// Profiles returns the reader of the source profiles table.
func (b *singleBlockQuerier) Profiles() ProfileReader {
	return b.profileSourceTable().file
}

// Index returns the block's index reader.
func (b *singleBlockQuerier) Index() IndexReader {
	return b.index
}

// Symbols returns the block's symbols reader.
func (b *singleBlockQuerier) Symbols() symdb.SymbolsReader {
	return b.symbols
}

// Meta returns a copy of the block meta, or a zero value when unset.
func (b *singleBlockQuerier) Meta() block.Meta {
	if b.meta == nil {
		return block.Meta{}
	}
	return *b.meta
}

// ProfileTypes returns the sorted profile types present in the block,
// parsed from the profile-type label values of the index.
func (b *singleBlockQuerier) ProfileTypes(ctx context.Context, req *connect.Request[ingestv1.ProfileTypesRequest]) (*connect.Response[ingestv1.ProfileTypesResponse], error) {
	sp, ctx := opentracing.StartSpanFromContext(ctx, "ProfileTypes Block")
	defer sp.Finish()

	if err := b.Open(ctx); err != nil {
		return nil, err
	}
	b.queries.Add(1)
	defer b.queries.Done()

	values, err := b.index.LabelValues(phlaremodel.LabelNameProfileType)
	if err != nil {
		return nil, err
	}
	slices.Sort(values)

	types := make([]*typesv1.ProfileType, len(values))
	for i, value := range values {
		typ, err := phlaremodel.ParseProfileTypeSelector(value)
		if err != nil {
			return nil, err
		}
		types[i] = typ
	}

	return connect.NewResponse(&ingestv1.ProfileTypesResponse{
		ProfileTypes: types,
	}), nil
}

// LabelValues returns the sorted distinct values of the requested label
// name, restricted to series matching the request's matchers (if any).
func (b *singleBlockQuerier) LabelValues(ctx context.Context, req *connect.Request[typesv1.LabelValuesRequest]) (*connect.Response[typesv1.LabelValuesResponse], error) {
	sp, ctx := opentracing.StartSpanFromContext(ctx, "LabelValues Block")
	defer sp.Finish()

	params := req.Msg

	if err := b.Open(ctx); err != nil {
		return nil, err
	}
	b.queries.Add(1)
	defer b.queries.Done()

	names, err := b.index.LabelNames()
	if err != nil {
		return nil, err
	}
	// Fast path: the label name does not exist in this block at all.
	if !slices.Contains(names, req.Msg.Name) {
		return connect.NewResponse(&typesv1.LabelValuesResponse{
			Names: []string{},
		}), nil
	}

	selectors, err := parseSelectors(params.Matchers)
	if err != nil {
		return nil, err
	}

	iters := make([]index.Postings, 0, 1)
	if selectors.matchesAll() {
		k, v := index.AllPostingsKey()
		iter, err := b.index.Postings(k, nil, v)
		if err != nil {
			return nil, err
		}
		iters = append(iters, iter)
	} else {
		for _, matchers := range selectors {
			iter, err := PostingsForMatchers(b.index, nil, matchers...)
			if err != nil {
				return nil, err
			}
			iters = append(iters, iter)
		}
	}

	// NOTE(review): postings of multiple selectors are intersected here;
	// if selectors represent alternatives (OR semantics), Merge/union may
	// be intended — verify against parseSelectors semantics.
	valueSet := make(map[string]struct{})
	iter := index.Intersect(iters...)
	for iter.Next() {
		value, err := b.index.LabelValueFor(iter.At(), req.Msg.Name)
		if err != nil {
			if err == storage.ErrNotFound {
				// Series without this label: skip.
				continue
			}
			return nil, err
		}
		valueSet[value] = struct{}{}
	}

	values := make([]string, 0, len(valueSet))
	for value := range valueSet {
		values = append(values, value)
	}
	slices.Sort(values)
	return connect.NewResponse(&typesv1.LabelValuesResponse{
		Names: values,
	}), nil
}

// LabelNames returns the sorted set of label names, restricted to series
// matching the request's matchers (if any).
func (b *singleBlockQuerier) LabelNames(ctx context.Context, req *connect.Request[typesv1.LabelNamesRequest]) (*connect.Response[typesv1.LabelNamesResponse], error) {
	sp, ctx := opentracing.StartSpanFromContext(ctx, "LabelNames Block")
	defer sp.Finish()

	params := req.Msg

	if err := b.Open(ctx); err != nil {
		return nil, err
	}
	b.queries.Add(1)
	defer b.queries.Done()

	selectors, err := parseSelectors(params.Matchers)
	if err != nil {
		return nil, err
	}

	// Fast path: no matchers means all label names from the index.
	if selectors.matchesAll() {
		names, err := b.index.LabelNames()
		if err != nil {
			return nil, err
		}
		return connect.NewResponse(&typesv1.LabelNamesResponse{
			Names: names,
		}), nil
	}

	var iters []index.Postings
	for _, matchers := range selectors {
		iter, err := PostingsForMatchers(b.index, nil, matchers...)
		if err != nil {
			return nil, err
		}
		iters = append(iters, iter)
	}

	nameSet := make(map[string]struct{})
	iter := index.Intersect(iters...)
	for iter.Next() {
		names, err := b.index.LabelNamesFor(iter.At())
		if err != nil {
			if err == storage.ErrNotFound {
				continue
			}
			return nil, err
		}

		for _, name := range names {
			nameSet[name] = struct{}{}
		}
	}

	names := make([]string, 0, len(nameSet))
	for name := range nameSet {
		names = append(names, name)
	}
	slices.Sort(names)
	return connect.NewResponse(&typesv1.LabelNamesResponse{
		Names: names,
	}), nil
}

// BlockID returns the block ULID as a string.
func (b *singleBlockQuerier) BlockID() string {
	return b.meta.ULID.String()
}

// Close waits for in-flight queries, then closes index, tables and
// symbols, combining any errors.
func (b *singleBlockQuerier) Close() error {
	b.openLock.Lock()
	defer func() {
		b.openLock.Unlock()
		// NOTE(review): the opened-blocks gauge is decremented even when
		// the block was never opened — confirm this is intended.
		b.metrics.blockOpened.Dec()
	}()

	if !b.opened {
		return nil
	}
	b.queries.Wait()

	errs := multierror.New()
	if b.index != nil {
		err := b.index.Close()
		b.index = nil
		if err != nil {
			errs.Add(err)
		}
	}
	for _, t := range b.tables {
		if err := t.Close(); err != nil {
			errs.Add(err)
		}
	}
	if b.symbols != nil {
		if err := b.symbols.Close(); err != nil {
			errs.Add(err)
		}
	}
	b.opened = false
	return errs.Err()
}

// Bounds returns the time range covered by the block.
func (b *singleBlockQuerier) Bounds() (model.Time, model.Time) {
	return b.meta.MinTime, b.meta.MaxTime
}

// GetMetaStats returns size/age statistics derived from the block meta.
func (b *singleBlockQuerier) GetMetaStats() block.MetaStats {
	return b.meta.GetStats()
}

// Profile is the minimal view of a stored profile needed for selection,
// sorting and merging.
type Profile interface {
	RowNumber() int64
	StacktracePartition() uint64
	Timestamp() model.Time
	Fingerprint() model.Fingerprint
	Labels() phlaremodel.Labels
}

// Querier abstracts profile querying over a single data source.
type Querier interface {
	// BlockID returns the block ID of the querier, when it is representing a single block.
	BlockID() string
	Bounds() (model.Time, model.Time)
	Open(ctx context.Context) error
	Sort([]Profile) []Profile

	MergeByStacktraces(ctx context.Context, rows iter.Iterator[Profile], maxNodes int64) (*phlaremodel.Tree, error)
	MergeBySpans(ctx context.Context, rows iter.Iterator[Profile], spans phlaremodel.SpanSelector) (*phlaremodel.Tree, error)
	MergeByLabels(ctx context.Context, rows iter.Iterator[Profile], s *typesv1.StackTraceSelector, by ...string) ([]*typesv1.Series, error)
	MergePprof(ctx context.Context, rows iter.Iterator[Profile], maxNodes int64, s *typesv1.StackTraceSelector) (*profilev1.Profile, error)
	Series(ctx context.Context, params *ingestv1.SeriesRequest) ([]*typesv1.Labels, error)

	SelectMatchingProfiles(ctx context.Context, params *ingestv1.SelectProfilesRequest) (iter.Iterator[Profile], error)
	SelectMergeByStacktraces(ctx context.Context, params *ingestv1.SelectProfilesRequest, maxNodes int64) (*phlaremodel.Tree, error)
	SelectMergeByLabels(ctx context.Context, params *ingestv1.SelectProfilesRequest, s *typesv1.StackTraceSelector, by ...string) ([]*typesv1.Series, error)
	SelectMergeBySpans(ctx context.Context, params *ingestv1.SelectSpanProfileRequest) (*phlaremodel.Tree, error)
	SelectMergePprof(ctx context.Context, params *ingestv1.SelectProfilesRequest, maxNodes int64, s *typesv1.StackTraceSelector) (*profilev1.Profile, error)

	ProfileTypes(context.Context, *connect.Request[ingestv1.ProfileTypesRequest]) (*connect.Response[ingestv1.ProfileTypesResponse], error)
	LabelValues(ctx context.Context, req *connect.Request[typesv1.LabelValuesRequest]) (*connect.Response[typesv1.LabelValuesResponse], error)
	LabelNames(ctx context.Context, req *connect.Request[typesv1.LabelNamesRequest]) (*connect.Response[typesv1.LabelNamesResponse], error)
}

// TimeBounded is anything that exposes a [min, max] time range.
type TimeBounded interface {
	Bounds() (model.Time, model.Time)
}

// InRange reports whether q's time range overlaps [start, end].
func InRange(q TimeBounded, start, end model.Time) bool {
	min, max := q.Bounds()
	if start > max {
		return false
	}
	if end < min {
		return false
	}
	return true
}

// ReadAPI is the read/query surface implemented by Queriers.
type ReadAPI interface {
	LabelValues(context.Context, *connect.Request[typesv1.LabelValuesRequest]) (*connect.Response[typesv1.LabelValuesResponse], error)
	LabelNames(context.Context, *connect.Request[typesv1.LabelNamesRequest]) (*connect.Response[typesv1.LabelNamesResponse], error)
	ProfileTypes(context.Context, *connect.Request[ingestv1.ProfileTypesRequest]) (*connect.Response[ingestv1.ProfileTypesResponse], error)
	Series(context.Context, *connect.Request[ingestv1.SeriesRequest]) (*connect.Response[ingestv1.SeriesResponse], error)
	MergeProfilesStacktraces(context.Context, *connect.BidiStream[ingestv1.MergeProfilesStacktracesRequest, ingestv1.MergeProfilesStacktracesResponse]) error
	MergeProfilesLabels(context.Context, *connect.BidiStream[ingestv1.MergeProfilesLabelsRequest, ingestv1.MergeProfilesLabelsResponse]) error
	MergeProfilesPprof(context.Context, *connect.BidiStream[ingestv1.MergeProfilesPprofRequest, ingestv1.MergeProfilesPprofResponse]) error
	MergeSpanProfile(context.Context, *connect.BidiStream[ingestv1.MergeSpanProfileRequest, ingestv1.MergeSpanProfileResponse]) error
}

// Compile-time check that Queriers satisfies ReadAPI.
var _ ReadAPI = make(Queriers, 0)

// Queriers is a set of Querier, queried together.
type Queriers []Querier

// Open opens all queriers concurrently, at most 128 at a time.
func (queriers Queriers) Open(ctx context.Context) error {
	g, ctx := errgroup.WithContext(ctx)
	g.SetLimit(128)
	for _, q := range queriers {
		q := q
		g.Go(func() error {
			if err := q.Open(ctx); err != nil {
				return err
			}
			return nil
		})
	}
	return g.Wait()
}

// SelectMatchingProfiles selects profiles from all queriers and returns a
// single merged iterator over them.
func (queriers Queriers) SelectMatchingProfiles(ctx context.Context, params *ingestv1.SelectProfilesRequest) (iter.Iterator[Profile], error) {
	iters, err := SelectMatchingProfiles(ctx, params, queriers)
	if err != nil {
		return nil, err
	}
	return phlaremodel.NewMergeIterator(maxBlockProfile, true, iters...), nil
}

// LabelValues fans the query out to the queriers selected by time range;
// a request without a time range consults all queriers.
func (queriers Queriers) LabelValues(ctx context.Context, req *connect.Request[typesv1.LabelValuesRequest]) (*connect.Response[typesv1.LabelValuesResponse], error) {
	blockGetter := queriers.ForTimeRange
	_, hasTimeRange := phlaremodel.GetTimeRange(req.Msg)
	if !hasTimeRange {
		blockGetter = func(_ context.Context, _, _ model.Time, _ *ingestv1.Hints) (Queriers, error) {
			return queriers, nil
		}
	}
	res, err := LabelValues(ctx, req, blockGetter)
	if err != nil {
		return nil, err
	}
	return connect.NewResponse(res), nil
}

// LabelNames fans the query out to the queriers selected by time range;
// a request without a time range consults all queriers.
func (queriers Queriers) LabelNames(ctx context.Context, req *connect.Request[typesv1.LabelNamesRequest]) (*connect.Response[typesv1.LabelNamesResponse], error) {
	blockGetter := queriers.ForTimeRange
	_, hasTimeRange := phlaremodel.GetTimeRange(req.Msg)
	if !hasTimeRange {
		blockGetter = func(_ context.Context, _, _ model.Time, _ *ingestv1.Hints) (Queriers, error) {
			return queriers, nil
		}
	}
	res, err := LabelNames(ctx, req, blockGetter)
	if err != nil {
		return nil, err
	}
	return connect.NewResponse(res), nil
}

// ProfileTypes fans the query out to the queriers selected by time range;
// a request without a time range consults all queriers.
func (queriers Queriers) ProfileTypes(ctx context.Context, req *connect.Request[ingestv1.ProfileTypesRequest]) (*connect.Response[ingestv1.ProfileTypesResponse], error) {
	blockGetter := queriers.ForTimeRange
	_, hasTimeRange := phlaremodel.GetTimeRange(req.Msg)
	if !hasTimeRange {
		blockGetter = func(_ context.Context, _, _ model.Time, _ *ingestv1.Hints) (Queriers, error) {
			return queriers, nil
		}
	}
	res, err := ProfileTypes(ctx, req, blockGetter)
	if err != nil {
		return nil, err
	}
	return res, nil
}

// Series fans the query out to the queriers selected by time range.
func (queriers Queriers) Series(ctx context.Context, req *connect.Request[ingestv1.SeriesRequest]) (*connect.Response[ingestv1.SeriesResponse], error) {
	// todo: verify empty timestamp request should return all series
	blockGetter := queriers.ForTimeRange
	// Legacy Series queries without a range should return all series from all head blocks.
	if req.Msg.Start == 0 || req.Msg.End == 0 {
		blockGetter = func(_ context.Context, _, _ model.Time, _ *ingestv1.Hints) (Queriers, error) {
			return queriers, nil
		}
	}
	res, err := Series(ctx, req.Msg, blockGetter)
	if err != nil {
		return nil, err
	}
	return connect.NewResponse(res), nil
}

func (queriers Queriers) MergeProfilesStacktraces(ctx context.Context, stream *connect.BidiStream[ingestv1.MergeProfilesStacktracesRequest, ingestv1.MergeProfilesStacktracesResponse]) error {
	return MergeProfilesStacktraces(ctx, stream, queriers.ForTimeRange)
}

func (queriers Queriers) MergeProfilesLabels(ctx context.Context, stream *connect.BidiStream[ingestv1.MergeProfilesLabelsRequest, ingestv1.MergeProfilesLabelsResponse]) error {
	return MergeProfilesLabels(ctx, stream, queriers.ForTimeRange)
}

func (queriers Queriers) MergeProfilesPprof(ctx context.Context, stream *connect.BidiStream[ingestv1.MergeProfilesPprofRequest, ingestv1.MergeProfilesPprofResponse]) error {
	return MergeProfilesPprof(ctx, stream, queriers.ForTimeRange)
}

func (queriers Queriers) MergeSpanProfile(ctx context.Context, stream *connect.BidiStream[ingestv1.MergeSpanProfileRequest, ingestv1.MergeSpanProfileResponse]) error {
	return MergeSpanProfile(ctx, stream, queriers.ForTimeRange)
}

// BlockGetter selects the queriers to use for a time range and hints.
type BlockGetter func(ctx context.Context, start, end model.Time, hints *ingestv1.Hints) (Queriers, error)

// ForTimeRange returns the queriers whose blocks overlap [start, end] and
// are not excluded by the hints.
func (queriers Queriers) ForTimeRange(_ context.Context, start, end model.Time, hints *ingestv1.Hints) (Queriers, error) {
	skipBlock := HintsToBlockSkipper(hints)

	result := make(Queriers, 0, len(queriers))
	for _, q := range queriers {
		if !InRange(q, start, end) {
			continue
		}

		if skipBlock(q.BlockID()) {
			continue
		}

		result = append(result, q)
	}
	return result, nil
}

// HintsToBlockSkipper builds a predicate reporting whether a block ULID
// should be skipped: with block hints, every ULID not listed is skipped;
// without hints nothing is skipped.
func HintsToBlockSkipper(hints *ingestv1.Hints) func(ulid string) bool {
	if hints != nil && hints.Block != nil {
		m := make(map[string]struct{})
		for _, blockID := range hints.Block.Ulids {
			m[blockID] = struct{}{}
		}
		return func(ulid string) bool {
			_, exists := m[ulid]
			return !exists
		}
	}

	// without hints do not skip any block
	return func(ulid string) bool { return false }
}

// SelectMatchingProfiles returns a list iterator of profiles matching the given request.
// One iterator per querier, in the same order as queriers; blocks skipped
// by hints get an empty iterator. On error, all opened iterators are closed.
func SelectMatchingProfiles(ctx context.Context, request *ingestv1.SelectProfilesRequest, queriers Queriers) ([]iter.Iterator[Profile], error) {
	g, ctx := errgroup.WithContext(ctx)
	iters := make([]iter.Iterator[Profile], len(queriers))

	skipBlock := HintsToBlockSkipper(request.Hints)

	for i, querier := range queriers {
		if skipBlock(querier.BlockID()) {
			iters[i] = iter.NewEmptyIterator[Profile]()
			continue
		}
		i := i
		querier := querier
		g.Go(util.RecoverPanic(func() error {
			profiles, err := querier.SelectMatchingProfiles(ctx, request)
			if err != nil {
				return err
			}
			iters[i] = iter.NewBufferedIterator(profiles, 1024)
			return nil
		}))
	}

	if err := g.Wait(); err != nil {
		for _, it := range iters {
			if it != nil {
				runutil.CloseWithLogOnErr(util.Logger, it, "closing buffered iterator")
			}
		}
		return nil, err
	}
	return iters, nil
}

// MergeProfilesStacktraces merges profile stacktraces from the selected
// blocks into a single tree, streaming profiles to the client for
// deduplication when required by the request hints.
func MergeProfilesStacktraces(ctx context.Context, stream *connect.BidiStream[ingestv1.MergeProfilesStacktracesRequest, ingestv1.MergeProfilesStacktracesResponse], blockGetter BlockGetter) error {
	sp, ctx := opentracing.StartSpanFromContext(ctx, "MergeProfilesStacktraces")
	defer sp.Finish()

	r, err := stream.Receive()
	if err != nil {
		if errors.Is(err, io.EOF) {
			return connect.NewError(connect.CodeCanceled, errors.New("client closed stream"))
		}
		return err
	}
815 816 if r.Request == nil { 817 return connect.NewError(connect.CodeInvalidArgument, errors.New("missing initial select request")) 818 } 819 request := r.Request 820 sp.LogFields( 821 otlog.String("start", model.Time(request.Start).Time().String()), 822 otlog.String("end", model.Time(request.End).Time().String()), 823 otlog.String("selector", request.LabelSelector), 824 otlog.String("profile_id", request.Type.ID), 825 otlog.Object("hints", request.Hints), 826 ) 827 828 queriers, err := blockGetter(ctx, model.Time(request.Start), model.Time(request.End), request.Hints) 829 if err != nil { 830 return err 831 } 832 833 deduplicationNeeded := true 834 if request.Hints != nil && request.Hints.Block != nil { 835 deduplicationNeeded = request.Hints.Block.Deduplication 836 } 837 838 var m sync.Mutex 839 t := new(phlaremodel.Tree) 840 g, ctx := errgroup.WithContext(ctx) 841 842 // depending on if new need deduplication or not there are two different code paths. 843 if !deduplicationNeeded { 844 // signal the end of the profile streaming by sending an empty response. 845 sp.LogFields(otlog.String("msg", "no profile streaming as no deduplication needed")) 846 if err = stream.Send(&ingestv1.MergeProfilesStacktracesResponse{}); err != nil { 847 return err 848 } 849 850 // in this path we can just merge the profiles from each block and send the result to the client. 851 for _, querier := range queriers { 852 querier := querier 853 g.Go(util.RecoverPanic(func() error { 854 // TODO(simonswine): Split profiles per row group and run the MergeByStacktraces in parallel. 855 merge, err := querier.SelectMergeByStacktraces(ctx, request, r.GetMaxNodes()) 856 if err != nil { 857 return err 858 } 859 860 m.Lock() 861 t.Merge(merge) 862 m.Unlock() 863 return nil 864 })) 865 } 866 } else { 867 // in this path we have to go thorugh every profile and deduplicate them. 
868 iters, err := SelectMatchingProfiles(ctx, request, queriers) 869 if err != nil { 870 return err 871 } 872 873 // send batches of profiles to client and filter via bidi stream. 874 selectedProfiles, err := filterProfiles[ 875 BidiServerMerge[*ingestv1.MergeProfilesStacktracesResponse, *ingestv1.MergeProfilesStacktracesRequest], 876 *ingestv1.MergeProfilesStacktracesResponse, 877 *ingestv1.MergeProfilesStacktracesRequest](ctx, iters, defaultBatchSize, stream) 878 if err != nil { 879 return err 880 } 881 882 for i, querier := range queriers { 883 querier := querier 884 i := i 885 if len(selectedProfiles[i]) == 0 { 886 continue 887 } 888 // Sort profiles for better read locality. 889 // Merge async the result so we can continue streaming profiles. 890 g.Go(util.RecoverPanic(func() error { 891 merge, err := querier.MergeByStacktraces(ctx, iter.NewSliceIterator(querier.Sort(selectedProfiles[i])), r.GetMaxNodes()) 892 if err != nil { 893 return err 894 } 895 m.Lock() 896 t.Merge(merge) 897 m.Unlock() 898 return nil 899 })) 900 } 901 902 // Signals the end of the profile streaming by sending an empty response. 903 // This allows the client to not block other streaming ingesters. 904 sp.LogFields(otlog.String("msg", "signaling the end of the profile streaming")) 905 if err = stream.Send(&ingestv1.MergeProfilesStacktracesResponse{}); err != nil { 906 return err 907 } 908 } 909 910 if err = g.Wait(); err != nil { 911 return err 912 } 913 914 // sends the final result to the client. 
	// Serialize the merged tree, truncated to the client-requested maximum
	// number of nodes, and send it as the final message on the stream.
	treeBytes := t.Bytes(r.GetMaxNodes())
	sp.LogFields(
		otlog.String("msg", "sending the final result to the client"),
		otlog.Int("tree_bytes", len(treeBytes)),
	)
	err = stream.Send(&ingestv1.MergeProfilesStacktracesResponse{
		Result: &ingestv1.MergeProfilesStacktracesResult{
			Format:    ingestv1.StacktracesMergeFormat_MERGE_FORMAT_TREE,
			TreeBytes: treeBytes,
		},
	})
	if err != nil {
		if errors.Is(err, io.EOF) {
			return connect.NewError(connect.CodeCanceled, errors.New("client closed stream"))
		}
		return err
	}

	return nil
}

// MergeSpanProfile merges profiles restricted to the request's span selector
// across all blocks returned by blockGetter, and sends the merged tree back
// to the client as the final message of the bidi stream.
func MergeSpanProfile(ctx context.Context, stream *connect.BidiStream[ingestv1.MergeSpanProfileRequest, ingestv1.MergeSpanProfileResponse], blockGetter BlockGetter) error {
	sp, ctx := opentracing.StartSpanFromContext(ctx, "MergeSpanProfile")
	defer sp.Finish()

	// The first message on the stream carries the select request.
	r, err := stream.Receive()
	if err != nil {
		if errors.Is(err, io.EOF) {
			return connect.NewError(connect.CodeCanceled, errors.New("client closed stream"))
		}
		return err
	}

	if r.Request == nil {
		return connect.NewError(connect.CodeInvalidArgument, errors.New("missing initial select request"))
	}
	request := r.Request
	sp.LogFields(
		otlog.String("start", model.Time(request.Start).Time().String()),
		otlog.String("end", model.Time(request.End).Time().String()),
		otlog.String("selector", request.LabelSelector),
		otlog.String("profile_type_id", request.Type.ID),
		otlog.Object("hints", request.Hints),
	)

	spanSelector, err := phlaremodel.NewSpanSelector(request.SpanSelector)
	if err != nil {
		return err
	}

	queriers, err := blockGetter(ctx, model.Time(request.Start), model.Time(request.End), request.Hints)
	if err != nil {
		return err
	}

	// Deduplication is assumed to be required unless the query hints
	// explicitly state otherwise.
	deduplicationNeeded := true
	if request.Hints != nil && request.Hints.Block != nil {
		deduplicationNeeded = request.Hints.Block.Deduplication
	}

	// m guards t, which is merged into concurrently by the errgroup below.
	var m sync.Mutex
	t := new(phlaremodel.Tree)
	g, ctx := errgroup.WithContext(ctx)

	// Depending on whether we need deduplication or not there are two different code paths.
	if !deduplicationNeeded {
		// signal the end of the profile streaming by sending an empty response.
		sp.LogFields(otlog.String("msg", "no profile streaming as no deduplication needed"))
		if err = stream.Send(&ingestv1.MergeSpanProfileResponse{}); err != nil {
			return err
		}

		// in this path we can just merge the profiles from each block and send the result to the client.
		for _, querier := range queriers {
			querier := querier
			g.Go(util.RecoverPanic(func() error {
				// TODO(simonswine): Split profiles per row group and run the MergeByStacktraces in parallel.
				merge, err := querier.SelectMergeBySpans(ctx, request)
				if err != nil {
					return err
				}

				m.Lock()
				t.Merge(merge)
				m.Unlock()
				return nil
			}))
		}
	} else {
		// in this path we have to go through every profile and deduplicate them.
		iters, err := SelectMatchingProfiles(ctx, &ingestv1.SelectProfilesRequest{
			LabelSelector: request.LabelSelector,
			Type:          request.Type,
			Start:         request.Start,
			End:           request.End,
			Hints:         request.Hints,
		}, queriers)
		if err != nil {
			return err
		}

		// send batches of profiles to client and filter via bidi stream.
		selectedProfiles, err := filterProfiles[
			BidiServerMerge[*ingestv1.MergeSpanProfileResponse, *ingestv1.MergeSpanProfileRequest],
			*ingestv1.MergeSpanProfileResponse,
			*ingestv1.MergeSpanProfileRequest](ctx, iters, defaultBatchSize, stream)
		if err != nil {
			return err
		}

		for i, querier := range queriers {
			querier := querier
			i := i
			if len(selectedProfiles[i]) == 0 {
				continue
			}
			// Sort profiles for better read locality.
			// Merge async the result so we can continue streaming profiles.
			g.Go(util.RecoverPanic(func() error {
				merge, err := querier.MergeBySpans(ctx, iter.NewSliceIterator(querier.Sort(selectedProfiles[i])), spanSelector)
				if err != nil {
					return err
				}
				m.Lock()
				t.Merge(merge)
				m.Unlock()
				return nil
			}))
		}

		// Signals the end of the profile streaming by sending an empty response.
		// This allows the client to not block other streaming ingesters.
		sp.LogFields(otlog.String("msg", "signaling the end of the profile streaming"))
		if err = stream.Send(&ingestv1.MergeSpanProfileResponse{}); err != nil {
			return err
		}
	}

	if err = g.Wait(); err != nil {
		return err
	}

	// sends the final result to the client.
	treeBytes := t.Bytes(r.GetMaxNodes())
	sp.LogFields(
		otlog.String("msg", "sending the final result to the client"),
		otlog.Int("tree_bytes", len(treeBytes)),
	)
	err = stream.Send(&ingestv1.MergeSpanProfileResponse{
		Result: &ingestv1.MergeSpanProfileResult{
			TreeBytes: treeBytes,
		},
	})
	if err != nil {
		if errors.Is(err, io.EOF) {
			return connect.NewError(connect.CodeCanceled, errors.New("client closed stream"))
		}
		return err
	}

	return nil
}

// MergeProfilesLabels merges profile values aggregated by the requested
// "by" label names across all blocks returned by blockGetter, and sends
// the resulting series back to the client.
func MergeProfilesLabels(ctx context.Context, stream *connect.BidiStream[ingestv1.MergeProfilesLabelsRequest, ingestv1.MergeProfilesLabelsResponse], blockGetter BlockGetter) error {
	sp, ctx := opentracing.StartSpanFromContext(ctx, "MergeProfilesLabels")
	defer sp.Finish()

	// The first message on the stream carries the select request.
	r, err := stream.Receive()
	if err != nil {
		if errors.Is(err, io.EOF) {
			return connect.NewError(connect.CodeCanceled, errors.New("client closed stream"))
		}
		return err
	}

	if r.Request == nil {
		return connect.NewError(connect.CodeInvalidArgument, errors.New("missing initial select request"))
	}
	request := r.Request
	by := r.By
	// Sorted "by" labels give a canonical grouping order.
	sort.Strings(by)

	sp.LogFields(
		otlog.String("start", model.Time(request.Start).Time().String()),
		otlog.String("end", model.Time(request.End).Time().String()),
		otlog.String("selector", request.LabelSelector),
		otlog.String("profile_id", request.Type.ID),
		otlog.String("by", strings.Join(by, ",")),
	)

	queriers, err := blockGetter(ctx, model.Time(request.Start), model.Time(request.End), request.Hints)
	if err != nil {
		return err
	}
	result := make([][]*typesv1.Series, 0, len(queriers))
	g, ctx := errgroup.WithContext(ctx)
	// sync serializes appends to result from the merge goroutines below.
	sync := lo.Synchronize()

	// Deduplication is assumed to be required unless the query hints
	// explicitly state otherwise.
	deduplicationNeeded := true
	if request.Hints != nil && request.Hints.Block != nil {
		deduplicationNeeded = request.Hints.Block.Deduplication
	}

	if !deduplicationNeeded {
		// signal the end of the profile streaming by sending an empty response.
		sp.LogFields(otlog.String("msg", "no profile streaming as no deduplication needed"))
		if err = stream.Send(&ingestv1.MergeProfilesLabelsResponse{}); err != nil {
			return err
		}
		// in this path we can just merge the profiles from each block and send the result to the client.
		for _, querier := range queriers {
			querier := querier
			g.Go(util.RecoverPanic(func() error {
				merge, err := querier.SelectMergeByLabels(ctx, request, r.StackTraceSelector, by...)
				if err != nil {
					return err
				}

				sync.Do(func() {
					result = append(result, merge)
				})
				return nil
			}))
		}
	} else {
		// in this path we have to go through every profile and deduplicate them.
		iters, err := SelectMatchingProfiles(ctx, request, queriers)
		if err != nil {
			return err
		}
		// send batches of profiles to client and filter via bidi stream.
		selectedProfiles, err := filterProfiles[
			BidiServerMerge[*ingestv1.MergeProfilesLabelsResponse, *ingestv1.MergeProfilesLabelsRequest],
			*ingestv1.MergeProfilesLabelsResponse,
			*ingestv1.MergeProfilesLabelsRequest](ctx, iters, defaultBatchSize, stream)
		if err != nil {
			return err
		}

		// Signals the end of the profile streaming by sending an empty response.
		// This allows the client to not block other streaming ingesters.
		if err := stream.Send(&ingestv1.MergeProfilesLabelsResponse{}); err != nil {
			return err
		}
		for i, querier := range queriers {
			i := i
			querier := querier
			if len(selectedProfiles[i]) == 0 {
				continue
			}
			// Sort profiles for better read locality.
			// And merge async the result for each querier.
			g.Go(util.RecoverPanic(func() error {
				merge, err := querier.MergeByLabels(ctx,
					iter.NewSliceIterator(querier.Sort(selectedProfiles[i])),
					r.StackTraceSelector,
					by...)
				if err != nil {
					return err
				}
				sync.Do(func() {
					result = append(result, merge)
				})

				return nil
			}))
		}
	}

	if err := g.Wait(); err != nil {
		return err
	}

	// sends the final result to the client.
	err = stream.Send(&ingestv1.MergeProfilesLabelsResponse{
		Series: phlaremodel.MergeSeries(request.Aggregation, result...),
	})
	if err != nil {
		if errors.Is(err, io.EOF) {
			return connect.NewError(connect.CodeCanceled, errors.New("client closed stream"))
		}
		return err
	}

	return nil
}

// MergeProfilesPprof merges the selected profiles across all blocks
// returned by blockGetter and sends the merged result back to the client
// as serialized pprof bytes.
func MergeProfilesPprof(ctx context.Context, stream *connect.BidiStream[ingestv1.MergeProfilesPprofRequest, ingestv1.MergeProfilesPprofResponse], blockGetter BlockGetter) error {
	sp, ctx := opentracing.StartSpanFromContext(ctx, "MergeProfilesPprof")
	defer sp.Finish()

	// The first message on the stream carries the select request.
	r, err := stream.Receive()
	if err != nil {
		if errors.Is(err, io.EOF) {
			return connect.NewError(connect.CodeCanceled, errors.New("client closed stream"))
		}
		return err
	}

	if r.Request == nil {
		return connect.NewError(connect.CodeInvalidArgument, errors.New("missing initial select request"))
	}

	request := r.Request
	sp.SetTag("start", model.Time(request.Start).Time().String()).
		SetTag("end", model.Time(request.End).Time().String()).
		SetTag("selector", request.LabelSelector).
		SetTag("profile_type", request.Type.ID).
		SetTag("max_nodes", r.GetMaxNodes())
	sp.LogFields(otlog.Object("hints", request.Hints))

	queriers, err := blockGetter(ctx, model.Time(request.Start), model.Time(request.End), request.Hints)
	if err != nil {
		return err
	}

	// Deduplication is assumed to be required unless the query hints
	// explicitly state otherwise.
	deduplicationNeeded := true
	if request.Hints != nil && request.Hints.Block != nil {
		deduplicationNeeded = request.Hints.Block.Deduplication
	}

	var result pprof.ProfileMerge
	g, ctx := errgroup.WithContext(ctx)

	// Depending on whether we need deduplication or not there are two different code paths.
	if !deduplicationNeeded {
		// signal the end of the profile streaming by sending an empty response.
		sp.LogFields(otlog.String("msg", "no profile streaming as no deduplication needed"))
		if err = stream.Send(&ingestv1.MergeProfilesPprofResponse{}); err != nil {
			return err
		}

		// in this path we can just merge the profiles from each block and send the result to the client.
		for _, querier := range queriers {
			querier := querier
			g.Go(util.RecoverPanic(func() error {
				p, err := querier.SelectMergePprof(ctx, request, r.GetMaxNodes(), r.StackTraceSelector)
				if err != nil {
					return err
				}
				// NOTE(review): result.Merge is called from multiple goroutines;
				// presumably pprof.ProfileMerge is safe for concurrent use — confirm.
				return result.Merge(p, true)
			}))
		}
	} else {
		// in this path we have to go through every profile and deduplicate them.
		iters, err := SelectMatchingProfiles(ctx, request, queriers)
		if err != nil {
			return err
		}

		// send batches of profiles to client and filter via bidi stream.
		selectedProfiles, err := filterProfiles[
			BidiServerMerge[*ingestv1.MergeProfilesPprofResponse, *ingestv1.MergeProfilesPprofRequest],
			*ingestv1.MergeProfilesPprofResponse,
			*ingestv1.MergeProfilesPprofRequest](ctx, iters, defaultBatchSize, stream)
		if err != nil {
			return err
		}

		for i, querier := range queriers {
			querier := querier
			i := i
			if len(selectedProfiles[i]) == 0 {
				continue
			}
			// Sort profiles for better read locality.
			// Merge async the result so we can continue streaming profiles.
			g.Go(util.RecoverPanic(func() error {
				p, err := querier.MergePprof(ctx,
					iter.NewSliceIterator(querier.Sort(selectedProfiles[i])),
					r.GetMaxNodes(), r.StackTraceSelector)
				if err != nil {
					return err
				}
				return result.Merge(p, true)
			}))
		}

		// Signals the end of the profile streaming by sending an empty response.
		// This allows the client to not block other streaming ingesters.
		sp.LogFields(otlog.String("msg", "signaling the end of the profile streaming"))
		if err = stream.Send(&ingestv1.MergeProfilesPprofResponse{}); err != nil {
			return err
		}
	}

	if err = g.Wait(); err != nil {
		return err
	}

	sp.LogFields(otlog.String("msg", "building pprof bytes"))
	mergedProfile := result.Profile()
	pprof.SetProfileMetadata(mergedProfile, request.Type, model.Time(r.Request.End).UnixNano(), 0)

	// connect go already handles compression.
	pprofBytes, err := pprof.Marshal(mergedProfile, false)
	if err != nil {
		return err
	}
	// sends the final result to the client.
	sp.LogFields(
		otlog.String("msg", "sending the final result to the client"),
		otlog.Int("tree_bytes", len(pprofBytes)),
	)
	err = stream.Send(&ingestv1.MergeProfilesPprofResponse{Result: pprofBytes})
	if err != nil {
		if errors.Is(err, io.EOF) {
			return connect.NewError(connect.CodeCanceled, errors.New("client closed stream"))
		}
		return err
	}

	return nil
}

// ProfileTypes returns the deduplicated union of profile types found in
// all blocks overlapping the requested time range.
func ProfileTypes(ctx context.Context, req *connect.Request[ingestv1.ProfileTypesRequest], blockGetter BlockGetter) (*connect.Response[ingestv1.ProfileTypesResponse], error) {
	queriers, err := blockGetter(ctx, model.Time(req.Msg.Start), model.Time(req.Msg.End), nil)
	if err != nil {
		return nil, err
	}

	g, ctx := errgroup.WithContext(ctx)
	// uniqTypes deduplicates profile types by ID; lock guards it.
	uniqTypes := make(map[string]*typesv1.ProfileType)
	lock := sync.Mutex{}

	for _, q := range queriers {
		q := q
		g.Go(func() error {
			res, err := q.ProfileTypes(ctx, req)
			if err != nil {
				return err
			}

			lock.Lock()
			defer lock.Unlock()
			for _, t := range res.Msg.ProfileTypes {
				uniqTypes[t.ID] = t.CloneVT()
			}
			return nil
		})
	}
	if err := g.Wait(); err != nil {
		return nil, err
	}
	types := lo.Values(uniqTypes)

	// Map iteration order is random, so sort by ID for a stable response.
	sort.Slice(types, func(i, j int) bool {
		return types[i].ID < types[j].ID
	})
	return connect.NewResponse(&ingestv1.ProfileTypesResponse{
		ProfileTypes: types,
	}), nil
}

// LabelValues returns the sorted, deduplicated union of label values from
// all blocks overlapping the requested time range.
func LabelValues(ctx context.Context, req *connect.Request[typesv1.LabelValuesRequest], blockGetter BlockGetter) (*typesv1.LabelValuesResponse, error) {
	queriers, err := blockGetter(ctx, model.Time(req.Msg.Start), model.Time(req.Msg.End), nil)
	if err != nil {
		return nil, err
	}

	var values []string
	var lock sync.Mutex
	group, ctx := errgroup.WithContext(ctx)

	// Bound the number of blocks queried concurrently.
	const concurrentQueryLimit = 50
	group.SetLimit(concurrentQueryLimit)

	for _, q := range queriers {
		group.Go(util.RecoverPanic(func() error {
			res, err := q.LabelValues(ctx, req)
			if err != nil {
				return err
			}

			lock.Lock()
			values = append(values, res.Msg.Names...)
			lock.Unlock()
			return nil
		}))
	}
	err = group.Wait()
	if err != nil {
		return nil, err
	}

	slices.Sort(values)
	return &typesv1.LabelValuesResponse{Names: lo.Uniq(values)}, nil
}

// LabelNames returns the sorted, deduplicated union of label names from
// all blocks overlapping the requested time range.
func LabelNames(ctx context.Context, req *connect.Request[typesv1.LabelNamesRequest], blockGetter BlockGetter) (*typesv1.LabelNamesResponse, error) {
	queriers, err := blockGetter(ctx, model.Time(req.Msg.Start), model.Time(req.Msg.End), nil)
	if err != nil {
		return nil, err
	}

	var labelNames []string
	var lock sync.Mutex
	group, ctx := errgroup.WithContext(ctx)

	// Bound the number of blocks queried concurrently.
	const concurrentQueryLimit = 50
	group.SetLimit(concurrentQueryLimit)

	for _, q := range queriers {
		group.Go(util.RecoverPanic(func() error {
			res, err := q.LabelNames(ctx, req)
			if err != nil {
				return err
			}

			lock.Lock()
			labelNames = append(labelNames, res.Msg.Names...)
			lock.Unlock()
			return nil
		}))
	}
	err = group.Wait()
	if err != nil {
		return nil, err
	}

	slices.Sort(labelNames)
	return &typesv1.LabelNamesResponse{
		Names: lo.Uniq(labelNames),
	}, nil
}

// Series returns the unique label sets of the series found in all blocks
// overlapping the requested time range.
func Series(ctx context.Context, req *ingestv1.SeriesRequest, blockGetter BlockGetter) (*ingestv1.SeriesResponse, error) {
	queriers, err := blockGetter(ctx, model.Time(req.Start), model.Time(req.End), nil)
	if err != nil {
		return nil, err
	}

	var labelsSet []*typesv1.Labels
	var lock sync.Mutex
	group, ctx := errgroup.WithContext(ctx)

	// TODO(bryan) Verify this limit is ok
	const concurrentQueryLimit = 50
	group.SetLimit(concurrentQueryLimit)

	for _, q := range queriers {
		q := q
		group.Go(util.RecoverPanic(func() error {
			labels, err := q.Series(ctx, req)
			if err != nil {
				return err
			}

			lock.Lock()
			labelsSet = append(labelsSet, labels...)
			lock.Unlock()
			return nil
		}))
	}
	err = group.Wait()
	if err != nil {
		return nil, err
	}

	// Sort for a deterministic response, then deduplicate by label-set hash.
	sort.Slice(labelsSet, func(i, j int) bool {
		return phlaremodel.CompareLabelPairs(labelsSet[i].Labels, labelsSet[j].Labels) < 0
	})
	return &ingestv1.SeriesResponse{
		LabelsSet: lo.UniqBy(labelsSet, func(set *typesv1.Labels) uint64 {
			return phlaremodel.Labels(set.Labels).Hash()
		}),
	}, nil
}

// maxBlockProfile is a sentinel with the maximum possible timestamp,
// passed to the merge iterator as its upper bound.
var maxBlockProfile Profile = BlockProfile{
	timestamp: model.Time(math.MaxInt64),
}

// BlockProfile is a lightweight reference to a single profile row in a block.
type BlockProfile struct {
	rowNum      int64      // parquet row number of the profile
	timestamp   model.Time // profile timestamp
	fingerprint model.Fingerprint
	labels      phlaremodel.Labels
	partition   uint64 // stacktrace partition; 0 for blocks without a partition column
}

// StacktracePartition returns the stacktrace partition the profile belongs to.
func (p BlockProfile) StacktracePartition() uint64 {
	return p.partition
}

// RowNumber returns the profile's parquet row number.
func (p BlockProfile) RowNumber() int64 {
	return p.rowNum
}

// Labels returns the label set of the profile's series.
func (p BlockProfile) Labels() phlaremodel.Labels {
	return p.labels
}

// Timestamp returns the profile's timestamp.
func (p BlockProfile) Timestamp() model.Time {
	return p.timestamp
}

// Fingerprint returns the fingerprint of the profile's series.
func (p BlockProfile) Fingerprint() model.Fingerprint {
	return p.fingerprint
}

// retrieveStacktracePartition reads the stacktrace partition value from the
// column buffer at pos. Buffers without that column (pre-v2 blocks) yield
// partition 0.
func retrieveStacktracePartition(buf [][]parquet.Value, pos int) uint64 {
	if len(buf) > pos && len(buf[pos]) == 1 {
		return buf[pos][0].Uint64()
	}

	// return 0 stacktrace partition
	return uint64(0)
}

// labelsInfo pairs a series fingerprint with its label set.
type labelsInfo struct {
	fp  model.Fingerprint
	lbs phlaremodel.Labels
}

// SelectMatchingProfiles returns an iterator over the profiles in this block
// that match the request's label selector, profile type and time range. The
// per-series iterators are combined with a timestamp-ordered merge iterator.
func (b *singleBlockQuerier) SelectMatchingProfiles(ctx context.Context, params *ingestv1.SelectProfilesRequest) (iter.Iterator[Profile], error) {
	sp, ctx := opentracing.StartSpanFromContext(ctx, "SelectMatchingProfiles - Block")
	defer sp.Finish()
	sp.SetTag("block ULID", b.meta.ULID.String())

	if err := b.Open(ctx); err != nil {
		return nil, err
	}
	b.queries.Add(1)
	defer b.queries.Done()

	matchers, err := parser.ParseMetricSelector(params.LabelSelector)
	if err != nil {
		return nil, status.Error(codes.InvalidArgument, "failed to parse label selectors: "+err.Error())
	}
	if params.Type == nil {
		return nil, errors.New("no profileType given")
	}
	matchers = append(matchers, phlaremodel.SelectorFromProfileType(params.Type))

	postings, err := PostingsForMatchers(b.index, nil, matchers...)
	if err != nil {
		return nil, err
	}

	var (
		lbls       = make(phlaremodel.Labels, 0, 6)
		chks       = make([]index.ChunkMeta, 1)
		lblsPerRef = make(map[int64]labelsInfo)
	)

	// get all relevant labels/fingerprints
	for postings.Next() {
		fp, err := b.index.Series(postings.At(), &lbls, &chks)
		if err != nil {
			return nil, err
		}
		if _, exists := lblsPerRef[int64(chks[0].SeriesIndex)]; exists {
			continue
		}
		// lbls is reused by the index reader, so keep a copy.
		info := labelsInfo{
			fp:  model.Fingerprint(fp),
			lbs: make(phlaremodel.Labels, len(lbls)),
		}
		copy(info.lbs, lbls)
		lblsPerRef[int64(chks[0].SeriesIndex)] = info

	}

	var buf [][]parquet.Value

	profiles := b.profileSourceTable()
	// Join the SeriesIndex and TimeNanos columns so only rows matching both
	// the series set and the time range are produced.
	pIt := query.NewBinaryJoinIterator(
		0,
		profiles.columnIter(ctx, "SeriesIndex", query.NewMapPredicate(lblsPerRef), "SeriesIndex"),
		profiles.columnIter(ctx, "TimeNanos", query.NewIntBetweenPredicate(model.Time(params.Start).UnixNano(), model.Time(params.End).UnixNano()), "TimeNanos"),
	)

	// The StacktracePartition column only exists in block format v2+.
	if b.meta.Version >= 2 {
		pIt = query.NewBinaryJoinIterator(
			0,
			pIt,
			profiles.columnIter(ctx, "StacktracePartition", nil, "StacktracePartition"),
		)
		buf = make([][]parquet.Value, 3)
	} else {
		buf = make([][]parquet.Value, 2)
	}

	iters := make([]iter.Iterator[Profile], 0, len(lblsPerRef))
	defer pIt.Close()

	// Rows arrive grouped by series; cut a new slice whenever the series changes.
	currSeriesIndex := int64(-1)
	var currentSeriesSlice []Profile
	for pIt.Next() {
		res := pIt.At()
		buf = res.Columns(buf, "SeriesIndex", "TimeNanos", "StacktracePartition")
		seriesIndex := buf[0][0].Int64()
		if seriesIndex != currSeriesIndex {
			currSeriesIndex = seriesIndex
			if len(currentSeriesSlice) > 0 {
				iters = append(iters, iter.NewSliceIterator(currentSeriesSlice))
			}
			currentSeriesSlice = make([]Profile, 0, 100)
		}

		currentSeriesSlice = append(currentSeriesSlice, BlockProfile{
			labels:      lblsPerRef[seriesIndex].lbs,
			fingerprint: lblsPerRef[seriesIndex].fp,
			timestamp:   model.TimeFromUnixNano(buf[1][0].Int64()),
			partition:   retrieveStacktracePartition(buf, 2),
			rowNum:      res.RowNumber[0],
		})
	}
	if len(currentSeriesSlice) > 0 {
		iters = append(iters, iter.NewSliceIterator(currentSeriesSlice))
	}

	return phlaremodel.NewMergeIterator(maxBlockProfile, false, iters...), nil
}

// SelectMergeByLabels merges the matching profile values of this block into
// time series grouped by the given "by" label names, optionally restricted
// to a stack trace selector.
func (b *singleBlockQuerier) SelectMergeByLabels(
	ctx context.Context,
	params *ingestv1.SelectProfilesRequest,
	sts *typesv1.StackTraceSelector,
	by ...string,
) ([]*typesv1.Series, error) {
	sp, ctx := opentracing.StartSpanFromContext(ctx, "SelectMergeByLabels - Block")
	defer sp.Finish()
	sp.SetTag("block ULID", b.meta.ULID.String())
	ctx = query.AddMetricsToContext(ctx, b.metrics.query)

	if err := b.Open(ctx); err != nil {
		return nil, err
	}
	b.queries.Add(1)
	defer b.queries.Done()

	matchers, err := parser.ParseMetricSelector(params.LabelSelector)
	if err != nil {
		return nil, status.Error(codes.InvalidArgument, "failed to parse label selectors: "+err.Error())
	}
	if params.Type == nil {
		return nil, errors.New("no profileType given")
	}
	matchers = append(matchers, phlaremodel.SelectorFromProfileType(params.Type))

	postings, err := PostingsForMatchers(b.index, nil, matchers...)
	if err != nil {
		return nil, err
	}
	var (
		chks       = make([]index.ChunkMeta, 1)
		lblsPerRef = make(map[int64]labelsInfo)
		lbls       = make(phlaremodel.Labels, 0, 6)
	)
	// get all relevant labels/fingerprints
	for postings.Next() {
		fp, err := b.index.SeriesBy(postings.At(), &lbls, &chks, by...)
		if err != nil {
			return nil, err
		}

		_, ok := lblsPerRef[int64(chks[0].SeriesIndex)]
		if !ok {
			// lbls is reused by the index reader, so keep a copy.
			info := labelsInfo{
				fp:  model.Fingerprint(fp),
				lbs: make(phlaremodel.Labels, len(lbls)),
			}
			copy(info.lbs, lbls)
			lblsPerRef[int64(chks[0].SeriesIndex)] = info
		}
	}

	profiles := b.profileSourceTable()
	it := query.NewBinaryJoinIterator(
		0,
		profiles.columnIter(ctx, "SeriesIndex", query.NewMapPredicate(lblsPerRef), "SeriesIndex"),
		profiles.columnIter(ctx, "TimeNanos", query.NewIntBetweenPredicate(model.Time(params.Start).UnixNano(), model.Time(params.End).UnixNano()), "TimeNanos"),
	)

	// Without a call-site selector the pre-aggregated value column suffices.
	if len(sts.GetCallSite()) == 0 {
		columnName := "TotalValue"
		if b.meta.Version == 1 {
			// v1 blocks have no TotalValue column.
			columnName = "Samples.list.element.Value"
		}
		rows := profileBatchIteratorBySeriesIndex(it, lblsPerRef)
		defer rows.Close()
		return mergeByLabels[Profile](ctx, profiles.file, columnName, rows, by...)
	}

	// Stack trace selection requires symbol data only present in v2+ blocks.
	if b.meta.Version < 2 {
		return nil, nil
	}

	r := symdb.NewResolver(ctx, b.symbols,
		symdb.WithResolverStackTraceSelector(sts))
	defer r.Release()

	it = query.NewBinaryJoinIterator(0, it, profiles.columnIter(ctx, "StacktracePartition", nil, "StacktracePartition"))
	rows := profileBatchIteratorBySeriesIndex(it, lblsPerRef)
	defer rows.Close()

	return mergeByLabelsWithStackTraceSelector[Profile](ctx, profiles.file, rows, r, by...)
}

// SelectMergeByStacktraces merges the matching profiles of this block into a
// single tree, truncated to maxNodes.
func (b *singleBlockQuerier) SelectMergeByStacktraces(ctx context.Context, params *ingestv1.SelectProfilesRequest, maxNodes int64) (tree *phlaremodel.Tree, err error) {
	sp, ctx := opentracing.StartSpanFromContext(ctx, "SelectMergeByStacktraces - Block")
	defer sp.Finish()
	sp.SetTag("block ULID", b.meta.ULID.String())
	ctx = query.AddMetricsToContext(ctx, b.metrics.query)

	if err := b.Open(ctx); err != nil {
		return nil, err
	}
	b.queries.Add(1)
	defer b.queries.Done()

	matchers, err := parser.ParseMetricSelector(params.LabelSelector)
	if err != nil {
		return nil, status.Error(codes.InvalidArgument, "failed to parse label selectors: "+err.Error())
	}
	if params.Type == nil {
		return nil, errors.New("no profileType given")
	}
	matchers = append(matchers, phlaremodel.SelectorFromProfileType(params.Type))

	postings, err := PostingsForMatchers(b.index, nil, matchers...)
	if err != nil {
		return nil, err
	}

	var (
		chks       = make([]index.ChunkMeta, 1)
		lblsPerRef = make(map[int64]struct{})
	)

	// get all relevant labels/fingerprints
	for postings.Next() {
		_, err := b.index.Series(postings.At(), nil, &chks)
		if err != nil {
			return nil, err
		}
		lblsPerRef[int64(chks[0].SeriesIndex)] = struct{}{}
	}
	r := symdb.NewResolver(ctx, b.symbols, symdb.WithResolverMaxNodes(maxNodes))
	defer r.Release()

	// Query each downsampling resolution sub-range concurrently; all results
	// are folded into the shared resolver r.
	g, ctx := errgroup.WithContext(ctx)
	util.SplitTimeRangeByResolution(time.UnixMilli(params.Start), time.UnixMilli(params.End), b.downsampleResolutions(), func(tr util.TimeRange) {
		g.Go(func() error {
			profiles := b.profileTable(tr.Resolution, params.GetAggregation())
			it := query.NewBinaryJoinIterator(
				0,
				profiles.columnIter(ctx, "SeriesIndex", query.NewMapPredicate(lblsPerRef), ""),
				profiles.columnIter(ctx, "TimeNanos", query.NewIntBetweenPredicate(tr.Start.UnixNano(), tr.End.UnixNano()), ""),
			)

			// The StacktracePartition column only exists in block format v2+.
			if b.meta.Version >= 2 {
				it = query.NewBinaryJoinIterator(0,
					it,
					profiles.columnIter(ctx, "StacktracePartition", nil, "StacktracePartition"),
				)
			}
			rows := profileRowBatchIterator(it)
			defer rows.Close()
			return mergeByStacktraces(ctx, profiles.file, rows, r)
		})
	})
	if err = g.Wait(); err != nil {
		return nil, err
	}
	return r.Tree()
}

// SelectMergeBySpans merges the matching profiles of this block into a tree,
// restricted to the stack traces that belong to the selected spans.
func (b *singleBlockQuerier) SelectMergeBySpans(ctx context.Context, params *ingestv1.SelectSpanProfileRequest) (*phlaremodel.Tree, error) {
	sp, ctx := opentracing.StartSpanFromContext(ctx, "SelectMergeBySpans - Block")
	defer sp.Finish()
	sp.SetTag("block ULID", b.meta.ULID.String())
	ctx = query.AddMetricsToContext(ctx, b.metrics.query)

	if err := b.Open(ctx); err != nil {
		return nil, err
	}
	b.queries.Add(1)
	defer b.queries.Done()

	matchers, err := parser.ParseMetricSelector(params.LabelSelector)
	if err != nil {
		return nil, status.Error(codes.InvalidArgument, "failed to parse label selectors: "+err.Error())
	}
	if params.Type == nil {
		return nil, errors.New("no profileType given")
	}
	spans, err := phlaremodel.NewSpanSelector(params.SpanSelector)
	if err != nil {
		return nil, err
	}
	matchers = append(matchers, phlaremodel.SelectorFromProfileType(params.Type))

	postings, err := PostingsForMatchers(b.index, nil, matchers...)
	if err != nil {
		return nil, err
	}

	var (
		chks       = make([]index.ChunkMeta, 1)
		lblsPerRef = make(map[int64]struct{})
	)

	// get all relevant labels/fingerprints
	for postings.Next() {
		_, err := b.index.Series(postings.At(), nil, &chks)
		if err != nil {
			return nil, err
		}
		lblsPerRef[int64(chks[0].SeriesIndex)] = struct{}{}
	}
	r := symdb.NewResolver(ctx, b.symbols)
	defer r.Release()

	profiles := b.profileSourceTable()
	it := query.NewBinaryJoinIterator(
		0,
		profiles.columnIter(ctx, "SeriesIndex", query.NewMapPredicate(lblsPerRef), ""),
		profiles.columnIter(ctx, "TimeNanos", query.NewIntBetweenPredicate(model.Time(params.Start).UnixNano(), model.Time(params.End).UnixNano()), ""),
	)

	// The StacktracePartition column only exists in block format v2+.
	if b.meta.Version >= 2 {
		it = query.NewBinaryJoinIterator(0,
			it,
			profiles.columnIter(ctx, "StacktracePartition", nil, "StacktracePartition"),
		)
	}

	rows := profileRowBatchIterator(it)
	defer rows.Close()
	if err = mergeBySpans[rowProfile](ctx, profiles.file, rows, r, spans); err != nil {
		return nil, err
	}
	return r.Tree()
}

// SelectMergePprof merges the matching profiles of this block into a single
// pprof profile, truncated to maxNodes and optionally restricted to a stack
// trace selector.
func (b *singleBlockQuerier) SelectMergePprof(ctx context.Context, params *ingestv1.SelectProfilesRequest, maxNodes int64, sts *typesv1.StackTraceSelector) (*profilev1.Profile, error) {
	sp, ctx := opentracing.StartSpanFromContext(ctx, "SelectMergePprof - Block")
	defer sp.Finish()
	sp.SetTag("block ULID", b.meta.ULID.String())
	ctx = query.AddMetricsToContext(ctx, b.metrics.query)

	if err := b.Open(ctx); err != nil {
		return nil, err
	}
	b.queries.Add(1)
	defer b.queries.Done()

	matchers, err := parser.ParseMetricSelector(params.LabelSelector)
	if err != nil {
		return nil, status.Error(codes.InvalidArgument, "failed to parse label selectors: "+err.Error())
	}
	if params.Type == nil {
		return nil, errors.New("no profileType given")
	}
	matchers = append(matchers, phlaremodel.SelectorFromProfileType(params.Type))

	postings, err := PostingsForMatchers(b.index, nil, matchers...)
	if err != nil {
		return nil, err
	}

	var (
		chks       = make([]index.ChunkMeta, 1)
		lblsPerRef = make(map[int64]struct{})
	)

	// get all relevant labels/fingerprints
	for postings.Next() {
		_, err := b.index.Series(postings.At(), nil, &chks)
		if err != nil {
			return nil, err
		}
		lblsPerRef[int64(chks[0].SeriesIndex)] = struct{}{}
	}
	r := symdb.NewResolver(ctx, b.symbols,
		symdb.WithResolverMaxNodes(maxNodes),
		symdb.WithResolverStackTraceSelector(sts))
	defer r.Release()

	// Query each downsampling resolution sub-range concurrently; all results
	// are folded into the shared resolver r.
	g, ctx := errgroup.WithContext(ctx)
	util.SplitTimeRangeByResolution(time.UnixMilli(params.Start), time.UnixMilli(params.End), b.downsampleResolutions(), func(tr util.TimeRange) {
		g.Go(func() error {
			profiles := b.profileTable(tr.Resolution, params.GetAggregation())
			it := query.NewBinaryJoinIterator(
				0,
				profiles.columnIter(ctx, "SeriesIndex", query.NewMapPredicate(lblsPerRef), ""),
				profiles.columnIter(ctx, "TimeNanos", query.NewIntBetweenPredicate(tr.Start.UnixNano(), tr.End.UnixNano()), ""),
			)

			// The StacktracePartition column only exists in block format v2+.
			if b.meta.Version >= 2 {
				it = query.NewBinaryJoinIterator(0,
					it,
					profiles.columnIter(ctx, "StacktracePartition", nil, "StacktracePartition"),
				)
			}
			rows := profileRowBatchIterator(it)
			defer rows.Close()
			return mergeByStacktraces[rowProfile](ctx, profiles.file, rows, r)
		})
	})
	if err = g.Wait(); err != nil {
		return nil, err
	}
	return r.Pprof()
}

// Series selects the series labels from this block.
//
// Note: It will select ALL the labels in the block, not necessarily just the
// subset in the time range SeriesRequest.Start to SeriesRequest.End.
func (b *singleBlockQuerier) Series(ctx context.Context, params *ingestv1.SeriesRequest) ([]*typesv1.Labels, error) {
	sp, ctx := opentracing.StartSpanFromContext(ctx, "Series Block")
	defer sp.Finish()

	if err := b.Open(ctx); err != nil {
		return nil, err
	}
	b.queries.Add(1)
	defer b.queries.Done()

	selectors, err := parseSelectors(params.Matchers)
	if err != nil {
		return nil, err
	}

	names, err := b.index.LabelNames()
	if err != nil {
		return nil, err
	}

	// Restrict the reported label names to the requested subset, if any.
	if len(params.LabelNames) > 0 {
		labelNamesFilter := make(map[string]struct{}, len(params.LabelNames))
		for _, n := range params.LabelNames {
			labelNamesFilter[n] = struct{}{}
		}
		names = lo.Filter(names, func(name string, _ int) bool {
			_, ok := labelNamesFilter[name]
			return ok
		})
	}

	// fingerprints deduplicates label sets across all selectors.
	var labelsSets []*typesv1.Labels
	fingerprints := make(map[uint64]struct{})
	if selectors.matchesAll() {
		k, v := index.AllPostingsKey()
		iter, err := b.index.Postings(k, nil, v)
		if err != nil {
			return nil, err
		}

		sets, err := b.getUniqueLabelsSets(iter, names, &fingerprints)
		if err != nil {
			return nil, err
		}
		labelsSets = append(labelsSets, sets...)
	} else {
		for _, matchers := range selectors {
			iter, err := PostingsForMatchers(b.index, nil, matchers...)
			if err != nil {
				return nil, err
			}

			sets, err := b.getUniqueLabelsSets(iter, names, &fingerprints)
			if err != nil {
				return nil, err
			}
			labelsSets = append(labelsSets, sets...)
		}
	}
	return labelsSets, nil
}

// getUniqueLabelsSets collects the label sets of all series in postings,
// restricted to the given label names and deduplicated via the shared
// fingerprints set (updated in place so callers can combine selectors).
func (b *singleBlockQuerier) getUniqueLabelsSets(postings index.Postings, names []string, fingerprints *map[uint64]struct{}) ([]*typesv1.Labels, error) {
	var labelsSets []*typesv1.Labels

	// This memory will be re-used between posting iterations to avoid
	// re-allocating many *typesv1.LabelPair objects.
	matchedLabelsPool := make(phlaremodel.Labels, len(names))
	for i := range matchedLabelsPool {
		matchedLabelsPool[i] = &typesv1.LabelPair{}
	}

	for postings.Next() {
		// Reset the pool.
		matchedLabelsPool = matchedLabelsPool[:0]

		for _, name := range names {
			value, err := b.index.LabelValueFor(postings.At(), name)
			if err != nil {
				if err == storage.ErrNotFound {
					// The series simply doesn't carry this label.
					continue
				}
				return nil, err
			}

			// Expand the pool's length and add this label to the end. The pool
			// will always have enough capacity for all the labels.
			matchedLabelsPool = matchedLabelsPool[:len(matchedLabelsPool)+1]
			matchedLabelsPool[len(matchedLabelsPool)-1].Name = name
			matchedLabelsPool[len(matchedLabelsPool)-1].Value = value
		}

		fp := matchedLabelsPool.Hash()
		_, ok := (*fingerprints)[fp]
		if ok {
			continue
		}
		(*fingerprints)[fp] = struct{}{}

		// Copy every element from the pool to a new slice.
		labels := &typesv1.Labels{
			Labels: make([]*typesv1.LabelPair, 0, len(matchedLabelsPool)),
		}
		for _, label := range matchedLabelsPool {
			labels.Labels = append(labels.Labels, label.CloneVT())
		}
		labelsSets = append(labelsSets, labels)
	}
	return labelsSets, nil
}

// Sort orders profiles by their parquet row number for sequential reads.
func (b *singleBlockQuerier) Sort(in []Profile) []Profile {
	// Sort by RowNumber to avoid seeking back and forth in the file.
2038 sort.Slice(in, func(i, j int) bool { 2039 return in[i].(BlockProfile).rowNum < in[j].(BlockProfile).rowNum 2040 }) 2041 return in 2042 } 2043 2044 func (q *singleBlockQuerier) openTSDBIndex(ctx context.Context) error { 2045 f, err := q.bucket.Get(ctx, block.IndexFilename) 2046 if err != nil { 2047 return fmt.Errorf("opening index.tsdb file: %w", err) 2048 } 2049 defer func() { 2050 _ = f.Close() 2051 }() 2052 var buf []byte 2053 var tsdbIndexFile block.File 2054 for _, mf := range q.meta.Files { 2055 if mf.RelPath == block.IndexFilename { 2056 tsdbIndexFile = mf 2057 break 2058 } 2059 } 2060 if tsdbIndexFile.SizeBytes > 0 { 2061 // If index size is known beforehand, we can allocate 2062 // a buffer of the exact size to save some space. 2063 buf = make([]byte, tsdbIndexFile.SizeBytes) 2064 _, err = io.ReadFull(f, buf) 2065 } else { 2066 // 32KB is the default buf size of io.Copy. 2067 // It's unlikely that a tsdb index is less than that. 2068 b := bytes.NewBuffer(make([]byte, 0, 32<<10)) 2069 _, err = io.Copy(b, f) 2070 buf = b.Bytes() 2071 } 2072 if err != nil { 2073 return fmt.Errorf("reading tsdb index: %w", err) 2074 } 2075 2076 q.index, err = index.NewReader(index.RealByteSlice(buf)) 2077 if err != nil { 2078 return fmt.Errorf("opening tsdb index: %w", err) 2079 } 2080 return nil 2081 } 2082 2083 func (q *singleBlockQuerier) Open(ctx context.Context) error { 2084 q.openLock.Lock() 2085 defer q.openLock.Unlock() 2086 if !q.opened { 2087 if err := q.openFiles(ctx); err != nil { 2088 return err 2089 } 2090 } 2091 q.metrics.blockOpened.Inc() 2092 q.opened = true 2093 return nil 2094 } 2095 2096 // openFiles opens the parquet and tsdb files so they are ready for usage. 
2097 func (q *singleBlockQuerier) openFiles(ctx context.Context) error { 2098 start := time.Now() 2099 sp, ctx := opentracing.StartSpanFromContext(ctx, "BlockQuerier - open") 2100 defer func() { 2101 q.metrics.blockOpeningLatency.Observe(time.Since(start).Seconds()) 2102 sp.LogFields( 2103 otlog.String("block_ulid", q.meta.ULID.String()), 2104 ) 2105 sp.Finish() 2106 }() 2107 2108 ctx = ContextWithBlockMetrics(ctx, q.metrics) 2109 g, ctx := errgroup.WithContext(ctx) 2110 g.Go(util.RecoverPanic(func() error { 2111 return q.openTSDBIndex(ctx) 2112 })) 2113 2114 // open parquet files 2115 for _, tableReader := range q.tables { 2116 tableReader := tableReader 2117 g.Go(util.RecoverPanic(func() error { 2118 return tableReader.open(ctx, q.bucket) 2119 })) 2120 } 2121 2122 g.Go(util.RecoverPanic(func() (err error) { 2123 switch q.meta.Version { 2124 case block.MetaVersion1: 2125 q.symbols, err = newSymbolsResolverV1(ctx, q.bucket, q.meta) 2126 case block.MetaVersion2: 2127 q.symbols, err = newSymbolsResolverV2(ctx, q.bucket, q.meta) 2128 case block.MetaVersion3: 2129 q.symbols, err = symdb.Open(ctx, q.bucket, q.meta) 2130 default: 2131 panic(fmt.Errorf("unsupported block version %d id %s", q.meta.Version, q.meta.ULID.String())) 2132 } 2133 return err 2134 })) 2135 2136 return g.Wait() 2137 } 2138 2139 func (b *singleBlockQuerier) profileSourceTable() *parquetReader[*schemav1.ProfilePersister] { 2140 return b.profiles[profileTableKey{}] 2141 } 2142 2143 func (b *singleBlockQuerier) profileTable(resolution time.Duration, aggregation typesv1.TimeSeriesAggregationType) (t *parquetReader[*schemav1.ProfilePersister]) { 2144 defer func() { 2145 if t != nil { 2146 b.metrics.profileTableAccess.WithLabelValues(t.meta.RelPath).Inc() 2147 } 2148 }() 2149 var ok bool 2150 t, ok = b.profiles[profileTableKey{ 2151 resolution: resolution, 2152 aggregation: downsampleAggregation(aggregation), 2153 }] 2154 if ok { 2155 return t 2156 } 2157 return b.profiles[profileTableKey{}] 2158 } 2159 
2160 func (b *singleBlockQuerier) downsampleResolutions() []time.Duration { 2161 if len(b.profiles) < 2 { 2162 // b.profiles contains only the table of original resolution. 2163 return nil 2164 } 2165 resolutions := make([]time.Duration, 0, len(b.profiles)-1) 2166 for k := range b.profiles { 2167 if k.resolution > 0 { 2168 resolutions = append(resolutions, k.resolution) 2169 } 2170 } 2171 return resolutions 2172 } 2173 2174 func downsampleAggregation(v typesv1.TimeSeriesAggregationType) string { 2175 switch v { 2176 case typesv1.TimeSeriesAggregationType_TIME_SERIES_AGGREGATION_TYPE_SUM: 2177 return "sum" 2178 } 2179 return "" 2180 } 2181 2182 const profileTableName = "profiles" 2183 2184 func parseProfileTableName(n string) (profileTableKey, bool) { 2185 if n == profileTableName+block.ParquetSuffix { 2186 return profileTableKey{}, true 2187 } 2188 parts := strings.Split(strings.TrimSuffix(n, block.ParquetSuffix), "_") 2189 if len(parts) != 3 || parts[0] != profileTableName { 2190 return profileTableKey{}, false 2191 } 2192 r, err := time.ParseDuration(parts[1]) 2193 if err != nil { 2194 return profileTableKey{}, false 2195 } 2196 return profileTableKey{ 2197 resolution: r, 2198 aggregation: parts[2], 2199 }, true 2200 } 2201 2202 type parquetReader[P schemav1.PersisterName] struct { 2203 persister P 2204 file parquetobj.File 2205 meta block.File 2206 metrics *BlocksMetrics 2207 } 2208 2209 func (r *parquetReader[P]) open(ctx context.Context, bucketReader phlareobj.BucketReader) error { 2210 r.metrics = blockMetricsFromContext(ctx) 2211 return r.file.Open( 2212 ctx, 2213 bucketReader, 2214 r.meta, 2215 parquet.SkipBloomFilters(true), // we don't use bloom filters 2216 parquet.FileReadMode(parquet.ReadModeAsync), 2217 parquet.ReadBufferSize(parquetReadBufferSize), 2218 ) 2219 } 2220 2221 func (r *parquetReader[P]) Close() error { 2222 return r.file.Close() 2223 } 2224 2225 func (r *parquetReader[P]) relPath() string { 2226 return r.persister.Name() + 
block.ParquetSuffix 2227 } 2228 2229 func (r *parquetReader[P]) columnIter(ctx context.Context, columnName string, predicate query.Predicate, alias string) query.Iterator { 2230 index, _ := query.GetColumnIndexByPath(r.file.Root(), columnName) 2231 if index == -1 { 2232 return query.NewErrIterator(fmt.Errorf("column '%s' not found in parquet file '%s'", columnName, r.relPath())) 2233 } 2234 ctx = query.AddMetricsToContext(ctx, r.metrics.query) 2235 return query.NewSyncIterator(ctx, r.file.RowGroups(), index, columnName, 1000, predicate, alias) 2236 }