github.com/grafana/pyroscope@v1.18.0/pkg/querybackend/query_profile_entry.go (about)

     1  package querybackend
     2  
import (
	"cmp"
	"fmt"
	"slices"
	"strings"

	"github.com/google/uuid"
	"github.com/parquet-go/parquet-go"
	"github.com/prometheus/common/model"
	"github.com/prometheus/prometheus/model/labels"

	"github.com/grafana/pyroscope/pkg/iter"
	phlaremodel "github.com/grafana/pyroscope/pkg/model"
	"github.com/grafana/pyroscope/pkg/phlaredb"
	parquetquery "github.com/grafana/pyroscope/pkg/phlaredb/query"
	schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1"
	"github.com/grafana/pyroscope/pkg/phlaredb/tsdb/index"
)
    19  
// As we expect rows to be very small, we want to fetch a bigger
// batch of rows at once to amortize the latency of reading.
// 2 << 10 == 2048 rows per batch.
const bigBatchSize = 2 << 10
    23  
// ProfileEntry is a single profile row read from the parquet profiles
// table, combined with the fingerprint and labels of its series.
type ProfileEntry struct {
	RowNum      int64             // parquet row number of the profile
	Timestamp   model.Time        // profile timestamp, converted from the TimeNanos column
	Fingerprint model.Fingerprint // fingerprint of the series the profile belongs to
	Labels      phlaremodel.Labels
	Partition   uint64 // stacktrace partition; populated only when fetching is enabled
	ID          string // profile UUID in string form; populated only when fetching is enabled
}
    32  
// RowNumber returns the parquet row number of the profile entry.
func (e ProfileEntry) RowNumber() int64 { return e.RowNum }
    34  
// profileIteratorOption configures profileEntryIterator. An option may
// modify the iterator behaviour (iterator), the series label resolution
// (series), or both; nil members are skipped when options are applied.
type profileIteratorOption struct {
	iterator func(*iteratorOpts)
	series   func(*seriesOpts)
}
    39  
    40  func withAllLabels() profileIteratorOption {
    41  	return profileIteratorOption{
    42  		series: func(opts *seriesOpts) {
    43  			opts.allLabels = true
    44  		},
    45  	}
    46  }
    47  
    48  func withGroupByLabels(by ...string) profileIteratorOption {
    49  	return profileIteratorOption{
    50  		series: func(opts *seriesOpts) {
    51  			opts.groupBy = by
    52  		},
    53  	}
    54  }
    55  
    56  func withFetchPartition(v bool) profileIteratorOption {
    57  	return profileIteratorOption{
    58  		iterator: func(opts *iteratorOpts) {
    59  			opts.fetchPartition = v
    60  		},
    61  	}
    62  }
    63  
    64  func withFetchProfileIDs(v bool) profileIteratorOption {
    65  	return profileIteratorOption{
    66  		iterator: func(opts *iteratorOpts) {
    67  			opts.fetchProfileIDs = v
    68  		},
    69  	}
    70  }
    71  
    72  func withProfileIDSelector(ids ...string) (profileIteratorOption, error) {
    73  	// convert profile ids into uuids
    74  	uuids := make([]string, 0, len(ids))
    75  	for _, id := range ids {
    76  		u, err := uuid.Parse(id)
    77  		if err != nil {
    78  			return profileIteratorOption{}, err
    79  		}
    80  		uuids = append(uuids, string(u[:]))
    81  	}
    82  
    83  	return profileIteratorOption{
    84  		iterator: func(opts *iteratorOpts) {
    85  			opts.profileIDSelector = uuids
    86  		},
    87  	}, nil
    88  }
    89  
// iteratorOpts carries the options that affect which parquet columns the
// profile entry iterator reads (as opposed to seriesOpts, which affect
// series label resolution).
type iteratorOpts struct {
	profileIDSelector []string // this is a slice of the byte form of the UUID
	fetchProfileIDs   bool     // read the profile ID column and populate ProfileEntry.ID
	fetchPartition    bool     // read the stacktrace partition column and populate ProfileEntry.Partition
}
    95  
    96  func iteratorOptsFromOptions(options []profileIteratorOption) iteratorOpts {
    97  	opts := iteratorOpts{
    98  		fetchPartition: true,
    99  	}
   100  	for _, f := range options {
   101  		if f.iterator != nil {
   102  			f.iterator(&opts)
   103  		}
   104  	}
   105  	return opts
   106  }
   107  
// queryColumn describes a single parquet column to read: its name, an
// optional predicate restricting matching rows, and a priority used to
// order the column join (lower values are joined first).
type queryColumn struct {
	name      string
	predicate parquetquery.Predicate // may be nil: the column is read without filtering
	priority  int
}
   113  
   114  type queryColumns []queryColumn
   115  
   116  func (c queryColumns) names() []string {
   117  	result := make([]string, len(c))
   118  	for idx := range result {
   119  		result[idx] = c[idx].name
   120  	}
   121  	return result
   122  }
   123  
   124  func (c queryColumns) join(q *queryContext) parquetquery.Iterator {
   125  	var result parquetquery.Iterator
   126  
   127  	// sort columns by priority, without modifying queryColumn slice
   128  	order := make([]int, len(c))
   129  	for idx := range order {
   130  		order[idx] = idx
   131  	}
   132  	slices.SortFunc(order, func(a, b int) int {
   133  		if r := c[a].priority - c[b].priority; r != 0 {
   134  			return r
   135  		}
   136  		return strings.Compare(c[a].name, c[b].name)
   137  	})
   138  
   139  	for _, idx := range order {
   140  		it := q.ds.Profiles().Column(q.ctx, c[idx].name, c[idx].predicate)
   141  		if result == nil {
   142  			result = it
   143  			continue
   144  		}
   145  		result = parquetquery.NewBinaryJoinIterator(0,
   146  			result,
   147  			it,
   148  		)
   149  	}
   150  	return result
   151  }
   152  
// profileEntryIterator returns an iterator over the profiles matching the
// query context. For every row it resolves the timestamp, series labels
// and fingerprint, and — depending on the supplied options — the
// stacktrace partition and the profile ID.
func profileEntryIterator(q *queryContext, options ...profileIteratorOption) (iter.Iterator[ProfileEntry], error) {
	opts := iteratorOptsFromOptions(options)

	// Resolve matched series (fingerprint + labels), keyed by series
	// index; the options may alter how labels are resolved.
	series, err := getSeries(q.ds.Index(), q.req.matchers, options...)
	if err != nil {
		return nil, err
	}

	// Mandatory columns: series index (restricted to the matched series)
	// and timestamp (restricted to the query time range). The priorities
	// determine the join order in queryColumns.join.
	columns := queryColumns{
		{schemav1.SeriesIndexColumnName, parquetquery.NewMapPredicate(series), 10},
		{schemav1.TimeNanosColumnName, parquetquery.NewIntBetweenPredicate(q.req.startTime, q.req.endTime), 15},
	}
	// processor holds per-row callbacks that copy optional column values
	// from the shared read buffer into the ProfileEntry.
	processor := []func([][]parquet.Value, *ProfileEntry){}

	// fetch partition if requested
	if opts.fetchPartition {
		// offset is captured by the closure below: it is the index of
		// this column in the read buffer.
		offset := len(columns)
		columns = append(
			columns,
			queryColumn{schemav1.StacktracePartitionColumnName, nil, 20},
		)
		processor = append(processor, func(buf [][]parquet.Value, e *ProfileEntry) {
			e.Partition = buf[offset][0].Uint64()
		})
	}
	// fetch profile id if requested or part of the predicate
	if opts.fetchProfileIDs || len(opts.profileIDSelector) > 0 {
		var (
			predicate parquetquery.Predicate
			priority  = 20
		)
		if len(opts.profileIDSelector) > 0 {
			// A selective ID predicate gets a lower priority value so it
			// is placed first in the column join.
			predicate = parquetquery.NewStringInPredicate(opts.profileIDSelector)
			priority = 5
		}
		offset := len(columns)
		columns = append(
			columns,
			queryColumn{schemav1.IDColumnName, predicate, priority},
		)
		// u is reused across rows as scratch space for decoding the
		// 16-byte UUID value into its string form.
		var u uuid.UUID
		processor = append(processor, func(buf [][]parquet.Value, e *ProfileEntry) {
			b := buf[offset][0].Bytes()
			if len(b) != 16 {
				// Not a 16-byte UUID payload; leave e.ID empty.
				return
			}
			copy(u[:], b)
			e.ID = u.String()
		})
	}

	// buf is shared across invocations of the transform function below;
	// NOTE(review): this assumes the iterator calls it sequentially —
	// appears consistent with the batch iterator's use here.
	buf := make([][]parquet.Value, len(columns))
	columnNames := columns.names()

	entries := iter.NewAsyncBatchIterator[*parquetquery.IteratorResult, ProfileEntry](
		columns.join(q), bigBatchSize,
		func(r *parquetquery.IteratorResult) ProfileEntry {
			buf = r.Columns(buf, columnNames...)
			// buf[0] is the series index column (first mandatory column),
			// buf[1] the timestamp column.
			x := series[buf[0][0].Uint32()]
			e := ProfileEntry{
				RowNum:      r.RowNumber[0],
				Timestamp:   model.TimeFromUnixNano(buf[1][0].Int64()),
				Fingerprint: x.fingerprint,
				Labels:      x.labels,
			}
			for _, proc := range processor {
				proc(buf, &e)
			}
			return e
		},
		func([]ProfileEntry) {},
	)
	return entries, nil
}
   227  
// series is the resolved identity of a time series: its fingerprint and
// its (possibly group-by-reduced) label set.
type series struct {
	fingerprint model.Fingerprint
	labels      phlaremodel.Labels
}
   232  
// seriesOpts controls how series labels are resolved in getSeries.
type seriesOpts struct {
	allLabels bool // when this is true, groupBy is ignored
	groupBy   []string
}
   237  
   238  func getSeries(reader phlaredb.IndexReader, matchers []*labels.Matcher, options ...profileIteratorOption) (map[uint32]series, error) {
   239  	var opts seriesOpts
   240  	for _, f := range options {
   241  		if f.series != nil {
   242  			f.series(&opts)
   243  		}
   244  	}
   245  
   246  	postings, err := getPostings(reader, matchers...)
   247  	if err != nil {
   248  		return nil, err
   249  	}
   250  	chunks := make([]index.ChunkMeta, 1)
   251  	s := make(map[uint32]series)
   252  	l := make(phlaremodel.Labels, 0, 6)
   253  	for postings.Next() {
   254  		var fp uint64
   255  		if opts.allLabels {
   256  			fp, err = reader.Series(postings.At(), &l, &chunks)
   257  		} else {
   258  			fp, err = reader.SeriesBy(postings.At(), &l, &chunks, opts.groupBy...)
   259  		}
   260  		if err != nil {
   261  			return nil, err
   262  		}
   263  		_, ok := s[chunks[0].SeriesIndex]
   264  		if ok {
   265  			continue
   266  		}
   267  		s[chunks[0].SeriesIndex] = series{
   268  			fingerprint: model.Fingerprint(fp),
   269  			labels:      l.Clone(),
   270  		}
   271  	}
   272  	return s, postings.Err()
   273  }
   274  
   275  func getPostings(reader phlaredb.IndexReader, matchers ...*labels.Matcher) (index.Postings, error) {
   276  	if len(matchers) == 0 {
   277  		k, v := index.AllPostingsKey()
   278  		return reader.Postings(k, nil, v)
   279  	}
   280  	return phlaredb.PostingsForMatchers(reader, nil, matchers...)
   281  }
   282  
   283  func getSeriesIDs(reader phlaredb.IndexReader, matchers ...*labels.Matcher) (map[uint32]struct{}, error) {
   284  	postings, err := getPostings(reader, matchers...)
   285  	if err != nil {
   286  		return nil, err
   287  	}
   288  	defer func() {
   289  		_ = postings.Close()
   290  	}()
   291  	visited := make(map[uint32]struct{})
   292  	chunks := make([]index.ChunkMeta, 1)
   293  	for postings.Next() {
   294  		if _, err = reader.Series(postings.At(), nil, &chunks); err != nil {
   295  			return nil, err
   296  		}
   297  		visited[chunks[0].SeriesIndex] = struct{}{}
   298  	}
   299  	if err = postings.Err(); err != nil {
   300  		return nil, err
   301  	}
   302  	return visited, nil
   303  }