github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/sample_merge.go (about)

     1  package phlaredb
     2  
     3  import (
     4  	"context"
     5  	"strings"
     6  
     7  	"github.com/grafana/dskit/runutil"
     8  	"github.com/opentracing/opentracing-go"
     9  	"github.com/parquet-go/parquet-go"
    10  
    11  	profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1"
    12  	typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1"
    13  	"github.com/grafana/pyroscope/pkg/iter"
    14  	phlaremodel "github.com/grafana/pyroscope/pkg/model"
    15  	"github.com/grafana/pyroscope/pkg/phlaredb/query"
    16  	v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1"
    17  	"github.com/grafana/pyroscope/pkg/phlaredb/symdb"
    18  )
    19  
    20  func (b *singleBlockQuerier) MergeByStacktraces(ctx context.Context, rows iter.Iterator[Profile], maxNodes int64) (*phlaremodel.Tree, error) {
    21  	sp, ctx := opentracing.StartSpanFromContext(ctx, "MergeByStacktraces - Block")
    22  	defer sp.Finish()
    23  	sp.SetTag("block ULID", b.meta.ULID.String())
    24  
    25  	if err := b.Open(ctx); err != nil {
    26  		return nil, err
    27  	}
    28  	b.queries.Add(1)
    29  	defer b.queries.Done()
    30  
    31  	ctx = query.AddMetricsToContext(ctx, b.metrics.query)
    32  	r := symdb.NewResolver(ctx, b.symbols, symdb.WithResolverMaxNodes(maxNodes))
    33  	defer r.Release()
    34  	if err := mergeByStacktraces(ctx, b.profileSourceTable().file, rows, r); err != nil {
    35  		return nil, err
    36  	}
    37  	return r.Tree()
    38  }
    39  
    40  func (b *singleBlockQuerier) MergePprof(ctx context.Context, rows iter.Iterator[Profile], maxNodes int64, sts *typesv1.StackTraceSelector) (*profilev1.Profile, error) {
    41  	sp, ctx := opentracing.StartSpanFromContext(ctx, "MergePprof - Block")
    42  	defer sp.Finish()
    43  	sp.SetTag("block ULID", b.meta.ULID.String())
    44  
    45  	if err := b.Open(ctx); err != nil {
    46  		return nil, err
    47  	}
    48  	b.queries.Add(1)
    49  	defer b.queries.Done()
    50  
    51  	ctx = query.AddMetricsToContext(ctx, b.metrics.query)
    52  	r := symdb.NewResolver(ctx, b.symbols,
    53  		symdb.WithResolverMaxNodes(maxNodes),
    54  		symdb.WithResolverStackTraceSelector(sts))
    55  	defer r.Release()
    56  	if err := mergeByStacktraces(ctx, b.profileSourceTable().file, rows, r); err != nil {
    57  		return nil, err
    58  	}
    59  	return r.Pprof()
    60  }
    61  
    62  func (b *singleBlockQuerier) MergeByLabels(ctx context.Context, rows iter.Iterator[Profile], sts *typesv1.StackTraceSelector, by ...string) ([]*typesv1.Series, error) {
    63  	sp, ctx := opentracing.StartSpanFromContext(ctx, "MergeByLabels - Block")
    64  	defer sp.Finish()
    65  	sp.SetTag("block ULID", b.meta.ULID.String())
    66  
    67  	if err := b.Open(ctx); err != nil {
    68  		return nil, err
    69  	}
    70  	b.queries.Add(1)
    71  	defer b.queries.Done()
    72  
    73  	ctx = query.AddMetricsToContext(ctx, b.metrics.query)
    74  	if len(sts.GetCallSite()) == 0 {
    75  		columnName := "TotalValue"
    76  		if b.meta.Version == 1 {
    77  			columnName = "Samples.list.element.Value"
    78  		}
    79  		return mergeByLabels(ctx, b.profileSourceTable().file, columnName, rows, by...)
    80  	}
    81  	r := symdb.NewResolver(ctx, b.symbols,
    82  		symdb.WithResolverStackTraceSelector(sts))
    83  	defer r.Release()
    84  	return mergeByLabelsWithStackTraceSelector(ctx, b.profileSourceTable().file, rows, r, by...)
    85  }
    86  
    87  func (b *singleBlockQuerier) MergeBySpans(ctx context.Context, rows iter.Iterator[Profile], spanSelector phlaremodel.SpanSelector) (*phlaremodel.Tree, error) {
    88  	sp, _ := opentracing.StartSpanFromContext(ctx, "MergeBySpans - Block")
    89  	defer sp.Finish()
    90  	sp.SetTag("block ULID", b.meta.ULID.String())
    91  
    92  	if err := b.Open(ctx); err != nil {
    93  		return nil, err
    94  	}
    95  	b.queries.Add(1)
    96  	defer b.queries.Done()
    97  
    98  	ctx = query.AddMetricsToContext(ctx, b.metrics.query)
    99  	r := symdb.NewResolver(ctx, b.symbols)
   100  	defer r.Release()
   101  	if err := mergeBySpans(ctx, b.profileSourceTable().file, rows, r, spanSelector); err != nil {
   102  		return nil, err
   103  	}
   104  	return r.Tree()
   105  }
   106  
// Source provides read access to a parquet table: its schema and the row
// groups to scan. It is satisfied by the block's on-disk profile table file.
type Source interface {
	Schema() *parquet.Schema
	RowGroups() []parquet.RowGroup
}
   111  
   112  func mergeByStacktraces[T interface{ StacktracePartition() uint64 }](ctx context.Context, profileSource Source, rows iter.Iterator[T], r *symdb.Resolver,
   113  ) (err error) {
   114  	sp, ctx := opentracing.StartSpanFromContext(ctx, "mergeByStacktraces")
   115  	defer sp.Finish()
   116  	var columns v1.SampleColumns
   117  	if err = columns.Resolve(profileSource.Schema()); err != nil {
   118  		return err
   119  	}
   120  	profiles := query.NewRepeatedRowIterator(ctx, rows, profileSource.RowGroups(),
   121  		columns.StacktraceID.ColumnIndex,
   122  		columns.Value.ColumnIndex,
   123  	)
   124  	defer runutil.CloseWithErrCapture(&err, profiles, "failed to close profile stream")
   125  	for profiles.Next() {
   126  		p := profiles.At()
   127  		r.AddSamplesFromParquetRow(p.Row.StacktracePartition(), p.Values[0], p.Values[1])
   128  	}
   129  	return profiles.Err()
   130  }
   131  
   132  func mergeBySpans[T interface{ StacktracePartition() uint64 }](ctx context.Context, profileSource Source, rows iter.Iterator[T], r *symdb.Resolver, spanSelector phlaremodel.SpanSelector) (err error) {
   133  	sp, ctx := opentracing.StartSpanFromContext(ctx, "mergeBySpans")
   134  	defer sp.Finish()
   135  	var columns v1.SampleColumns
   136  	if err = columns.Resolve(profileSource.Schema()); err != nil {
   137  		return err
   138  	}
   139  	if !columns.HasSpanID() {
   140  		return nil
   141  	}
   142  	profiles := query.NewRepeatedRowIterator(ctx, rows, profileSource.RowGroups(),
   143  		columns.StacktraceID.ColumnIndex,
   144  		columns.Value.ColumnIndex,
   145  		columns.SpanID.ColumnIndex,
   146  	)
   147  	defer runutil.CloseWithErrCapture(&err, profiles, "failed to close profile stream")
   148  	for profiles.Next() {
   149  		p := profiles.At()
   150  		r.AddSamplesWithSpanSelectorFromParquetRow(
   151  			p.Row.StacktracePartition(),
   152  			p.Values[0],
   153  			p.Values[1],
   154  			p.Values[2],
   155  			spanSelector,
   156  		)
   157  	}
   158  	return profiles.Err()
   159  }
   160  
   161  func mergeByLabels[T Profile](
   162  	ctx context.Context,
   163  	profileSource Source,
   164  	columnName string,
   165  	rows iter.Iterator[T],
   166  	by ...string,
   167  ) ([]*typesv1.Series, error) {
   168  	column, err := v1.ResolveColumnByPath(profileSource.Schema(), strings.Split(columnName, "."))
   169  	if err != nil {
   170  		return nil, err
   171  	}
   172  
   173  	// these columns might not be present
   174  	annotationKeysColumn, _ := v1.ResolveColumnByPath(profileSource.Schema(), v1.AnnotationKeyColumnPath)
   175  	annotationValuesColumn, _ := v1.ResolveColumnByPath(profileSource.Schema(), v1.AnnotationValueColumnPath)
   176  
   177  	profiles := query.NewRepeatedRowIterator(
   178  		ctx,
   179  		rows,
   180  		profileSource.RowGroups(),
   181  		column.ColumnIndex,
   182  		annotationKeysColumn.ColumnIndex,
   183  		annotationValuesColumn.ColumnIndex,
   184  	)
   185  	defer runutil.CloseWithErrCapture(&err, profiles, "failed to close profile stream")
   186  
   187  	seriesBuilder := phlaremodel.NewTimeSeriesBuilder(by...)
   188  
   189  	for profiles.Next() {
   190  		values := profiles.At()
   191  		p := values.Row
   192  		var total int64
   193  		annotations := v1.Annotations{
   194  			Keys:   make([]string, 0),
   195  			Values: make([]string, 0),
   196  		}
   197  		for _, e := range values.Values {
   198  			if e[0].Column() == column.ColumnIndex && e[0].Kind() == parquet.Int64 {
   199  				total += e[0].Int64()
   200  			} else if e[0].Column() == annotationKeysColumn.ColumnIndex && e[0].Kind() == parquet.ByteArray {
   201  				annotations.Keys = append(annotations.Keys, e[0].String())
   202  			} else if e[0].Column() == annotationValuesColumn.ColumnIndex && e[0].Kind() == parquet.ByteArray {
   203  				annotations.Values = append(annotations.Values, e[0].String())
   204  			}
   205  		}
   206  		seriesBuilder.Add(
   207  			p.Fingerprint(),
   208  			p.Labels(),
   209  			int64(p.Timestamp()),
   210  			float64(total),
   211  			annotations,
   212  			"",
   213  		)
   214  	}
   215  	return seriesBuilder.Build(), profiles.Err()
   216  }
   217  
   218  func mergeByLabelsWithStackTraceSelector[T Profile](
   219  	ctx context.Context,
   220  	profileSource Source,
   221  	rows iter.Iterator[T],
   222  	r *symdb.Resolver,
   223  	by ...string,
   224  ) (s []*typesv1.Series, err error) {
   225  	var columns v1.SampleColumns
   226  	if err = columns.Resolve(profileSource.Schema()); err != nil {
   227  		return nil, err
   228  	}
   229  	profiles := query.NewRepeatedRowIterator(ctx, rows, profileSource.RowGroups(),
   230  		columns.StacktraceID.ColumnIndex,
   231  		columns.Value.ColumnIndex,
   232  	)
   233  
   234  	seriesBuilder := phlaremodel.TimeSeriesBuilder{}
   235  	seriesBuilder.Init(by...)
   236  
   237  	defer runutil.CloseWithErrCapture(&err, profiles, "failed to close profile stream")
   238  	var v symdb.CallSiteValues
   239  	for profiles.Next() {
   240  		row := profiles.At()
   241  		h := row.Row
   242  		if err = r.CallSiteValuesParquet(&v, h.StacktracePartition(), row.Values[0], row.Values[1]); err != nil {
   243  			return nil, err
   244  		}
   245  		// TODO aleks-p: add annotation support?
   246  		seriesBuilder.Add(h.Fingerprint(), h.Labels(), int64(h.Timestamp()), float64(v.Total), v1.Annotations{}, "")
   247  	}
   248  
   249  	return seriesBuilder.Build(), profiles.Err()
   250  }