github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/row_profile.go

github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/row_profile.go (about)

     1  package phlaredb
     2  
     3  import (
     4  	"github.com/parquet-go/parquet-go"
     5  	"github.com/prometheus/common/model"
     6  
     7  	"github.com/grafana/pyroscope/pkg/iter"
     8  	phlaremodel "github.com/grafana/pyroscope/pkg/model"
     9  	"github.com/grafana/pyroscope/pkg/phlaredb/query"
    10  	schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1"
    11  )
    12  
    13  type rowProfile struct {
    14  	rowNum    int64
    15  	partition uint64
    16  }
    17  
    18  func (p rowProfile) StacktracePartition() uint64 {
    19  	return p.partition
    20  }
    21  
    22  func (p rowProfile) RowNumber() int64 {
    23  	return p.rowNum
    24  }
    25  
    26  // RowsIterator is an iterator over rows of a parquet table.
    27  // It is a wrapper over query.Iterator to transform its results into a desired type.
    28  type RowsIterator[T any] struct {
    29  	rows    query.Iterator
    30  	current T
    31  	at      func(*query.IteratorResult) T
    32  }
    33  
    34  func (it *RowsIterator[T]) Next() bool {
    35  	if it.rows.Next() {
    36  		it.current = it.at(it.rows.At())
    37  		return true
    38  	}
    39  	return false
    40  }
    41  
    42  func (it *RowsIterator[T]) Close() error {
    43  	return it.rows.Close()
    44  }
    45  
    46  func (it *RowsIterator[T]) Err() error {
    47  	return it.rows.Err()
    48  }
    49  
    50  func (it *RowsIterator[T]) At() T {
    51  	return it.current
    52  }
    53  
// profileRowAsyncBatchSize is the batch size (1 << 10 = 1024) that the
// profile iterators in this file pass to iter.NewAsyncBatchIterator.
// The value was chosen empirically.
const profileRowAsyncBatchSize = 1 << 10
    56  
    57  func profileRowBatchIterator(it iter.Iterator[*query.IteratorResult]) iter.Iterator[rowProfile] {
    58  	return iter.NewAsyncBatchIterator[*query.IteratorResult, rowProfile](
    59  		it, profileRowAsyncBatchSize,
    60  		func(r *query.IteratorResult) rowProfile {
    61  			return rowProfile{
    62  				rowNum:    r.RowNumber[0],
    63  				partition: r.ColumnValue(schemav1.StacktracePartitionColumnName).Uint64(),
    64  			}
    65  		},
    66  		func(t []rowProfile) {},
    67  	)
    68  }
    69  
    70  func profileBatchIteratorBySeriesIndex(
    71  	it iter.Iterator[*query.IteratorResult],
    72  	series map[int64]labelsInfo,
    73  ) iter.Iterator[Profile] {
    74  	buf := make([][]parquet.Value, 3)
    75  	return iter.NewAsyncBatchIterator[*query.IteratorResult, Profile](
    76  		it, profileRowAsyncBatchSize,
    77  		func(r *query.IteratorResult) Profile {
    78  			buf = r.Columns(buf,
    79  				schemav1.SeriesIndexColumnName,
    80  				schemav1.TimeNanosColumnName,
    81  				schemav1.StacktracePartitionColumnName)
    82  			x := series[buf[0][0].Int64()]
    83  			return BlockProfile{
    84  				rowNum:      r.RowNumber[0],
    85  				timestamp:   model.TimeFromUnixNano(buf[1][0].Int64()),
    86  				partition:   retrieveStacktracePartition(buf, 2),
    87  				fingerprint: x.fp,
    88  				labels:      x.lbs,
    89  			}
    90  		},
    91  		func(t []Profile) {},
    92  	)
    93  }
    94  
    95  func profileBatchIteratorByFingerprints(
    96  	it iter.Iterator[*query.IteratorResult],
    97  	labels map[model.Fingerprint]phlaremodel.Labels,
    98  ) iter.Iterator[Profile] {
    99  	return iter.NewAsyncBatchIterator[*query.IteratorResult, Profile](
   100  		it, profileRowAsyncBatchSize,
   101  		func(r *query.IteratorResult) Profile {
   102  			v, ok := r.Entries[0].RowValue.(fingerprintWithRowNum)
   103  			if !ok {
   104  				panic("no fingerprint information found")
   105  			}
   106  			l, ok := labels[v.fp]
   107  			if !ok {
   108  				panic("no profile series labels with matching fingerprint found")
   109  			}
   110  			return BlockProfile{
   111  				rowNum:      r.RowNumber[0],
   112  				timestamp:   model.TimeFromUnixNano(r.ColumnValue(schemav1.TimeNanosColumnName).Int64()),
   113  				partition:   r.ColumnValue(schemav1.StacktracePartitionColumnName).Uint64(),
   114  				fingerprint: v.fp,
   115  				labels:      l,
   116  			}
   117  		},
   118  		func(t []Profile) {},
   119  	)
   120  }