github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/row_profile.go (about) 1 package phlaredb 2 3 import ( 4 "github.com/parquet-go/parquet-go" 5 "github.com/prometheus/common/model" 6 7 "github.com/grafana/pyroscope/pkg/iter" 8 phlaremodel "github.com/grafana/pyroscope/pkg/model" 9 "github.com/grafana/pyroscope/pkg/phlaredb/query" 10 schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" 11 ) 12 13 type rowProfile struct { 14 rowNum int64 15 partition uint64 16 } 17 18 func (p rowProfile) StacktracePartition() uint64 { 19 return p.partition 20 } 21 22 func (p rowProfile) RowNumber() int64 { 23 return p.rowNum 24 } 25 26 // RowsIterator is an iterator over rows of a parquet table. 27 // It is a wrapper over query.Iterator to transform its results into a desired type. 28 type RowsIterator[T any] struct { 29 rows query.Iterator 30 current T 31 at func(*query.IteratorResult) T 32 } 33 34 func (it *RowsIterator[T]) Next() bool { 35 if it.rows.Next() { 36 it.current = it.at(it.rows.At()) 37 return true 38 } 39 return false 40 } 41 42 func (it *RowsIterator[T]) Close() error { 43 return it.rows.Close() 44 } 45 46 func (it *RowsIterator[T]) Err() error { 47 return it.rows.Err() 48 } 49 50 func (it *RowsIterator[T]) At() T { 51 return it.current 52 } 53 54 // The size of the batch is chosen empirically. 55 const profileRowAsyncBatchSize = 1 << 10 56 57 func profileRowBatchIterator(it iter.Iterator[*query.IteratorResult]) iter.Iterator[rowProfile] { 58 return iter.NewAsyncBatchIterator[*query.IteratorResult, rowProfile]( 59 it, profileRowAsyncBatchSize, 60 func(r *query.IteratorResult) rowProfile { 61 return rowProfile{ 62 rowNum: r.RowNumber[0], 63 partition: r.ColumnValue(schemav1.StacktracePartitionColumnName).Uint64(), 64 } 65 }, 66 func(t []rowProfile) {}, 67 ) 68 } 69 70 func profileBatchIteratorBySeriesIndex( 71 it iter.Iterator[*query.IteratorResult], 72 series map[int64]labelsInfo, 73 ) iter.Iterator[Profile] { 74 buf := make([][]parquet.Value, 3) 75 return iter.NewAsyncBatchIterator[*query.IteratorResult, Profile]( 76 it, profileRowAsyncBatchSize, 77 func(r *query.IteratorResult) Profile { 78 buf = r.Columns(buf, 79 schemav1.SeriesIndexColumnName, 80 schemav1.TimeNanosColumnName, 81 schemav1.StacktracePartitionColumnName) 82 x := series[buf[0][0].Int64()] 83 return BlockProfile{ 84 rowNum: r.RowNumber[0], 85 timestamp: model.TimeFromUnixNano(buf[1][0].Int64()), 86 partition: retrieveStacktracePartition(buf, 2), 87 fingerprint: x.fp, 88 labels: x.lbs, 89 } 90 }, 91 func(t []Profile) {}, 92 ) 93 } 94 95 func profileBatchIteratorByFingerprints( 96 it iter.Iterator[*query.IteratorResult], 97 labels map[model.Fingerprint]phlaremodel.Labels, 98 ) iter.Iterator[Profile] { 99 return iter.NewAsyncBatchIterator[*query.IteratorResult, Profile]( 100 it, profileRowAsyncBatchSize, 101 func(r *query.IteratorResult) Profile { 102 v, ok := r.Entries[0].RowValue.(fingerprintWithRowNum) 103 if !ok { 104 panic("no fingerprint information found") 105 } 106 l, ok := labels[v.fp] 107 if !ok { 108 panic("no profile series labels with matching fingerprint found") 109 } 110 return BlockProfile{ 111 rowNum: r.RowNumber[0], 112 timestamp: model.TimeFromUnixNano(r.ColumnValue(schemav1.TimeNanosColumnName).Int64()), 113 partition: r.ColumnValue(schemav1.StacktracePartitionColumnName).Uint64(), 114 fingerprint: v.fp, 115 labels: l, 116 } 117 }, 118 func(t []Profile) {}, 119 ) 120 }