github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/profiles.go (about)

     1  package phlaredb
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"sort"
     7  	"sync"
     8  
     9  	"github.com/gogo/status"
    10  	"github.com/opentracing/opentracing-go"
    11  	"github.com/pkg/errors"
    12  	"github.com/prometheus/common/model"
    13  	"github.com/prometheus/prometheus/model/labels"
    14  	"github.com/prometheus/prometheus/promql/parser"
    15  	"github.com/prometheus/prometheus/storage"
    16  	"github.com/samber/lo"
    17  	"go.uber.org/atomic"
    18  	"google.golang.org/grpc/codes"
    19  
    20  	ingestv1 "github.com/grafana/pyroscope/api/gen/proto/go/ingester/v1"
    21  	"github.com/grafana/pyroscope/pkg/iter"
    22  	phlaremodel "github.com/grafana/pyroscope/pkg/model"
    23  	"github.com/grafana/pyroscope/pkg/phlaredb/query"
    24  	schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1"
    25  	"github.com/grafana/pyroscope/pkg/phlaredb/tsdb"
    26  	"github.com/grafana/pyroscope/pkg/phlaredb/tsdb/index"
    27  )
    28  
    29  // delta encoding for ranges
    30  type rowRange struct {
    31  	rowNum int64
    32  	length int
    33  }
    34  
    35  type rowRangeWithSeriesIndex struct {
    36  	*rowRange
    37  	seriesIndex uint32
    38  }
    39  
    40  // those need to be strictly ordered
    41  type rowRangesWithSeriesIndex []rowRangeWithSeriesIndex
    42  
    43  // getSeriesIndex returns the series index for a given row number.
    44  // searchHint is the hint for the index to start searching from, it should be passed to next call of this function.
    45  func (s rowRangesWithSeriesIndex) getSeriesIndex(rowNum int64, searchHint *int) uint32 {
    46  	if *searchHint < 0 || *searchHint >= len(s) {
    47  		*searchHint = 0
    48  	}
    49  	for i := *searchHint; i < len(s); i++ {
    50  		rg := s[i]
    51  		// it is possible that the series is not existing
    52  		if rg.rowRange == nil {
    53  			continue
    54  		}
    55  		if rg.rowNum <= rowNum && rg.rowNum+int64(rg.length) > rowNum {
    56  			*searchHint = i
    57  			return rg.seriesIndex
    58  		}
    59  	}
    60  	panic("series index not found")
    61  }
    62  
// rowRanges maps a row range to the fingerprint of the series whose rows it
// covers.
type rowRanges map[rowRange]model.Fingerprint
    64  
    65  func (rR rowRanges) iter() iter.Iterator[fingerprintWithRowNum] {
    66  	// ensure row ranges is sorted
    67  	rRSlice := lo.Keys(rR)
    68  	sort.Slice(rRSlice, func(i, j int) bool {
    69  		return rRSlice[i].rowNum < rRSlice[j].rowNum
    70  	})
    71  
    72  	fps := make([]model.Fingerprint, 0, len(rR))
    73  	for _, elem := range rRSlice {
    74  		fps = append(fps, rR[elem])
    75  	}
    76  
    77  	return &rowRangesIter{
    78  		r:   rRSlice,
    79  		fps: fps,
    80  		pos: 0,
    81  	}
    82  }
    83  
    84  type fingerprintWithRowNum struct {
    85  	fp     model.Fingerprint
    86  	rowNum int64
    87  }
    88  
    89  func (f fingerprintWithRowNum) RowNumber() int64 {
    90  	return f.rowNum
    91  }
    92  
    93  func (r rowRanges) fingerprintsWithRowNum() query.Iterator {
    94  	return query.NewRowNumberIterator(r.iter())
    95  }
    96  
    97  type rowRangesIter struct {
    98  	r   []rowRange
    99  	fps []model.Fingerprint
   100  	pos int64
   101  }
   102  
   103  func (i *rowRangesIter) At() fingerprintWithRowNum {
   104  	return fingerprintWithRowNum{
   105  		rowNum: i.pos - 1,
   106  		fp:     i.fps[0],
   107  	}
   108  }
   109  
   110  func (i *rowRangesIter) Next() bool {
   111  	if len(i.r) == 0 {
   112  		return false
   113  	}
   114  	if i.pos < i.r[0].rowNum {
   115  		i.pos = i.r[0].rowNum
   116  	}
   117  
   118  	if i.pos >= i.r[0].rowNum+int64(i.r[0].length) {
   119  		i.r = i.r[1:]
   120  		i.fps = i.fps[1:]
   121  		return i.Next()
   122  	}
   123  	i.pos++
   124  	return true
   125  }
   126  
   127  func (i *rowRangesIter) Close() error { return nil }
   128  
   129  func (i *rowRangesIter) Err() error { return nil }
   130  
// profileSeries is the per-series state kept by profilesIndex: the labels and
// fingerprint of the series, the time bounds of the profiles added to it and
// the location of its profiles (in memory and on disk).
type profileSeries struct {
	lbs phlaremodel.Labels
	fp  model.Fingerprint

	// smallest and largest TimeNanos of the profiles added to this series
	minTime, maxTime int64

	// profiles in memory, kept in ascending TimeNanos order
	profiles []*schemav1.InMemoryProfile

	// profiles temporarily stored on disk in row group segments; there is one
	// entry per row group, which is nil when the series has no rows in it
	// TODO: this information is crucial to recover segments to a full block later
	profilesOnDisk []*rowRange
}
   144  
// profilesIndex keeps track of all series and their profiles: an inverted
// index over the series labels plus the per-fingerprint profile state.
type profilesIndex struct {
	// ix is the inverted index used to look up fingerprints by label matchers.
	ix *tsdb.BitPrefixInvertedIndex
	// todo: like the inverted index we might want to shard fingerprint to avoid contentions.
	profilesPerFP map[model.Fingerprint]*profileSeries
	// mutex guards profilesPerFP and rowGroupsOnDisk.
	mutex sync.RWMutex
	// totalProfiles and totalSeries count the profiles and series added so far.
	totalProfiles *atomic.Int64
	totalSeries   *atomic.Int64
	// rowGroupsOnDisk is the number of row groups that have been cut so far.
	rowGroupsOnDisk int

	metrics *headMetrics
}
   156  
   157  func newProfileIndex(totalShards uint32, metrics *headMetrics) (*profilesIndex, error) {
   158  	ix, err := tsdb.NewBitPrefixWithShards(totalShards)
   159  	if err != nil {
   160  		return nil, err
   161  	}
   162  	return &profilesIndex{
   163  		ix:            ix,
   164  		profilesPerFP: make(map[model.Fingerprint]*profileSeries),
   165  		totalProfiles: atomic.NewInt64(0),
   166  		totalSeries:   atomic.NewInt64(0),
   167  		metrics:       metrics,
   168  	}, nil
   169  }
   170  
   171  // Add a new set of profile to the index.
   172  // The seriesRef are expected to match the profile labels passed in.
   173  func (pi *profilesIndex) Add(ps *schemav1.InMemoryProfile, lbs phlaremodel.Labels, profileName string) {
   174  	pi.mutex.Lock()
   175  	defer pi.mutex.Unlock()
   176  	profiles, ok := pi.profilesPerFP[ps.SeriesFingerprint]
   177  	if !ok {
   178  		lbs := pi.ix.Add(lbs, ps.SeriesFingerprint)
   179  		profiles = &profileSeries{
   180  			lbs:            lbs,
   181  			fp:             ps.SeriesFingerprint,
   182  			minTime:        ps.TimeNanos,
   183  			maxTime:        ps.TimeNanos,
   184  			profilesOnDisk: make([]*rowRange, pi.rowGroupsOnDisk),
   185  		}
   186  		pi.profilesPerFP[ps.SeriesFingerprint] = profiles
   187  		pi.metrics.series.Set(float64(pi.totalSeries.Inc()))
   188  		pi.metrics.seriesCreated.WithLabelValues(profileName).Inc()
   189  	}
   190  
   191  	// profile is latest in this series, use a shortcut
   192  	if ps.TimeNanos > profiles.maxTime {
   193  		// update max timeNanos
   194  		profiles.maxTime = ps.TimeNanos
   195  
   196  		// add profile to in memory slice
   197  		profiles.profiles = append(profiles.profiles, ps)
   198  	} else {
   199  		// use binary search to find position
   200  		i := sort.Search(len(profiles.profiles), func(i int) bool {
   201  			return profiles.profiles[i].TimeNanos > ps.TimeNanos
   202  		})
   203  
   204  		// insert into slice at correct position
   205  		profiles.profiles = append(profiles.profiles, &schemav1.InMemoryProfile{})
   206  		copy(profiles.profiles[i+1:], profiles.profiles[i:])
   207  		profiles.profiles[i] = ps
   208  	}
   209  
   210  	if ps.TimeNanos < profiles.minTime {
   211  		profiles.minTime = ps.TimeNanos
   212  	}
   213  
   214  	pi.metrics.profiles.Set(float64(pi.totalProfiles.Inc()))
   215  	pi.metrics.profilesCreated.WithLabelValues(profileName).Inc()
   216  }
   217  
   218  func (pi *profilesIndex) selectMatchingFPs(ctx context.Context, params *ingestv1.SelectProfilesRequest) ([]model.Fingerprint, error) {
   219  	sp, _ := opentracing.StartSpanFromContext(ctx, "selectMatchingFPs - Index")
   220  	defer sp.Finish()
   221  	selectors, err := parser.ParseMetricSelector(params.LabelSelector)
   222  	if err != nil {
   223  		return nil, status.Error(codes.InvalidArgument, "failed to parse label selectors: "+err.Error())
   224  	}
   225  	if params.Type == nil {
   226  		return nil, errors.New("no profileType given")
   227  	}
   228  	selectors = append(selectors, phlaremodel.SelectorFromProfileType(params.Type))
   229  
   230  	filters, matchers := SplitFiltersAndMatchers(selectors)
   231  	ids, err := pi.ix.Lookup(matchers, nil)
   232  	if err != nil {
   233  		return nil, err
   234  	}
   235  
   236  	pi.mutex.RLock()
   237  	defer pi.mutex.RUnlock()
   238  
   239  	// filter fingerprints that no longer exist or don't match the filters
   240  	var idx int
   241  outer:
   242  	for _, fp := range ids {
   243  		profile, ok := pi.profilesPerFP[fp]
   244  		if !ok {
   245  			// If a profile labels is missing here, it has already been flushed
   246  			// and is supposed to be picked up from storage by querier
   247  			continue
   248  		}
   249  		for _, filter := range filters {
   250  			if !filter.Matches(profile.lbs.Get(filter.Name)) {
   251  				continue outer
   252  			}
   253  		}
   254  
   255  		// keep this one
   256  		ids[idx] = fp
   257  		idx++
   258  	}
   259  
   260  	sp.SetTag("matchedSeries", idx)
   261  
   262  	return ids[:idx], nil
   263  }
   264  
   265  func (pi *profilesIndex) selectMatchingRowRanges(ctx context.Context, params *ingestv1.SelectProfilesRequest, rowGroupIdx int) (
   266  	query.Iterator,
   267  	map[model.Fingerprint]phlaremodel.Labels,
   268  	error,
   269  ) {
   270  	sp, ctx := opentracing.StartSpanFromContext(ctx, "selectMatchingRowRanges - Index")
   271  	defer sp.Finish()
   272  
   273  	ids, err := pi.selectMatchingFPs(ctx, params)
   274  	if err != nil {
   275  		return nil, nil, err
   276  	}
   277  
   278  	// gather rowRanges and labels from matching series under read lock of the index
   279  	var (
   280  		rowRanges   = make(rowRanges, len(ids))
   281  		labelsPerFP = make(map[model.Fingerprint]phlaremodel.Labels, len(ids))
   282  	)
   283  
   284  	pi.mutex.RLock()
   285  	defer pi.mutex.RUnlock()
   286  
   287  	for _, fp := range ids {
   288  		// skip if series no longer in index
   289  		profileSeries, ok := pi.profilesPerFP[fp]
   290  		if !ok {
   291  			continue
   292  		}
   293  
   294  		labelsPerFP[fp] = profileSeries.lbs
   295  
   296  		// skip if rowRange empty
   297  		rR := profileSeries.profilesOnDisk[rowGroupIdx]
   298  		if rR == nil {
   299  			continue
   300  		}
   301  
   302  		rowRanges[*rR] = fp
   303  	}
   304  
   305  	sp.SetTag("rowGroupSegment", rowGroupIdx)
   306  	sp.SetTag("matchedRowRangesCount", len(rowRanges))
   307  
   308  	return rowRanges.fingerprintsWithRowNum(), labelsPerFP, nil
   309  }
   310  
   311  type ProfileWithLabels struct {
   312  	profile *schemav1.InMemoryProfile
   313  	lbs     phlaremodel.Labels
   314  	fp      model.Fingerprint
   315  }
   316  
   317  func (p ProfileWithLabels) RowNumber() int64 { return 0 }
   318  
   319  func (p ProfileWithLabels) StacktracePartition() uint64 {
   320  	return p.profile.StacktracePartition
   321  }
   322  
   323  func (p ProfileWithLabels) Timestamp() model.Time {
   324  	return model.TimeFromUnixNano(p.profile.TimeNanos)
   325  }
   326  
   327  func (p ProfileWithLabels) Fingerprint() model.Fingerprint {
   328  	return p.fp
   329  }
   330  
   331  func (p ProfileWithLabels) Labels() phlaremodel.Labels {
   332  	return p.lbs
   333  }
   334  
   335  func (p ProfileWithLabels) Samples() schemav1.Samples {
   336  	return p.profile.Samples
   337  }
   338  
   339  func (p ProfileWithLabels) Total() int64 {
   340  	return int64(p.profile.TotalValue)
   341  }
   342  
   343  func (p ProfileWithLabels) Annotations() schemav1.Annotations {
   344  	return p.profile.Annotations
   345  }
   346  
   347  type SeriesIterator struct {
   348  	iter.Iterator[*schemav1.InMemoryProfile]
   349  	curr ProfileWithLabels
   350  	fp   model.Fingerprint
   351  	lbs  phlaremodel.Labels
   352  }
   353  
   354  func NewSeriesIterator(labels phlaremodel.Labels, fingerprint model.Fingerprint, it iter.Iterator[*schemav1.InMemoryProfile]) *SeriesIterator {
   355  	return &SeriesIterator{
   356  		Iterator: it,
   357  		fp:       fingerprint,
   358  		lbs:      labels,
   359  	}
   360  }
   361  
   362  func (it *SeriesIterator) Next() bool {
   363  	if !it.Iterator.Next() {
   364  		return false
   365  	}
   366  	it.curr = ProfileWithLabels{
   367  		profile: it.Iterator.At(),
   368  		lbs:     it.lbs,
   369  		fp:      it.fp,
   370  	}
   371  	return true
   372  }
   373  
   374  func (it *SeriesIterator) At() Profile {
   375  	return it.curr
   376  }
   377  
   378  // forMatchingLabels iterates through all matching label sets and calls f for each labels set.
   379  func (pi *profilesIndex) forMatchingLabels(matchers []*labels.Matcher,
   380  	fn func(lbs phlaremodel.Labels, fp model.Fingerprint) error,
   381  ) error {
   382  	filters, matchers := SplitFiltersAndMatchers(matchers)
   383  	ids, err := pi.ix.Lookup(matchers, nil)
   384  	if err != nil {
   385  		return err
   386  	}
   387  
   388  	pi.mutex.RLock()
   389  	defer pi.mutex.RUnlock()
   390  
   391  outer:
   392  	for _, fp := range ids {
   393  		profile, ok := pi.profilesPerFP[fp]
   394  		if !ok {
   395  			// If a profile labels is missing here, it has already been flushed
   396  			// and is supposed to be picked up from storage by querier
   397  			continue
   398  		}
   399  		for _, filter := range filters {
   400  			if !filter.Matches(profile.lbs.Get(filter.Name)) {
   401  				continue outer
   402  			}
   403  		}
   404  		if err := fn(profile.lbs, fp); err != nil {
   405  			return err
   406  		}
   407  	}
   408  	return nil
   409  }
   410  
   411  // WriteTo writes the profiles tsdb index to the specified filepath.
   412  func (pi *profilesIndex) writeTo(ctx context.Context, path string) ([][]rowRangeWithSeriesIndex, error) {
   413  	writer, err := index.NewWriter(ctx, path)
   414  	if err != nil {
   415  		return nil, err
   416  	}
   417  	pi.mutex.RLock()
   418  	defer pi.mutex.RUnlock()
   419  
   420  	pfs := make([]*profileSeries, 0, len(pi.profilesPerFP))
   421  
   422  	for _, p := range pi.profilesPerFP {
   423  		pfs = append(pfs, p)
   424  	}
   425  
   426  	// sort by fp
   427  	sort.Slice(pfs, func(i, j int) bool {
   428  		return phlaremodel.CompareLabelPairs(pfs[i].lbs, pfs[j].lbs) < 0
   429  	})
   430  
   431  	symbolsMap := make(map[string]struct{})
   432  	for _, s := range pfs {
   433  		for _, l := range s.lbs {
   434  			symbolsMap[l.Name] = struct{}{}
   435  			symbolsMap[l.Value] = struct{}{}
   436  		}
   437  	}
   438  
   439  	// Sort symbols
   440  	symbols := make([]string, 0, len(symbolsMap))
   441  	for s := range symbolsMap {
   442  		symbols = append(symbols, s)
   443  	}
   444  	sort.Strings(symbols)
   445  
   446  	// Add symbols
   447  	for _, symbol := range symbols {
   448  		if err := writer.AddSymbol(symbol); err != nil {
   449  			return nil, err
   450  		}
   451  	}
   452  
   453  	// ranges per row group
   454  	rangesPerRG := make([][]rowRangeWithSeriesIndex, len(pfs[0].profilesOnDisk))
   455  
   456  	// Add series
   457  	for i, s := range pfs {
   458  		if err := writer.AddSeries(storage.SeriesRef(i), s.lbs, s.fp, index.ChunkMeta{
   459  			MinTime: s.minTime,
   460  			MaxTime: s.maxTime,
   461  			// We store the series Index from the head with the series to use when retrieving data from parquet.
   462  			SeriesIndex: uint32(i),
   463  		}); err != nil {
   464  			return nil, err
   465  		}
   466  		// store series index
   467  		for idx, rg := range s.profilesOnDisk {
   468  			rangesPerRG[idx] = append(rangesPerRG[idx], rowRangeWithSeriesIndex{rowRange: rg, seriesIndex: uint32(i)})
   469  		}
   470  	}
   471  
   472  	return rangesPerRG, writer.Close()
   473  }
   474  
   475  func (pi *profilesIndex) cutRowGroup(rgProfiles []schemav1.InMemoryProfile) error {
   476  	pi.mutex.Lock()
   477  	defer pi.mutex.Unlock()
   478  
   479  	// adding rowGroup and rowNum information per fingerprint
   480  	rowRangePerFP := make(map[model.Fingerprint]*rowRange, len(pi.profilesPerFP))
   481  	countPerFP := make(map[model.Fingerprint]int, len(pi.profilesPerFP))
   482  	for rowNum, p := range rgProfiles {
   483  		countPerFP[p.SeriesFingerprint]++
   484  		if _, ok := rowRangePerFP[p.SeriesFingerprint]; !ok {
   485  			rowRangePerFP[p.SeriesFingerprint] = &rowRange{
   486  				rowNum: int64(rowNum),
   487  			}
   488  		}
   489  
   490  		rowRange := rowRangePerFP[p.SeriesFingerprint]
   491  		rowRange.length++
   492  
   493  		// sanity check
   494  		if (int(rowRange.rowNum) + rowRange.length - 1) != rowNum {
   495  			return fmt.Errorf("rowRange is not matching up, ensure that the ordering of the profile row group is ordered correctly, current row_num=%d, expect range %d-%d", rowNum, rowRange.rowNum, int(rowRange.rowNum)+rowRange.length)
   496  		}
   497  	}
   498  
   499  	pi.rowGroupsOnDisk += 1
   500  
   501  	for fp, ps := range pi.profilesPerFP {
   502  		count := countPerFP[fp]
   503  		// empty all in memory profiles
   504  		for i := range ps.profiles[:count] {
   505  			// Allow GC to evict the object.
   506  			ps.profiles[i] = nil
   507  		}
   508  		ps.profiles = ps.profiles[count:]
   509  
   510  		// attach rowGroup and rowNum information
   511  		rowRange := rowRangePerFP[ps.fp]
   512  
   513  		ps.profilesOnDisk = append(
   514  			ps.profilesOnDisk,
   515  			rowRange,
   516  		)
   517  
   518  	}
   519  
   520  	return nil
   521  }
   522  
   523  // SplitFiltersAndMatchers splits empty matchers off, which are treated as filters, see #220
   524  func SplitFiltersAndMatchers(allMatchers []*labels.Matcher) (filters, matchers []*labels.Matcher) {
   525  	for _, matcher := range allMatchers {
   526  		// If a matcher matches "", we need to fetch possible chunks where
   527  		// there is no value and will therefore not be in our label index.
   528  		// e.g. {foo=""} and {foo!="bar"} both match "", so we need to return
   529  		// chunks which do not have a foo label set. When looking entries in
   530  		// the index, we should ignore this matcher to fetch all possible chunks
   531  		// and then filter on the matcher after the chunks have been fetched.
   532  		if matcher.Matches("") {
   533  			filters = append(filters, matcher)
   534  		} else {
   535  			matchers = append(matchers, matcher)
   536  		}
   537  	}
   538  	return
   539  }