github.com/grafana/pyroscope@v1.18.0/pkg/segmentwriter/memdb/profile_index.go (about)

     1  package memdb
     2  
     3  import (
     4  	"context"
     5  	"sort"
     6  	"sync"
     7  
     8  	"github.com/prometheus/common/model"
     9  	"github.com/prometheus/prometheus/storage"
    10  	"go.uber.org/atomic"
    11  
    12  	phlaremodel "github.com/grafana/pyroscope/pkg/model"
    13  	schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1"
    14  	"github.com/grafana/pyroscope/pkg/phlaredb/tsdb"
    15  	"github.com/grafana/pyroscope/pkg/phlaredb/tsdb/index"
    16  	memindex "github.com/grafana/pyroscope/pkg/segmentwriter/memdb/index"
    17  )
    18  
    19  type profileSeries struct {
    20  	lbs              phlaremodel.Labels
    21  	fp               model.Fingerprint
    22  	minTime, maxTime int64
    23  	profiles         []*schemav1.InMemoryProfile
    24  }
    25  
    26  type profilesIndex struct {
    27  	ix            *tsdb.BitPrefixInvertedIndex
    28  	profilesPerFP map[model.Fingerprint]*profileSeries
    29  	mutex         sync.RWMutex
    30  	metrics       *HeadMetrics
    31  	totalSeries   *atomic.Int64
    32  }
    33  
    34  func newProfileIndex(metrics *HeadMetrics) *profilesIndex {
    35  	ix, err := tsdb.NewBitPrefixWithShards(32)
    36  	if err != nil {
    37  		panic(err)
    38  	}
    39  	return &profilesIndex{
    40  		ix:            ix,
    41  		profilesPerFP: make(map[model.Fingerprint]*profileSeries),
    42  		metrics:       metrics,
    43  		totalSeries:   atomic.NewInt64(0),
    44  	}
    45  }
    46  
    47  // Add a new set of profile to the index.
    48  // The seriesRef are expected to match the profile labels passed in.
    49  func (pi *profilesIndex) Add(ps *schemav1.InMemoryProfile, lbs phlaremodel.Labels, profileName string) {
    50  	pi.mutex.Lock()
    51  	defer pi.mutex.Unlock()
    52  	profiles, ok := pi.profilesPerFP[ps.SeriesFingerprint]
    53  	if !ok {
    54  		lbs := pi.ix.Add(lbs, ps.SeriesFingerprint)
    55  		profiles = &profileSeries{
    56  			lbs:     lbs,
    57  			fp:      ps.SeriesFingerprint,
    58  			minTime: ps.TimeNanos,
    59  			maxTime: ps.TimeNanos,
    60  		}
    61  		pi.profilesPerFP[ps.SeriesFingerprint] = profiles
    62  		//pi.metrics.series.Set(float64(pi.totalSeries.Inc())) // todo how did it work?
    63  		pi.totalSeries.Inc()
    64  		pi.metrics.seriesCreated.WithLabelValues(profileName).Inc()
    65  	}
    66  
    67  	// profile is latest in this series, use a shortcut
    68  	if ps.TimeNanos > profiles.maxTime {
    69  		// update max timeNanos
    70  		profiles.maxTime = ps.TimeNanos
    71  
    72  		// add profile to in memory slice
    73  		profiles.profiles = append(profiles.profiles, ps)
    74  	} else {
    75  		// use binary search to find position
    76  		i := sort.Search(len(profiles.profiles), func(i int) bool {
    77  			return profiles.profiles[i].TimeNanos > ps.TimeNanos
    78  		})
    79  
    80  		// insert into slice at correct position
    81  		profiles.profiles = append(profiles.profiles, &schemav1.InMemoryProfile{})
    82  		copy(profiles.profiles[i+1:], profiles.profiles[i:])
    83  		profiles.profiles[i] = ps
    84  	}
    85  
    86  	if ps.TimeNanos < profiles.minTime {
    87  		profiles.minTime = ps.TimeNanos
    88  	}
    89  
    90  	//pi.metrics.profiles.Set(float64(pi.totalProfiles.Inc())) //todo how did it work?
    91  	pi.metrics.profilesCreated.WithLabelValues(profileName).Inc()
    92  }
    93  
    94  func (pi *profilesIndex) Flush(ctx context.Context) ([]byte, []schemav1.InMemoryProfile, error) {
    95  	writer, err := memindex.NewWriter(ctx, memindex.SegmentsIndexWriterBufSize)
    96  	if err != nil {
    97  		return nil, nil, err
    98  	}
    99  	pi.mutex.RLock()
   100  	defer pi.mutex.RUnlock()
   101  
   102  	// TODO(kolesnikovae): We should reuse these series
   103  	//   when building dataset index.
   104  	pfs := make([]*profileSeries, 0, len(pi.profilesPerFP))
   105  	profilesSize := 0
   106  
   107  	for _, p := range pi.profilesPerFP {
   108  		pfs = append(pfs, p)
   109  		profilesSize += len(p.profiles)
   110  	}
   111  
   112  	// sort by fp
   113  	sort.Slice(pfs, func(i, j int) bool {
   114  		return phlaremodel.CompareLabelPairs(pfs[i].lbs, pfs[j].lbs) < 0
   115  	})
   116  
   117  	symbolsMap := make(map[string]struct{})
   118  	for _, s := range pfs {
   119  		for _, l := range s.lbs {
   120  			symbolsMap[l.Name] = struct{}{}
   121  			symbolsMap[l.Value] = struct{}{}
   122  		}
   123  	}
   124  
   125  	// Sort symbols
   126  	symbols := make([]string, 0, len(symbolsMap))
   127  	for s := range symbolsMap {
   128  		symbols = append(symbols, s)
   129  	}
   130  	sort.Strings(symbols)
   131  
   132  	// Add symbols
   133  	for _, symbol := range symbols {
   134  		if err := writer.AddSymbol(symbol); err != nil {
   135  			return nil, nil, err
   136  		}
   137  	}
   138  
   139  	profiles := make([]schemav1.InMemoryProfile, 0, profilesSize)
   140  
   141  	// Add series
   142  	for i, s := range pfs {
   143  		if err := writer.AddSeries(storage.SeriesRef(i), s.lbs, s.fp, index.ChunkMeta{
   144  			MinTime: s.minTime,
   145  			MaxTime: s.maxTime,
   146  			// We store the series Index from the head with the series to use when retrieving data from parquet.
   147  			SeriesIndex: uint32(i),
   148  		}); err != nil {
   149  			return nil, nil, err
   150  		}
   151  		// store series index
   152  		for j := range s.profiles {
   153  			s.profiles[j].SeriesIndex = uint32(i)
   154  		}
   155  		//profiles = append(profiles, s.profiles...)
   156  		for _, profile := range s.profiles {
   157  			profiles = append(profiles, *profile) //todo avoid copy
   158  		}
   159  	}
   160  
   161  	err = writer.Close()
   162  	if err != nil {
   163  		return nil, nil, err
   164  	}
   165  
   166  	//todo maybe return the bufferWriter to avoid copy, it is copied again anyway
   167  	tsdbIndex := writer.ReleaseIndex()
   168  
   169  	return tsdbIndex, profiles, err
   170  }
   171  
   172  func (pi *profilesIndex) profileTypeNames() ([]string, error) {
   173  	pi.mutex.RLock()
   174  	defer pi.mutex.RUnlock()
   175  	ptypes, err := pi.ix.LabelValues(phlaremodel.LabelNameProfileType, nil)
   176  	sort.Strings(ptypes)
   177  	return ptypes, err
   178  }