github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/symdb/dedup_slice.go (about)

     1  //nolint:unused,unparam
     2  package symdb
     3  
     4  import (
     5  	"fmt"
     6  	"hash/maphash"
     7  	stdslices "slices"
     8  	"sort"
     9  	"sync"
    10  	"unsafe"
    11  
    12  	"github.com/colega/zeropool"
    13  	"go.uber.org/atomic"
    14  
    15  	profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1"
    16  	schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1"
    17  	"github.com/grafana/pyroscope/pkg/pprof"
    18  	"github.com/grafana/pyroscope/pkg/slices"
    19  )
    20  
    21  // Refactored as is from the phlaredb package.
    22  
// Pools of reusable scratch slices. int64SlicePool backs the "missing"
// position lists built by deduplicatingSlice.ingest and
// deduplicatingSlice.append; uint32SlicePool backs the stack trace ID buffer
// used by PartitionWriter.convertSamples.
var (
	int64SlicePool  zeropool.Pool[[]int64]
	uint32SlicePool zeropool.Pool[[]uint32]
)
    27  
    28  // TODO(kolesnikovae):
    29  //   - PartitionWriter should only rewrite profile symbol indices;
    30  //   - InMemoryProfile should be created somewhere else on the call side.
    31  
    32  func (p *PartitionWriter) WriteProfileSymbols(profile *profilev1.Profile) []schemav1.InMemoryProfile {
    33  	// create a rewriter state
    34  	rewrites := &rewriter{}
    35  
    36  	spans := pprof.ProfileSpans(profile)
    37  	pprof.ZeroLabelStrings(profile)
    38  
    39  	p.strings.ingest(profile.StringTable, rewrites)
    40  	mappings := make([]schemav1.InMemoryMapping, len(profile.Mapping))
    41  	for i, v := range profile.Mapping {
    42  		mappings[i] = schemav1.InMemoryMapping{
    43  			Id:              v.Id,
    44  			MemoryStart:     v.MemoryStart,
    45  			MemoryLimit:     v.MemoryLimit,
    46  			FileOffset:      v.FileOffset,
    47  			Filename:        uint32(v.Filename),
    48  			BuildId:         uint32(v.BuildId),
    49  			HasFunctions:    v.HasFunctions,
    50  			HasFilenames:    v.HasFilenames,
    51  			HasLineNumbers:  v.HasLineNumbers,
    52  			HasInlineFrames: v.HasInlineFrames,
    53  		}
    54  	}
    55  
    56  	p.mappings.ingest(mappings, rewrites)
    57  	funcs := make([]schemav1.InMemoryFunction, len(profile.Function))
    58  	for i, v := range profile.Function {
    59  		funcs[i] = schemav1.InMemoryFunction{
    60  			Id:         v.Id,
    61  			Name:       uint32(v.Name),
    62  			SystemName: uint32(v.SystemName),
    63  			Filename:   uint32(v.Filename),
    64  			StartLine:  uint32(v.StartLine),
    65  		}
    66  	}
    67  
    68  	p.functions.ingest(funcs, rewrites)
    69  	locs := make([]schemav1.InMemoryLocation, len(profile.Location))
    70  	for i, v := range profile.Location {
    71  		x := schemav1.InMemoryLocation{
    72  			Id:        v.Id,
    73  			Address:   v.Address,
    74  			MappingId: uint32(v.MappingId),
    75  			IsFolded:  v.IsFolded,
    76  		}
    77  		x.Line = make([]schemav1.InMemoryLine, len(v.Line))
    78  		for j, line := range v.Line {
    79  			x.Line[j] = schemav1.InMemoryLine{
    80  				FunctionId: uint32(line.FunctionId),
    81  				Line:       int32(line.Line),
    82  			}
    83  		}
    84  		locs[i] = x
    85  	}
    86  
    87  	p.locations.ingest(locs, rewrites)
    88  	samplesPerType := p.convertSamples(rewrites, profile.Sample, spans)
    89  
    90  	profiles := make([]schemav1.InMemoryProfile, len(samplesPerType))
    91  	for idxType := range samplesPerType {
    92  		profiles[idxType] = schemav1.InMemoryProfile{
    93  			StacktracePartition: p.header.Partition,
    94  			Samples:             samplesPerType[idxType],
    95  			DropFrames:          profile.DropFrames,
    96  			KeepFrames:          profile.KeepFrames,
    97  			TimeNanos:           profile.TimeNanos,
    98  			DurationNanos:       profile.DurationNanos,
    99  			Comments:            copySlice(profile.Comment),
   100  			DefaultSampleType:   profile.DefaultSampleType,
   101  		}
   102  	}
   103  
   104  	return profiles
   105  }
   106  
   107  func (p *PartitionWriter) convertSamples(r *rewriter, in []*profilev1.Sample, spans []uint64) []schemav1.Samples {
   108  	if len(in) == 0 {
   109  		return nil
   110  	}
   111  
   112  	// populate output
   113  	var (
   114  		samplesByType = make([]schemav1.Samples, len(in[0].Value))
   115  		stacktraces   = make([]*schemav1.Stacktrace, len(in))
   116  	)
   117  
   118  	for i := range samplesByType {
   119  		s := schemav1.Samples{
   120  			Values:        make([]uint64, len(in)),
   121  			StacktraceIDs: make([]uint32, len(in)),
   122  		}
   123  		if len(spans) > 0 {
   124  			s.Spans = make([]uint64, len(spans))
   125  			copy(s.Spans, spans)
   126  		}
   127  		samplesByType[i] = s
   128  	}
   129  
   130  	for idxSample := range in {
   131  		// populate samples
   132  		src := in[idxSample]
   133  		for idxType := range samplesByType {
   134  			samplesByType[idxType].Values[idxSample] = uint64(src.Value[idxType])
   135  		}
   136  		stacktraces[idxSample] = &schemav1.Stacktrace{LocationIDs: src.LocationId}
   137  		for i := range stacktraces[idxSample].LocationIDs {
   138  			r.locations.rewriteUint64(&stacktraces[idxSample].LocationIDs[i])
   139  		}
   140  	}
   141  
   142  	stacktracesIds := slices.GrowLen(uint32SlicePool.Get(), len(stacktraces))
   143  	p.stacktraces.append(stacktracesIds, stacktraces)
   144  
   145  	// Rewrite stacktraces
   146  	for idxType := range samplesByType {
   147  		samples := samplesByType[idxType]
   148  		for i := range samples.StacktraceIDs {
   149  			samples.StacktraceIDs[i] = stacktracesIds[i]
   150  		}
   151  		samples = samples.Compact(false)
   152  		sort.Sort(samples)
   153  		samplesByType[idxType] = samples
   154  	}
   155  
   156  	uint32SlicePool.Put(stacktracesIds)
   157  	return samplesByType
   158  }
   159  
   160  func copySlice[T any](in []T) []T {
   161  	out := make([]T, len(in))
   162  	copy(out, in)
   163  	return out
   164  }
   165  
   166  type idConversionTable map[int64]int64
   167  
   168  // nolint unused
   169  func (t idConversionTable) rewrite(idx *int64) {
   170  	pos := *idx
   171  	var ok bool
   172  	*idx, ok = t[pos]
   173  	if !ok {
   174  		panic(fmt.Sprintf("unable to rewrite index %d", pos))
   175  	}
   176  }
   177  
   178  // nolint unused
   179  func (t idConversionTable) rewriteUint64(idx *uint64) {
   180  	pos := *idx
   181  	v, ok := t[int64(pos)]
   182  	if !ok {
   183  		panic(fmt.Sprintf("unable to rewrite index %d", pos))
   184  	}
   185  	*idx = uint64(v)
   186  }
   187  
   188  // nolint unused
   189  func (t idConversionTable) rewriteUint32(idx *uint32) {
   190  	pos := *idx
   191  	v, ok := t[int64(pos)]
   192  	if !ok {
   193  		panic(fmt.Sprintf("unable to rewrite index %d", pos))
   194  	}
   195  	*idx = uint32(v)
   196  }
   197  
   198  func emptyRewriter() *rewriter {
   199  	return &rewriter{
   200  		strings: []int64{0},
   201  	}
   202  }
   203  
// rewriter holds the conversion tables used to rewrite per-profile references
// into per-head references. Each table is installed by the addToRewriter
// method of the corresponding helper once that kind of element is ingested.
type rewriter struct {
	// strings is indexed by the profile-local string index.
	strings stringConversionTable
	// functions is keyed by the profile-local function ID.
	// nolint unused
	functions idConversionTable
	// mappings is keyed by the profile-local mapping ID.
	// nolint unused
	mappings idConversionTable
	// locations is keyed by the profile-local location ID.
	// nolint unused
	locations idConversionTable
}
   214  
// storeHelper describes the per-model operations needed to store elements of
// type M.
type storeHelper[M schemav1.Models] interface {
	// setID assigns newID to the element and returns the old ID that is
	// supposed to be rewritten. Some Models contain their own IDs within the
	// struct; for those the pre-existing ID is returned. existingSliceID is
	// the position of the element in the slice being ingested.
	setID(existingSliceID uint64, newID uint64, element *M) uint64

	// size returns a (rough estimation) of the size of a single element M.
	size(M) uint64

	// clone copies parts that are not optimally sized from protobuf parsing.
	clone(M) M

	// rewrite updates the references held by the element using the
	// conversion tables of the rewriter.
	rewrite(*rewriter, *M) error
}
   227  
// Helper extends storeHelper with what deduplicatingSlice needs in order to
// deduplicate elements of type M by a comparable key K.
type Helper[M schemav1.Models, K comparable] interface {
	storeHelper[M]
	// key returns the value used to look the element up for deduplication.
	key(M) K
	// addToRewriter installs the given conversion table into the rewriter.
	addToRewriter(*rewriter, idConversionTable)
}
   233  
// deduplicatingSlice stores elements of type M in a slice, using the key K
// produced by the helper H to avoid storing the same element twice.
type deduplicatingSlice[M schemav1.Models, K comparable, H Helper[M, K]] struct {
	// lock guards slice and lookup.
	lock sync.RWMutex
	// slice holds the stored elements.
	slice []M
	// size accumulates helper.size for every stored element.
	size atomic.Uint64
	// lookup maps an element key to the element's index in slice.
	lookup map[K]int64

	helper H
}
   242  
   243  func (s *deduplicatingSlice[M, K, H]) init() {
   244  	s.lookup = make(map[K]int64)
   245  }
   246  
   247  func (s *deduplicatingSlice[M, K, H]) MemorySize() uint64 {
   248  	// FIXME(kolesnikovae): Map footprint + slice capacity.
   249  	return s.size.Load()
   250  }
   251  
   252  func (s *deduplicatingSlice[M, K, H]) Size() uint64 {
   253  	return s.size.Load()
   254  }
   255  
// ingest adds elems to the slice, skipping elements whose key is already
// known, and records in the rewriter how the old IDs map to positions in the
// slice.
//
// elems is modified in place: references held by each element are rewritten
// via helper.rewrite, and each element is given its new ID via helper.setID.
// The resulting old-ID -> position table is handed to helper.addToRewriter.
func (s *deduplicatingSlice[M, K, H]) ingest(elems []M, rewriter *rewriter) {
	var (
		// rewritingMap maps the old ID returned by setID to the element's
		// position in s.slice.
		rewritingMap = make(map[int64]int64, len(elems))
		// missing collects the positions in elems not found in the lookup.
		missing = int64SlicePool.Get()
	)
	missing = missing[:0]
	// rewrite elements
	for pos := range elems {
		// The error is discarded; the helpers in this file always return nil.
		_ = s.helper.rewrite(rewriter, &elems[pos])
	}

	// try to find if element already exists in slice, when supposed to deduplicate
	s.lock.RLock()
	for pos := range elems {
		k := s.helper.key(elems[pos])
		if posSlice, exists := s.lookup[k]; exists {
			rewritingMap[int64(s.helper.setID(uint64(pos), uint64(posSlice), &elems[pos]))] = posSlice
		} else {
			missing = append(missing, int64(pos))
		}
	}
	s.lock.RUnlock()

	// if there are missing elements, acquire write lock
	if len(missing) > 0 {
		s.lock.Lock()
		// posSlice is the position the next new element will take.
		posSlice := int64(len(s.slice))
		s.slice = stdslices.Grow(s.slice, len(missing))
		for _, pos := range missing {
			// check again if element exists: the read lock was released, and
			// elems itself may contain the same key more than once
			k := s.helper.key(elems[pos])
			if posSlice, exists := s.lookup[k]; exists {
				rewritingMap[int64(s.helper.setID(uint64(pos), uint64(posSlice), &elems[pos]))] = posSlice
				continue
			}

			// add element to slice/map
			// NOTE(review): the element is cloned before setID is applied, so
			// the stored copy keeps its pre-setID ID — confirm this is intended.
			s.slice = append(s.slice, s.helper.clone(elems[pos]))
			s.lookup[k] = posSlice
			rewritingMap[int64(s.helper.setID(uint64(pos), uint64(posSlice), &elems[pos]))] = posSlice
			posSlice++
			s.size.Add(s.helper.size(elems[pos]))
		}
		s.lock.Unlock()
	}

	// nolint staticcheck
	int64SlicePool.Put(missing)

	// add rewrite information to struct
	s.helper.addToRewriter(rewriter, rewritingMap)
}
   308  
   309  func (s *deduplicatingSlice[M, K, H]) append(dst []uint32, elems []M) {
   310  	missing := int64SlicePool.Get()[:0]
   311  	s.lock.RLock()
   312  	for i, v := range elems {
   313  		k := s.helper.key(v)
   314  		if x, ok := s.lookup[k]; ok {
   315  			dst[i] = uint32(x)
   316  		} else {
   317  			missing = append(missing, int64(i))
   318  		}
   319  	}
   320  	s.lock.RUnlock()
   321  	if len(missing) > 0 {
   322  		s.lock.RLock()
   323  		p := uint32(len(s.slice))
   324  		for _, i := range missing {
   325  			e := elems[i]
   326  			k := s.helper.key(e)
   327  			x, ok := s.lookup[k]
   328  			if ok {
   329  				dst[i] = uint32(x)
   330  				continue
   331  			}
   332  			s.size.Add(s.helper.size(e))
   333  			s.slice = append(s.slice, s.helper.clone(e))
   334  			s.lookup[k] = int64(p)
   335  			dst[i] = p
   336  			p++
   337  		}
   338  		s.lock.RUnlock()
   339  	}
   340  	int64SlicePool.Put(missing)
   341  }
   342  
   343  func (s *deduplicatingSlice[M, K, H]) sliceHeaderCopy() []M {
   344  	s.lock.RLock()
   345  	h := s.slice
   346  	s.lock.RUnlock()
   347  	return h
   348  }
   349  
   350  type stringConversionTable []int64
   351  
   352  func (t stringConversionTable) rewrite(idx *int64) {
   353  	originalValue := int(*idx)
   354  	newValue := t[originalValue]
   355  	*idx = newValue
   356  }
   357  
   358  func (t stringConversionTable) rewriteUint32(idx *uint32) {
   359  	originalValue := int(*idx)
   360  	newValue := t[originalValue]
   361  	*idx = uint32(newValue)
   362  }
   363  
   364  type stringsHelper struct{}
   365  
   366  func (*stringsHelper) key(s string) string {
   367  	return s
   368  }
   369  
   370  func (*stringsHelper) addToRewriter(r *rewriter, m idConversionTable) {
   371  	var maxID int64
   372  	for id := range m {
   373  		if id > maxID {
   374  			maxID = id
   375  		}
   376  	}
   377  	r.strings = make(stringConversionTable, maxID+1)
   378  
   379  	for x, y := range m {
   380  		r.strings[x] = y
   381  	}
   382  }
   383  
   384  // nolint unused
   385  func (*stringsHelper) rewrite(*rewriter, *string) error {
   386  	return nil
   387  }
   388  
   389  func (*stringsHelper) size(s string) uint64 {
   390  	return uint64(len(s))
   391  }
   392  
   393  func (*stringsHelper) setID(oldID, newID uint64, s *string) uint64 {
   394  	return oldID
   395  }
   396  
   397  func (*stringsHelper) clone(s string) string {
   398  	return s
   399  }
   400  
// locationsKey is the deduplication key of a location: its mapping, its
// address, and a hash of its lines as computed by hashLines.
type locationsKey struct {
	MappingId uint32 //nolint
	Address   uint64
	LinesHash uint64
}

// In-memory sizes of a single line and a single location value (excluding
// the lines a location refers to), as reported by unsafe.Sizeof.
const (
	lineSize     = uint64(unsafe.Sizeof(schemav1.InMemoryLine{}))
	locationSize = uint64(unsafe.Sizeof(schemav1.InMemoryLocation{}))
)
   411  
   412  type locationsHelper struct{}
   413  
   414  func (*locationsHelper) key(l schemav1.InMemoryLocation) locationsKey {
   415  	return locationsKey{
   416  		Address:   l.Address,
   417  		MappingId: l.MappingId,
   418  		LinesHash: hashLines(l.Line),
   419  	}
   420  }
   421  
   422  var mapHashSeed = maphash.MakeSeed()
   423  
   424  func hashLines(s []schemav1.InMemoryLine) uint64 {
   425  	if len(s) == 0 {
   426  		return 0
   427  	}
   428  	p := (*byte)(unsafe.Pointer(&s[0]))
   429  	b := unsafe.Slice(p, len(s)*int(lineSize))
   430  	return maphash.Bytes(mapHashSeed, b)
   431  }
   432  
   433  func hashLocations(s []uint64) uint64 {
   434  	if len(s) == 0 {
   435  		return 0
   436  	}
   437  	p := (*byte)(unsafe.Pointer(&s[0]))
   438  	b := unsafe.Slice(p, len(s)*8)
   439  	return maphash.Bytes(mapHashSeed, b)
   440  }
   441  
   442  func (*locationsHelper) addToRewriter(r *rewriter, elemRewriter idConversionTable) {
   443  	r.locations = elemRewriter
   444  }
   445  
   446  func (*locationsHelper) rewrite(r *rewriter, l *schemav1.InMemoryLocation) error {
   447  	// when mapping id is not 0, rewrite it
   448  	if l.MappingId != 0 {
   449  		r.mappings.rewriteUint32(&l.MappingId)
   450  	}
   451  	for pos := range l.Line {
   452  		r.functions.rewriteUint32(&l.Line[pos].FunctionId)
   453  	}
   454  	return nil
   455  }
   456  
   457  func (*locationsHelper) setID(_, newID uint64, l *schemav1.InMemoryLocation) uint64 {
   458  	oldID := l.Id
   459  	l.Id = newID
   460  	return oldID
   461  }
   462  
   463  func (*locationsHelper) size(l schemav1.InMemoryLocation) uint64 {
   464  	return uint64(len(l.Line))*lineSize + locationSize
   465  }
   466  
   467  func (*locationsHelper) clone(l schemav1.InMemoryLocation) schemav1.InMemoryLocation {
   468  	x := l
   469  	x.Line = make([]schemav1.InMemoryLine, len(l.Line))
   470  	copy(x.Line, l.Line)
   471  	return x
   472  }
   473  
   474  type mappingsHelper struct{}
   475  
   476  const mappingSize = uint64(unsafe.Sizeof(schemav1.InMemoryMapping{}))
   477  
   478  type mappingsKey struct {
   479  	MemoryStart     uint64
   480  	MemoryLimit     uint64
   481  	FileOffset      uint64
   482  	Filename        uint32 // Index into string table
   483  	BuildId         uint32 // Index into string table
   484  	HasFunctions    bool
   485  	HasFilenames    bool
   486  	HasLineNumbers  bool
   487  	HasInlineFrames bool
   488  }
   489  
   490  func (*mappingsHelper) key(m schemav1.InMemoryMapping) mappingsKey {
   491  	return mappingsKey{
   492  		MemoryStart:     m.MemoryStart,
   493  		MemoryLimit:     m.MemoryLimit,
   494  		FileOffset:      m.FileOffset,
   495  		Filename:        m.Filename,
   496  		BuildId:         m.BuildId,
   497  		HasFunctions:    m.HasFunctions,
   498  		HasFilenames:    m.HasFilenames,
   499  		HasLineNumbers:  m.HasLineNumbers,
   500  		HasInlineFrames: m.HasInlineFrames,
   501  	}
   502  }
   503  
   504  func (*mappingsHelper) addToRewriter(r *rewriter, elemRewriter idConversionTable) {
   505  	r.mappings = elemRewriter
   506  }
   507  
   508  // nolint unparam
   509  func (*mappingsHelper) rewrite(r *rewriter, m *schemav1.InMemoryMapping) error {
   510  	r.strings.rewriteUint32(&m.Filename)
   511  	r.strings.rewriteUint32(&m.BuildId)
   512  	return nil
   513  }
   514  
   515  func (*mappingsHelper) setID(_, newID uint64, m *schemav1.InMemoryMapping) uint64 {
   516  	oldID := m.Id
   517  	m.Id = newID
   518  	return oldID
   519  }
   520  
   521  func (*mappingsHelper) size(_ schemav1.InMemoryMapping) uint64 {
   522  	return mappingSize
   523  }
   524  
   525  func (*mappingsHelper) clone(m schemav1.InMemoryMapping) schemav1.InMemoryMapping {
   526  	return m
   527  }
   528  
   529  type functionsKey struct {
   530  	Name       uint32
   531  	SystemName uint32
   532  	Filename   uint32
   533  	StartLine  uint32
   534  }
   535  
   536  type functionsHelper struct{}
   537  
   538  const functionSize = uint64(unsafe.Sizeof(schemav1.InMemoryFunction{}))
   539  
   540  func (*functionsHelper) key(f schemav1.InMemoryFunction) functionsKey {
   541  	return functionsKey{
   542  		Name:       f.Name,
   543  		SystemName: f.SystemName,
   544  		Filename:   f.Filename,
   545  		StartLine:  f.StartLine,
   546  	}
   547  }
   548  
   549  func (*functionsHelper) addToRewriter(r *rewriter, elemRewriter idConversionTable) {
   550  	r.functions = elemRewriter
   551  }
   552  
   553  func (*functionsHelper) rewrite(r *rewriter, f *schemav1.InMemoryFunction) error {
   554  	r.strings.rewriteUint32(&f.Filename)
   555  	r.strings.rewriteUint32(&f.Name)
   556  	r.strings.rewriteUint32(&f.SystemName)
   557  	return nil
   558  }
   559  
   560  func (*functionsHelper) setID(_, newID uint64, f *schemav1.InMemoryFunction) uint64 {
   561  	oldID := f.Id
   562  	f.Id = newID
   563  	return oldID
   564  }
   565  
   566  func (*functionsHelper) size(_ schemav1.InMemoryFunction) uint64 {
   567  	return functionSize
   568  }
   569  
   570  func (*functionsHelper) clone(f schemav1.InMemoryFunction) schemav1.InMemoryFunction {
   571  	return f
   572  }