github.com/grafana/pyroscope@v1.18.0/pkg/pprof/merge.go (about)

     1  package pprof
     2  
     3  import (
     4  	"fmt"
     5  	"hash/maphash"
     6  	"sort"
     7  	"sync"
     8  
     9  	"github.com/dolthub/swiss"
    10  
    11  	profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1"
    12  	"github.com/grafana/pyroscope/pkg/slices"
    13  )
    14  
    15  // TODO(kolesnikovae):
    16  //   Add a function that incorporates Merge and Normalize.
    17  //   Both functions perform some sanity checks but none of them
    18  //   is enough to "vet" the profile completely.
    19  //   Specifically:
    20  //    - it's possible that unreferenced objects will remain in the
    21  //      profile, and therefore will be written to the storage.
    22  //    - Normalize does not remove duplicates and unreferenced objects
    23  //      except samples.
    24  //    - Merge does not remove unreferenced objects at all.
    25  //    - Merge is fairly expensive: allocated capacities should be
    26  //      reused and the number of allocs decreased.
    27  
    28  type ProfileMerge struct {
    29  	mu sync.Mutex
    30  
    31  	profile *profilev1.Profile
    32  	tmp     []uint32
    33  
    34  	stringTable   RewriteTable[string, string, string]
    35  	functionTable RewriteTable[FunctionKey, *profilev1.Function, *profilev1.Function]
    36  	mappingTable  RewriteTable[MappingKey, *profilev1.Mapping, *profilev1.Mapping]
    37  	locationTable RewriteTable[LocationKey, *profilev1.Location, *profilev1.Location]
    38  	sampleTable   RewriteTable[SampleKey, *profilev1.Sample, *profilev1.Sample]
    39  }
    40  
    41  // Merge adds p to the profile merge, cloning new objects.
    42  // Profile p is modified in place but not retained by the function.
    43  func (m *ProfileMerge) Merge(p *profilev1.Profile, sanitize bool) error {
    44  	m.mu.Lock()
    45  	defer m.mu.Unlock()
    46  
    47  	if p == nil || len(p.Sample) == 0 || len(p.StringTable) < 2 {
    48  		return nil
    49  	}
    50  
    51  	if sanitize {
    52  		var stats sanitizeStats
    53  		sanitizeProfile(p, &stats)
    54  	}
    55  	var initial bool
    56  	if m.profile == nil {
    57  		m.init(p)
    58  		initial = true
    59  	}
    60  
    61  	// We rewrite strings first in order to compare
    62  	// sample types and period type.
    63  	m.tmp = slices.GrowLen(m.tmp, len(p.StringTable))
    64  	m.stringTable.Index(m.tmp, p.StringTable)
    65  	RewriteStrings(p, m.tmp)
    66  	if initial {
    67  		// Right after initialisation we need to make
    68  		// sure that the string identifiers are normalized
    69  		// among profiles.
    70  		RewriteStrings(m.profile, m.tmp)
    71  	}
    72  
    73  	if err := combineHeaders(m.profile, p); err != nil {
    74  		return err
    75  	}
    76  
    77  	m.tmp = slices.GrowLen(m.tmp, len(p.Function))
    78  	m.functionTable.Index(m.tmp, p.Function)
    79  	RewriteFunctions(p, m.tmp)
    80  
    81  	m.tmp = slices.GrowLen(m.tmp, len(p.Mapping))
    82  	m.mappingTable.Index(m.tmp, p.Mapping)
    83  	RewriteMappings(p, m.tmp)
    84  
    85  	m.tmp = slices.GrowLen(m.tmp, len(p.Location))
    86  	m.locationTable.Index(m.tmp, p.Location)
    87  	RewriteLocations(p, m.tmp)
    88  
    89  	m.tmp = slices.GrowLen(m.tmp, len(p.Sample))
    90  	m.sampleTable.Index(m.tmp, p.Sample)
    91  
    92  	for i, idx := range m.tmp {
    93  		dst := m.sampleTable.s[idx].Value
    94  		src := p.Sample[i].Value
    95  		for j, v := range src {
    96  			dst[j] += v
    97  		}
    98  	}
    99  
   100  	return nil
   101  }
   102  
   103  func (m *ProfileMerge) MergeBytes(b []byte, sanitize bool) error {
   104  	var p profilev1.Profile
   105  	if err := Unmarshal(b, &p); err != nil {
   106  		return err
   107  	}
   108  	return m.Merge(&p, sanitize)
   109  }
   110  
   111  func (m *ProfileMerge) Profile() *profilev1.Profile {
   112  	if m.profile == nil {
   113  		return &profilev1.Profile{
   114  			SampleType:  []*profilev1.ValueType{new(profilev1.ValueType)},
   115  			PeriodType:  new(profilev1.ValueType),
   116  			StringTable: []string{""},
   117  		}
   118  	}
   119  	m.profile.Sample = m.sampleTable.Values()
   120  	m.profile.Location = m.locationTable.Values()
   121  	m.profile.Function = m.functionTable.Values()
   122  	m.profile.Mapping = m.mappingTable.Values()
   123  	m.profile.StringTable = m.stringTable.Values()
   124  	for i := range m.profile.Location {
   125  		m.profile.Location[i].Id = uint64(i + 1)
   126  	}
   127  	for i := range m.profile.Function {
   128  		m.profile.Function[i].Id = uint64(i + 1)
   129  	}
   130  	for i := range m.profile.Mapping {
   131  		m.profile.Mapping[i].Id = uint64(i + 1)
   132  	}
   133  	return m.profile
   134  }
   135  
   136  func (m *ProfileMerge) init(x *profilev1.Profile) {
   137  	factor := 2
   138  	m.stringTable = NewRewriteTable(
   139  		factor*len(x.StringTable),
   140  		func(s string) string { return s },
   141  		func(s string) string { return s },
   142  	)
   143  
   144  	m.functionTable = NewRewriteTable[FunctionKey, *profilev1.Function, *profilev1.Function](
   145  		factor*len(x.Function), GetFunctionKey, cloneVT[*profilev1.Function])
   146  
   147  	m.mappingTable = NewRewriteTable[MappingKey, *profilev1.Mapping, *profilev1.Mapping](
   148  		factor*len(x.Mapping), GetMappingKey, cloneVT[*profilev1.Mapping])
   149  
   150  	m.locationTable = NewRewriteTable[LocationKey, *profilev1.Location, *profilev1.Location](
   151  		factor*len(x.Location), GetLocationKey, cloneVT[*profilev1.Location])
   152  
   153  	m.sampleTable = NewRewriteTable[SampleKey, *profilev1.Sample, *profilev1.Sample](
   154  		factor*len(x.Sample), GetSampleKey, func(sample *profilev1.Sample) *profilev1.Sample {
   155  			c := sample.CloneVT()
   156  			slices.Clear(c.Value)
   157  			return c
   158  		})
   159  
   160  	m.profile = &profilev1.Profile{
   161  		SampleType: make([]*profilev1.ValueType, len(x.SampleType)),
   162  		DropFrames: x.DropFrames,
   163  		KeepFrames: x.KeepFrames,
   164  		TimeNanos:  x.TimeNanos,
   165  		// Profile durations are summed up, therefore
   166  		// we skip the field at initialization.
   167  		// DurationNanos:  x.DurationNanos,
   168  		PeriodType:        x.PeriodType.CloneVT(),
   169  		Period:            x.Period,
   170  		DefaultSampleType: x.DefaultSampleType,
   171  	}
   172  	for i, st := range x.SampleType {
   173  		m.profile.SampleType[i] = st.CloneVT()
   174  	}
   175  }
   176  
   177  func cloneVT[T interface{ CloneVT() T }](t T) T { return t.CloneVT() }
   178  
   179  // combineHeaders checks that all profiles can be merged and returns
   180  // their combined profile.
   181  // NOTE(kolesnikovae): Copied from pprof.
   182  func combineHeaders(a, b *profilev1.Profile) error {
   183  	if err := compatible(a, b); err != nil {
   184  		return err
   185  	}
   186  	// Smallest timestamp.
   187  	if a.TimeNanos == 0 || b.TimeNanos < a.TimeNanos {
   188  		a.TimeNanos = b.TimeNanos
   189  	}
   190  	// Summed up duration.
   191  	a.DurationNanos += b.DurationNanos
   192  	// Largest period.
   193  	if a.Period == 0 || a.Period < b.Period {
   194  		a.Period = b.Period
   195  	}
   196  	if a.DefaultSampleType == 0 {
   197  		a.DefaultSampleType = b.DefaultSampleType
   198  	}
   199  	return nil
   200  }
   201  
   202  // compatible determines if two profiles can be compared/merged.
   203  // returns nil if the profiles are compatible; otherwise an error with
   204  // details on the incompatibility.
   205  func compatible(a, b *profilev1.Profile) error {
   206  	if !equalValueType(a.PeriodType, b.PeriodType) {
   207  		return fmt.Errorf("incompatible period types %v and %v", a.PeriodType, b.PeriodType)
   208  	}
   209  	if len(b.SampleType) != len(a.SampleType) {
   210  		return fmt.Errorf("incompatible sample types %v and %v", a.SampleType, b.SampleType)
   211  	}
   212  	for i := range a.SampleType {
   213  		if !equalValueType(a.SampleType[i], b.SampleType[i]) {
   214  			return fmt.Errorf("incompatible sample types %v and %v", a.SampleType, b.SampleType)
   215  		}
   216  	}
   217  	return nil
   218  }
   219  
   220  // equalValueType returns true if the two value types are semantically
   221  // equal. It ignores the internal fields used during encode/decode.
   222  func equalValueType(st1, st2 *profilev1.ValueType) bool {
   223  	if st1 == nil || st2 == nil {
   224  		return false
   225  	}
   226  	return st1.Type == st2.Type && st1.Unit == st2.Unit
   227  }
   228  
   229  func RewriteStrings(p *profilev1.Profile, n []uint32) {
   230  	for _, t := range p.SampleType {
   231  		if t.Unit != 0 {
   232  			t.Unit = int64(n[t.Unit])
   233  		}
   234  		if t.Type != 0 {
   235  			t.Type = int64(n[t.Type])
   236  		}
   237  	}
   238  	for _, s := range p.Sample {
   239  		for _, l := range s.Label {
   240  			l.Key = int64(n[l.Key])
   241  			l.Str = int64(n[l.Str])
   242  		}
   243  	}
   244  	for _, m := range p.Mapping {
   245  		m.Filename = int64(n[m.Filename])
   246  		m.BuildId = int64(n[m.BuildId])
   247  	}
   248  	for _, f := range p.Function {
   249  		f.Name = int64(n[f.Name])
   250  		f.Filename = int64(n[f.Filename])
   251  		f.SystemName = int64(n[f.SystemName])
   252  	}
   253  	p.DropFrames = int64(n[p.DropFrames])
   254  	p.KeepFrames = int64(n[p.KeepFrames])
   255  	if p.PeriodType != nil {
   256  		if p.PeriodType.Type != 0 {
   257  			p.PeriodType.Type = int64(n[p.PeriodType.Type])
   258  		}
   259  		if p.PeriodType.Unit != 0 {
   260  			p.PeriodType.Unit = int64(n[p.PeriodType.Unit])
   261  		}
   262  	}
   263  	for i, x := range p.Comment {
   264  		p.Comment[i] = int64(n[x])
   265  	}
   266  	p.DefaultSampleType = int64(n[p.DefaultSampleType])
   267  }
   268  
   269  func RewriteFunctions(p *profilev1.Profile, n []uint32) {
   270  	for _, loc := range p.Location {
   271  		for _, line := range loc.Line {
   272  			if line.FunctionId > 0 {
   273  				line.FunctionId = uint64(n[line.FunctionId-1]) + 1
   274  			}
   275  		}
   276  	}
   277  }
   278  
   279  func RewriteMappings(p *profilev1.Profile, n []uint32) {
   280  	for _, loc := range p.Location {
   281  		if loc.MappingId > 0 {
   282  			loc.MappingId = uint64(n[loc.MappingId-1]) + 1
   283  		}
   284  	}
   285  }
   286  
   287  func RewriteLocations(p *profilev1.Profile, n []uint32) {
   288  	for _, s := range p.Sample {
   289  		for i, loc := range s.LocationId {
   290  			if loc > 0 {
   291  				s.LocationId[i] = uint64(n[loc-1]) + 1
   292  			}
   293  		}
   294  	}
   295  }
   296  
   297  type FunctionKey struct {
   298  	startLine  uint32
   299  	name       uint32
   300  	systemName uint32
   301  	fileName   uint32
   302  }
   303  
   304  func GetFunctionKey(fn *profilev1.Function) FunctionKey {
   305  	return FunctionKey{
   306  		startLine:  uint32(fn.StartLine),
   307  		name:       uint32(fn.Name),
   308  		systemName: uint32(fn.SystemName),
   309  		fileName:   uint32(fn.Filename),
   310  	}
   311  }
   312  
   313  type MappingKey struct {
   314  	size          uint64
   315  	offset        uint64
   316  	buildIDOrFile int64
   317  }
   318  
   319  func GetMappingKey(m *profilev1.Mapping) MappingKey {
   320  	// NOTE(kolesnikovae): Copied from pprof.
   321  	// Normalize addresses to handle address space randomization.
   322  	// Round up to next 4K boundary to avoid minor discrepancies.
   323  	const mapsizeRounding = 0x1000
   324  	size := m.MemoryLimit - m.MemoryStart
   325  	size = size + mapsizeRounding - 1
   326  	size = size - (size % mapsizeRounding)
   327  	k := MappingKey{
   328  		size:   size,
   329  		offset: m.FileOffset,
   330  	}
   331  	switch {
   332  	case m.BuildId != 0:
   333  		k.buildIDOrFile = m.BuildId
   334  	case m.Filename != 0:
   335  		k.buildIDOrFile = m.Filename
   336  	default:
   337  		// A mapping containing neither build ID nor file name is a fake mapping. A
   338  		// key with empty buildIDOrFile is used for fake mappings so that they are
   339  		// treated as the same mapping during merging.
   340  	}
   341  	return k
   342  }
   343  
   344  type LocationKey struct {
   345  	addr      uint64
   346  	lines     uint64
   347  	mappingID uint64
   348  }
   349  
   350  func GetLocationKey(loc *profilev1.Location) LocationKey {
   351  	return LocationKey{
   352  		addr:      loc.Address,
   353  		mappingID: loc.MappingId,
   354  		lines:     hashLines(loc.Line),
   355  	}
   356  }
   357  
   358  type SampleKey struct {
   359  	locations uint64
   360  	labels    uint64
   361  }
   362  
   363  func GetSampleKey(s *profilev1.Sample) SampleKey {
   364  	return SampleKey{
   365  		locations: hashLocations(s.LocationId),
   366  		labels:    hashLabels(s.Label),
   367  	}
   368  }
   369  
   370  var mapHashSeed = maphash.MakeSeed()
   371  
   372  // NOTE(kolesnikovae):
   373  //  Probably we should use strings instead of hashes
   374  //  to eliminate collisions.
   375  
   376  func hashLocations(s []uint64) uint64 {
   377  	return maphash.Bytes(mapHashSeed, uint64Bytes(s))
   378  }
   379  
   380  func hashLines(s []*profilev1.Line) uint64 {
   381  	x := make([]uint64, len(s))
   382  	for i, l := range s {
   383  		x[i] = l.FunctionId | uint64(l.Line)<<32
   384  	}
   385  	return maphash.Bytes(mapHashSeed, uint64Bytes(x))
   386  }
   387  
   388  func hashLabels(s []*profilev1.Label) uint64 {
   389  	if len(s) == 0 {
   390  		return 0
   391  	}
   392  	sort.Sort(LabelsByKeyValue(s))
   393  	x := make([]uint64, len(s))
   394  	for i, l := range s {
   395  		// Num and Unit ignored.
   396  		x[i] = uint64(l.Key | l.Str<<32)
   397  	}
   398  	return maphash.Bytes(mapHashSeed, uint64Bytes(x))
   399  }
   400  
   401  // RewriteTable maintains unique values V and their indices.
   402  // V is never modified nor retained, K and M are kept in memory.
   403  type RewriteTable[K comparable, V, M any] struct {
   404  	k func(V) K
   405  	v func(V) M
   406  	t *swiss.Map[K, uint32]
   407  	s []M
   408  }
   409  
   410  func NewRewriteTable[K comparable, V, M any](
   411  	size int,
   412  	k func(V) K,
   413  	v func(V) M,
   414  ) RewriteTable[K, V, M] {
   415  	return RewriteTable[K, V, M]{
   416  		k: k,
   417  		v: v,
   418  		t: swiss.NewMap[K, uint32](uint32(size)),
   419  		s: make([]M, 0, size),
   420  	}
   421  }
   422  
   423  func (t *RewriteTable[K, V, M]) Index(dst []uint32, values []V) {
   424  	for i, value := range values {
   425  		k := t.k(value)
   426  		n, found := t.t.Get(k)
   427  		if !found {
   428  			n = uint32(len(t.s))
   429  			t.s = append(t.s, t.v(value))
   430  			t.t.Put(k, n)
   431  		}
   432  		dst[i] = n
   433  	}
   434  }
   435  
   436  func (t *RewriteTable[K, V, M]) Append(values []V) {
   437  	for _, value := range values {
   438  		k := t.k(value)
   439  		n := uint32(len(t.s))
   440  		t.s = append(t.s, t.v(value))
   441  		t.t.Put(k, n)
   442  	}
   443  }
   444  
   445  func (t *RewriteTable[K, V, M]) Values() []M { return t.s }