github.com/grafana/pyroscope@v1.18.0/pkg/pprof/pprof.go

github.com/grafana/pyroscope@v1.18.0/pkg/pprof/pprof.go (about)

     1  package pprof
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/binary"
     6  	"encoding/hex"
     7  	"fmt"
     8  	"io"
     9  	"os"
    10  	"sort"
    11  	"strconv"
    12  	"strings"
    13  	"sync"
    14  	"time"
    15  	"unsafe"
    16  
    17  	"github.com/cespare/xxhash/v2"
    18  	"github.com/colega/zeropool"
    19  	"github.com/google/pprof/profile"
    20  	"github.com/klauspost/compress/gzip"
    21  	"github.com/pkg/errors"
    22  	"github.com/samber/lo"
    23  
    24  	profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1"
    25  	typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1"
    26  	"github.com/grafana/pyroscope/pkg/slices"
    27  	"github.com/grafana/pyroscope/pkg/util"
    28  )
    29  
// Pools of reusable decoding/encoding state used by the functions in this file.
var (
	// gzipReaderPool holds *gzipReader values. A fresh entry wraps an empty
	// bytes.Reader; its gzip reader is created lazily by gzipOpen.
	gzipReaderPool = sync.Pool{
		New: func() any {
			return &gzipReader{
				reader: bytes.NewReader(nil),
			}
		},
	}
	// gzipWriterPool holds *gzip.Writer values. A fresh entry writes to
	// io.Discard until it is Reset onto a real destination.
	gzipWriterPool = sync.Pool{
		New: func() any {
			return gzip.NewWriter(io.Discard)
		},
	}
	// bufPool holds *bytes.Buffer scratch buffers.
	bufPool = sync.Pool{
		New: func() any {
			return bytes.NewBuffer(nil)
		},
	}
)
    49  
// gzipReader pairs a reusable gzip decompressor with the byte reader it
// consumes, so both can be recycled through gzipReaderPool.
type gzipReader struct {
	// gzip is nil until the first successful gzipOpen call.
	gzip   *gzip.Reader
	// reader is the in-memory source the gzip reader is reset onto.
	reader *bytes.Reader
}
    54  
    55  // open gzip, create reader if required
    56  func (r *gzipReader) gzipOpen() error {
    57  	var err error
    58  	if r.gzip == nil {
    59  		r.gzip, err = gzip.NewReader(r.reader)
    60  	} else {
    61  		err = r.gzip.Reset(r.reader)
    62  	}
    63  	return err
    64  }
    65  
    66  func (r *gzipReader) openBytes(input []byte) (io.Reader, error) {
    67  	r.reader.Reset(input)
    68  
    69  	// handle if data is not gzipped at all
    70  	if err := r.gzipOpen(); err == gzip.ErrHeader {
    71  		r.reader.Reset(input)
    72  		return r.reader, nil
    73  	} else if err != nil {
    74  		return nil, errors.Wrap(err, "gzip reset")
    75  	}
    76  
    77  	return r.gzip, nil
    78  }
    79  
    80  func NewProfile() *Profile {
    81  	return RawFromProto(new(profilev1.Profile))
    82  }
    83  
    84  func RawFromProto(pbp *profilev1.Profile) *Profile {
    85  	return &Profile{Profile: pbp}
    86  }
    87  
    88  func RawFromBytes(input []byte) (_ *Profile, err error) {
    89  	return RawFromBytesWithLimit(input, 0)
    90  }
    91  
    92  type ErrDecompressedSizeExceedsLimit struct {
    93  	Limit int64
    94  }
    95  
    96  func (e *ErrDecompressedSizeExceedsLimit) Error() string {
    97  	return fmt.Sprintf("decompressed size exceeds maximum allowed size of %d bytes", e.Limit)
    98  }
    99  
   100  // RawFromBytesWithLimit reads a profile from bytes with an optional size limit.
   101  // maxSize limits the decompressed size in bytes. Use 0 for no limit.
   102  // This prevents zip bomb attacks where small compressed data expands to huge sizes.
   103  func RawFromBytesWithLimit(input []byte, maxSize int64) (_ *Profile, err error) {
   104  	gzipReader := gzipReaderPool.Get().(*gzipReader)
   105  	buf := bufPool.Get().(*bytes.Buffer)
   106  	defer func() {
   107  		gzipReaderPool.Put(gzipReader)
   108  		buf.Reset()
   109  		bufPool.Put(buf)
   110  	}()
   111  
   112  	r, err := gzipReader.openBytes(input)
   113  	if err != nil {
   114  		return nil, err
   115  	}
   116  
   117  	// Apply size limit if specified (maxSize >= 0)
   118  	// maxSize == 0 means no limit (unlimited decompression)
   119  	if maxSize > 0 {
   120  		r = io.LimitReader(r, maxSize+1) // +1 to detect if limit is exceeded
   121  	}
   122  
   123  	if _, err = io.Copy(buf, r); err != nil {
   124  		return nil, errors.Wrap(err, "copy to buffer")
   125  	}
   126  
   127  	// Check if we hit the size limit
   128  	if maxSize > 0 && int64(buf.Len()) > maxSize {
   129  		return nil, &ErrDecompressedSizeExceedsLimit{Limit: maxSize}
   130  	}
   131  
   132  	rawSize := buf.Len()
   133  	pbp := new(profilev1.Profile)
   134  	if err = pbp.UnmarshalVT(buf.Bytes()); err != nil {
   135  		return nil, err
   136  	}
   137  
   138  	return &Profile{
   139  		Profile: pbp,
   140  		rawSize: rawSize,
   141  	}, nil
   142  }
   143  
   144  func FromBytes(input []byte, fn func(*profilev1.Profile, int) error) error {
   145  	return FromBytesWithLimit(input, 0, fn)
   146  }
   147  
   148  // FromBytesWithLimit reads a profile from bytes with an optional size limit and calls fn with the result.
   149  // maxSize limits the decompressed size in bytes. Use 0 for no limit.
   150  // This prevents zip bomb attacks where small compressed data expands to huge sizes.
   151  func FromBytesWithLimit(input []byte, maxSize int64, fn func(*profilev1.Profile, int) error) error {
   152  	p, err := RawFromBytesWithLimit(input, maxSize)
   153  	if err != nil {
   154  		return err
   155  	}
   156  	return fn(p.Profile, p.rawSize)
   157  }
   158  
   159  func FromProfile(p *profile.Profile) (*profilev1.Profile, error) {
   160  	var r profilev1.Profile
   161  	strings := make(map[string]int)
   162  
   163  	r.Sample = make([]*profilev1.Sample, 0, len(p.Sample))
   164  	r.SampleType = make([]*profilev1.ValueType, 0, len(p.SampleType))
   165  	r.Location = make([]*profilev1.Location, 0, len(p.Location))
   166  	r.Mapping = make([]*profilev1.Mapping, 0, len(p.Mapping))
   167  	r.Function = make([]*profilev1.Function, 0, len(p.Function))
   168  
   169  	addString(strings, "")
   170  	for _, st := range p.SampleType {
   171  		r.SampleType = append(r.SampleType, &profilev1.ValueType{
   172  			Type: addString(strings, st.Type),
   173  			Unit: addString(strings, st.Unit),
   174  		})
   175  	}
   176  	for _, s := range p.Sample {
   177  		sample := &profilev1.Sample{
   178  			LocationId: make([]uint64, len(s.Location)),
   179  			Value:      s.Value,
   180  		}
   181  		for i, loc := range s.Location {
   182  			sample.LocationId[i] = loc.ID
   183  		}
   184  		var keys []string
   185  		for k := range s.Label {
   186  			keys = append(keys, k)
   187  		}
   188  		sort.Strings(keys)
   189  		for _, k := range keys {
   190  			vs := s.Label[k]
   191  			for _, v := range vs {
   192  				sample.Label = append(sample.Label,
   193  					&profilev1.Label{
   194  						Key: addString(strings, k),
   195  						Str: addString(strings, v),
   196  					},
   197  				)
   198  			}
   199  		}
   200  		var numKeys []string
   201  		for k := range s.NumLabel {
   202  			numKeys = append(numKeys, k)
   203  		}
   204  		sort.Strings(numKeys)
   205  		for _, k := range numKeys {
   206  			keyX := addString(strings, k)
   207  			vs := s.NumLabel[k]
   208  			units := s.NumUnit[k]
   209  			for i, v := range vs {
   210  				var unitX int64
   211  				if len(units) != 0 {
   212  					unitX = addString(strings, units[i])
   213  				}
   214  				sample.Label = append(sample.Label,
   215  					&profilev1.Label{
   216  						Key:     keyX,
   217  						Num:     v,
   218  						NumUnit: unitX,
   219  					},
   220  				)
   221  			}
   222  		}
   223  		r.Sample = append(r.Sample, sample)
   224  	}
   225  
   226  	for _, m := range p.Mapping {
   227  		r.Mapping = append(r.Mapping, &profilev1.Mapping{
   228  			Id:              m.ID,
   229  			Filename:        addString(strings, m.File),
   230  			MemoryStart:     m.Start,
   231  			MemoryLimit:     m.Limit,
   232  			FileOffset:      m.Offset,
   233  			BuildId:         addString(strings, m.BuildID),
   234  			HasFunctions:    m.HasFunctions,
   235  			HasFilenames:    m.HasFilenames,
   236  			HasLineNumbers:  m.HasLineNumbers,
   237  			HasInlineFrames: m.HasInlineFrames,
   238  		})
   239  	}
   240  
   241  	for _, l := range p.Location {
   242  		loc := &profilev1.Location{
   243  			Id:       l.ID,
   244  			Line:     make([]*profilev1.Line, len(l.Line)),
   245  			IsFolded: l.IsFolded,
   246  			Address:  l.Address,
   247  		}
   248  		if l.Mapping != nil {
   249  			loc.MappingId = l.Mapping.ID
   250  		}
   251  		for i, ln := range l.Line {
   252  			if ln.Function != nil {
   253  				loc.Line[i] = &profilev1.Line{
   254  					FunctionId: ln.Function.ID,
   255  					Line:       ln.Line,
   256  				}
   257  			} else {
   258  				loc.Line[i] = &profilev1.Line{
   259  					FunctionId: 0,
   260  					Line:       ln.Line,
   261  				}
   262  			}
   263  		}
   264  		r.Location = append(r.Location, loc)
   265  	}
   266  	for _, f := range p.Function {
   267  		r.Function = append(r.Function, &profilev1.Function{
   268  			Id:         f.ID,
   269  			Name:       addString(strings, f.Name),
   270  			SystemName: addString(strings, f.SystemName),
   271  			Filename:   addString(strings, f.Filename),
   272  			StartLine:  f.StartLine,
   273  		})
   274  	}
   275  
   276  	r.DropFrames = addString(strings, p.DropFrames)
   277  	r.KeepFrames = addString(strings, p.KeepFrames)
   278  
   279  	if pt := p.PeriodType; pt != nil {
   280  		r.PeriodType = &profilev1.ValueType{
   281  			Type: addString(strings, pt.Type),
   282  			Unit: addString(strings, pt.Unit),
   283  		}
   284  	}
   285  
   286  	for _, c := range p.Comments {
   287  		r.Comment = append(r.Comment, addString(strings, c))
   288  	}
   289  
   290  	r.DefaultSampleType = addString(strings, p.DefaultSampleType)
   291  	r.DurationNanos = p.DurationNanos
   292  	r.TimeNanos = p.TimeNanos
   293  	r.Period = p.Period
   294  	r.StringTable = make([]string, len(strings))
   295  	for s, i := range strings {
   296  		r.StringTable[i] = s
   297  	}
   298  	return &r, nil
   299  }
   300  
   301  func addString(strings map[string]int, s string) int64 {
   302  	i, ok := strings[s]
   303  	if !ok {
   304  		i = len(strings)
   305  		strings[s] = i
   306  	}
   307  	return int64(i)
   308  }
   309  
   310  func OpenFile(path string) (*Profile, error) {
   311  	data, err := os.ReadFile(path)
   312  	if err != nil {
   313  		return nil, err
   314  	}
   315  
   316  	return RawFromBytes(data)
   317  }
   318  
// Profile wraps a protobuf profile together with the auxiliary state used
// by the methods defined in this file.
type Profile struct {
	*profilev1.Profile
	// hasher computes the sample hashes used by Normalize.
	hasher  SampleHasher
	// stats holds counters that Normalize updates.
	stats   sanitizeStats
	// rawSize is the decompressed payload size in bytes, set when the
	// profile is decoded by RawFromBytesWithLimit; zero otherwise.
	rawSize int
}
   325  
// RawSize returns the decompressed size in bytes recorded when the profile
// was decoded from bytes. It is zero for profiles that were not created by
// RawFromBytesWithLimit (for example, via RawFromProto).
func (p *Profile) RawSize() int {
	return p.rawSize
}
   330  
   331  // WriteTo writes the profile to the given writer.
   332  func (p *Profile) WriteTo(w io.Writer) (int64, error) {
   333  	buf := bufPool.Get().(*bytes.Buffer)
   334  	defer func() {
   335  		buf.Reset()
   336  		bufPool.Put(buf)
   337  	}()
   338  	buf.Grow(p.SizeVT())
   339  	data := buf.Bytes()
   340  	n, err := p.MarshalToVT(data)
   341  	if err != nil {
   342  		return 0, err
   343  	}
   344  	data = data[:n]
   345  
   346  	gzipWriter := gzipWriterPool.Get().(*gzip.Writer)
   347  	gzipWriter.Reset(w)
   348  	defer func() {
   349  		// reset gzip writer and return to pool
   350  		gzipWriter.Reset(io.Discard)
   351  		gzipWriterPool.Put(gzipWriter)
   352  	}()
   353  
   354  	written, err := gzipWriter.Write(data)
   355  	if err != nil {
   356  		return 0, errors.Wrap(err, "gzip write")
   357  	}
   358  	if err := gzipWriter.Close(); err != nil {
   359  		return 0, errors.Wrap(err, "gzip close")
   360  	}
   361  	return int64(written), nil
   362  }
   363  
   364  type sortedSample struct {
   365  	samples []*profilev1.Sample
   366  	hashes  []uint64
   367  }
   368  
   369  func (s *sortedSample) Len() int {
   370  	return len(s.samples)
   371  }
   372  
   373  func (s *sortedSample) Less(i, j int) bool {
   374  	return s.hashes[i] < s.hashes[j]
   375  }
   376  
   377  func (s *sortedSample) Swap(i, j int) {
   378  	s.samples[i], s.samples[j] = s.samples[j], s.samples[i]
   379  	s.hashes[i], s.hashes[j] = s.hashes[j], s.hashes[i]
   380  }
   381  
// currentTime supplies the timestamp Normalize assigns to profiles that have
// no time set. It is a variable rather than a direct time.Now call,
// presumably so that it can be replaced in tests — confirm against callers.
var currentTime = time.Now
   383  
// Normalize normalizes the profile by:
//   - Removing all duplicate samples (summing their values).
//   - Removing redundant profile labels (byte => unique of an allocation site)
//     todo: We should reassess if this was a good choice because by merging duplicate stacktrace samples
//     we cannot recompute the allocation per site ("bytes") profile label.
//   - Removing empty samples.
//   - Then removing unused references.
//   - Ensuring that the profile has time_nanos set.
//   - Removing addresses from symbolized profiles.
//   - Removing elements with invalid references.
//   - Converting identifiers to indices.
//   - Ensuring that string_table[0] is "".
//
// The last three items are presumably performed by sanitizeProfile, which is
// not defined in this part of the file — confirm there.
//
// Normalize mutates the profile in place, including the label order of each
// sample, and updates the counters in p.stats.
func (p *Profile) Normalize() {
	p.stats.samplesTotal = len(p.Sample)

	// if the profile has no time, set it to now
	if p.TimeNanos == 0 {
		p.TimeNanos = currentTime().UnixNano()
	}

	// Non-string labels are not supported: drop every label without a
	// string value (Str == 0).
	for _, sample := range p.Sample {
		sample.Label = slices.RemoveInPlace(sample.Label, func(label *profilev1.Label, i int) bool {
			return label.Str == 0
		})
	}

	// Remove samples that have any negative value, or no positive value.
	var removedSamples []*profilev1.Sample
	p.Sample = slices.RemoveInPlace(p.Sample, func(s *profilev1.Sample, i int) bool {
		for j := 0; j < len(s.Value); j++ {
			if s.Value[j] < 0 {
				removedSamples = append(removedSamples, s)
				p.stats.sampleValueNegative++
				return true
			}
		}
		for j := 0; j < len(s.Value); j++ {
			if s.Value[j] > 0 {
				return false
			}
		}
		// All values are zero (or there are no values at all).
		p.stats.sampleValueZero++
		removedSamples = append(removedSamples, s)
		return true
	})

	// first we sort the samples by hash, so that duplicates become adjacent.
	hashes := p.hasher.Hashes(p.Sample)
	ss := &sortedSample{samples: p.Sample, hashes: hashes}
	sort.Sort(ss)
	p.Sample = ss.samples
	hashes = ss.hashes

	// Fold each sample into its successor when both have the same hash.
	// Inside the callback p.Sample still refers to the full, sorted slice.
	p.Sample = slices.RemoveInPlace(p.Sample, func(s *profilev1.Sample, i int) bool {
		// if the next sample has the same hash and labels, we can remove this sample but add the value to the next sample.
		if i < len(p.Sample)-1 && hashes[i] == hashes[i+1] {
			// todo handle hashes collisions
			for j := 0; j < len(s.Value); j++ {
				p.Sample[i+1].Value[j] += s.Value[j]
			}
			removedSamples = append(removedSamples, s)
			p.stats.sampleDuplicate++
			return true
		}
		return false
	})
	// Remove references to removed samples.
	p.clearSampleReferences(removedSamples)
	sanitizeProfile(p.Profile, &p.stats)
	p.clearAddresses()
}
   456  
   457  // Removes addresses from symbolized profiles.
   458  func (p *Profile) clearAddresses() {
   459  	for _, m := range p.Mapping {
   460  		if m.HasFunctions {
   461  			m.MemoryLimit = 0
   462  			m.FileOffset = 0
   463  			m.MemoryStart = 0
   464  		}
   465  	}
   466  	for _, l := range p.Location {
   467  		if p.Mapping[l.MappingId-1].HasFunctions {
   468  			l.Address = 0
   469  		}
   470  	}
   471  }
   472  
// clearSampleReferences removes the data that was referenced only by the
// given (already removed) samples. It cascades in three stages and stops as
// soon as a stage finds nothing to remove:
//
//  1. locations used by the removed samples and by no remaining sample;
//  2. functions used by the removed locations and by no remaining location;
//  3. string table entries used by the removed functions and by no other
//     reference reported by visitAllNameReferences.
//
// After strings are removed, all visited string indices are shifted down
// to account for the removed entries.
func (p *Profile) clearSampleReferences(samples []*profilev1.Sample) {
	if len(samples) == 0 {
		return
	}
	// remove all data not used anymore.
	removedLocationIds := map[uint64]struct{}{}

	// Start with every location referenced by a removed sample.
	for _, s := range samples {
		for _, l := range s.LocationId {
			removedLocationIds[l] = struct{}{}
		}
	}

	// figure which removed Locations IDs are not used.
	for _, s := range p.Sample {
		for _, l := range s.LocationId {
			delete(removedLocationIds, l)
		}
	}
	if len(removedLocationIds) == 0 {
		return
	}
	removedFunctionIds := map[uint64]struct{}{}
	// remove the locations that are not used anymore.
	p.Location = slices.RemoveInPlace(p.Location, func(loc *profilev1.Location, _ int) bool {
		if _, ok := removedLocationIds[loc.Id]; ok {
			// Functions of a removed location become removal candidates.
			for _, l := range loc.Line {
				removedFunctionIds[l.FunctionId] = struct{}{}
			}
			return true
		}
		return false
	})

	if len(removedFunctionIds) == 0 {
		return
	}
	// figure which removed Function IDs are not used.
	for _, l := range p.Location {
		for _, f := range l.Line {
			// that ID is used in another location: keep the function.
			delete(removedFunctionIds, f.FunctionId)
		}
	}
	removedNamesMap := map[int64]struct{}{}
	// remove the functions that are not used anymore.
	p.Function = slices.RemoveInPlace(p.Function, func(fn *profilev1.Function, _ int) bool {
		if _, ok := removedFunctionIds[fn.Id]; ok {
			// Strings of a removed function become removal candidates.
			removedNamesMap[fn.Name] = struct{}{}
			removedNamesMap[fn.SystemName] = struct{}{}
			removedNamesMap[fn.Filename] = struct{}{}
			return true
		}
		return false
	})

	if len(removedNamesMap) == 0 {
		return
	}
	// Keep the names that are still used elsewhere in the profile.
	p.visitAllNameReferences(func(idx *int64) {
		delete(removedNamesMap, *idx)
	})
	if len(removedNamesMap) == 0 {
		return
	}

	// remove the names that are not used anymore.
	p.StringTable = lo.Reject(p.StringTable, func(_ string, i int) bool {
		_, ok := removedNamesMap[int64(i)]
		return ok
	})
	removedNames := lo.Keys(removedNamesMap)
	// Sort to remove in order.
	sort.Slice(removedNames, func(i, j int) bool { return removedNames[i] < removedNames[j] })
	// Now shift all indices [0,1,2,3,4,5,6]
	// if we removed [1,2,5] then we need to shift [3,4] to [1,2] and [6] to [3]
	// Basically we need to shift all indices that are greater than the removed index by the amount of removed indices.
	p.visitAllNameReferences(func(idx *int64) {
		// shift counts the removed indices that are smaller than *idx;
		// removedNames is sorted, so the scan stops at the first larger one.
		var shift int64
		for i := 0; i < len(removedNames); i++ {
			if *idx > removedNames[i] {
				shift++
				continue
			}
			break
		}
		*idx -= shift
	})
}
   563  
   564  func (p *Profile) visitAllNameReferences(fn func(*int64)) {
   565  	fn(&p.DropFrames)
   566  	fn(&p.KeepFrames)
   567  	fn(&p.PeriodType.Type)
   568  	fn(&p.PeriodType.Unit)
   569  	for _, st := range p.SampleType {
   570  		fn(&st.Type)
   571  		fn(&st.Unit)
   572  	}
   573  	for _, m := range p.Mapping {
   574  		fn(&m.Filename)
   575  		fn(&m.BuildId)
   576  	}
   577  	for _, s := range p.Sample {
   578  		for _, l := range s.Label {
   579  			fn(&l.Key)
   580  			fn(&l.Num)
   581  			fn(&l.NumUnit)
   582  		}
   583  	}
   584  	for _, f := range p.Function {
   585  		fn(&f.Name)
   586  		fn(&f.SystemName)
   587  		fn(&f.Filename)
   588  	}
   589  	for i := 0; i < len(p.Comment); i++ {
   590  		fn(&p.Comment[i])
   591  	}
   592  }
   593  
// SampleHasher computes hashes of samples; see Hashes.
type SampleHasher struct {
	// hash is the digest used by Hashes; Hashes creates one when it is nil.
	hash *xxhash.Digest
	// b is scratch space for one encoded (key, str) label pair.
	b    [8]byte
}
   598  
   599  func (h SampleHasher) Hashes(samples []*profilev1.Sample) []uint64 {
   600  	if h.hash == nil {
   601  		h.hash = xxhash.New()
   602  	} else {
   603  		h.hash.Reset()
   604  	}
   605  
   606  	hashes := make([]uint64, len(samples))
   607  	for i, sample := range samples {
   608  		if _, err := h.hash.Write(uint64Bytes(sample.LocationId)); err != nil {
   609  			panic("unable to write hash")
   610  		}
   611  		sort.Sort(LabelsByKeyValue(sample.Label))
   612  		for _, l := range sample.Label {
   613  			binary.LittleEndian.PutUint32(h.b[:4], uint32(l.Key))
   614  			binary.LittleEndian.PutUint32(h.b[4:], uint32(l.Str))
   615  			if _, err := h.hash.Write(h.b[:]); err != nil {
   616  				panic("unable to write label hash")
   617  			}
   618  		}
   619  		hashes[i] = h.hash.Sum64()
   620  		h.hash.Reset()
   621  	}
   622  
   623  	return hashes
   624  }
   625  
   626  func uint64Bytes(s []uint64) []byte {
   627  	if len(s) == 0 {
   628  		return nil
   629  	}
   630  	p := (*byte)(unsafe.Pointer(&s[0]))
   631  	return unsafe.Slice(p, len(s)*8)
   632  }
   633  
   634  type SamplesByLabels []*profilev1.Sample
   635  
   636  func (s SamplesByLabels) Len() int {
   637  	return len(s)
   638  }
   639  
   640  func (s SamplesByLabels) Less(i, j int) bool {
   641  	return CompareSampleLabels(s[i].Label, s[j].Label) < 0
   642  }
   643  
   644  func (s SamplesByLabels) Swap(i, j int) {
   645  	s[i], s[j] = s[j], s[i]
   646  }
   647  
   648  type LabelsByKeyValue []*profilev1.Label
   649  
   650  func (l LabelsByKeyValue) Len() int {
   651  	return len(l)
   652  }
   653  
   654  func (l LabelsByKeyValue) Less(i, j int) bool {
   655  	a, b := l[i], l[j]
   656  	if a.Key == b.Key {
   657  		return a.Str < b.Str
   658  	}
   659  	return a.Key < b.Key
   660  }
   661  
   662  func (l LabelsByKeyValue) Swap(i, j int) {
   663  	l[i], l[j] = l[j], l[i]
   664  }
   665  
// SampleGroup refers to a group of samples that share the same
// labels. Note that the Span ID label is handled in a special
// way and is not included in the Labels member but is kept as
// a sample label.
type SampleGroup struct {
	// Labels is the label set shared by all samples of the group.
	Labels  []*profilev1.Label
	// Samples are the members of the group.
	Samples []*profilev1.Sample
}
   674  
   675  // GroupSamplesByLabels splits samples into groups by labels.
   676  // It's expected that sample labels are sorted.
   677  func GroupSamplesByLabels(p *profilev1.Profile) []SampleGroup {
   678  	if len(p.Sample) < 1 {
   679  		return nil
   680  	}
   681  	var result []SampleGroup
   682  	var start int
   683  	labels := p.Sample[start].Label
   684  	for i := 1; i < len(p.Sample); i++ {
   685  		if CompareSampleLabels(p.Sample[i].Label, labels) != 0 {
   686  			result = append(result, SampleGroup{
   687  				Labels:  labels,
   688  				Samples: p.Sample[start:i],
   689  			})
   690  			start = i
   691  			labels = p.Sample[i].Label
   692  		}
   693  	}
   694  	return append(result, SampleGroup{
   695  		Labels:  labels,
   696  		Samples: p.Sample[start:],
   697  	})
   698  }
   699  
   700  // GroupSamplesWithoutLabels splits samples into groups by labels
   701  // ignoring ones from the list: those are preserved as sample labels.
   702  // It's expected that sample labels are sorted.
   703  func GroupSamplesWithoutLabels(p *profilev1.Profile, labels ...string) []SampleGroup {
   704  	if len(labels) > 0 {
   705  		return GroupSamplesWithoutLabelsByKey(p, LabelKeysByString(p, labels...))
   706  	}
   707  	return GroupSamplesByLabels(p)
   708  }
   709  
// GroupSamplesWithoutLabelsByKey groups samples by all labels except those
// whose key (a string table index) is listed in keys. keys must be sorted
// in ascending order, as the merge in FilterLabelsInPlace requires.
//
// The function reorders p.Sample and the labels of every sample. On return
// each sample keeps only the labels matching keys, while the remaining
// labels are reported through the Labels field of the returned groups.
// It returns nil when the profile has no samples.
func GroupSamplesWithoutLabelsByKey(p *profilev1.Profile, keys []int64) []SampleGroup {
	if len(p.Sample) == 0 {
		return nil
	}
	for _, s := range p.Sample {
		sort.Sort(LabelsByKeyValue(s.Label))
		// We hide labels matching the keys to the end
		// of the slice, after len() boundary.
		s.Label = LabelsWithout(s.Label, keys)
		sort.Sort(LabelsByKeyValue(s.Label)) // TODO: Find a way to avoid this.
	}
	// Sorting and grouping accounts only for labels kept.
	sort.Sort(SamplesByLabels(p.Sample))
	groups := GroupSamplesByLabels(p)
	for _, s := range p.Sample {
		// Replace the labels (that match the group name)
		// with hidden labels matching the keys.
		s.Label = restoreRemovedLabels(s.Label)
	}
	return groups
}
   731  
   732  func restoreRemovedLabels(labels []*profilev1.Label) []*profilev1.Label {
   733  	labels = labels[len(labels):cap(labels)]
   734  	for i, l := range labels {
   735  		if l == nil { // labels had extra capacity in sample labels
   736  			labels = labels[:i]
   737  			break
   738  		}
   739  	}
   740  	return labels
   741  }
   742  
   743  // CompareSampleLabels compares sample label pairs.
   744  // It's expected that sample labels are sorted.
   745  // The result will be 0 if a == b, < 0 if a < b, and > 0 if a > b.
   746  func CompareSampleLabels(a, b []*profilev1.Label) int {
   747  	l := len(a)
   748  	if len(b) < l {
   749  		l = len(b)
   750  	}
   751  	for i := 0; i < l; i++ {
   752  		if a[i].Key != b[i].Key {
   753  			if a[i].Key < b[i].Key {
   754  				return -1
   755  			}
   756  			return 1
   757  		}
   758  		if a[i].Str != b[i].Str {
   759  			if a[i].Str < b[i].Str {
   760  				return -1
   761  			}
   762  			return 1
   763  		}
   764  	}
   765  	return len(a) - len(b)
   766  }
   767  
   768  func LabelsWithout(labels []*profilev1.Label, keys []int64) []*profilev1.Label {
   769  	n := FilterLabelsInPlace(labels, keys)
   770  	slices.Reverse(labels) // TODO: Find a way to avoid this.
   771  	return labels[:len(labels)-n]
   772  }
   773  
   774  func FilterLabelsInPlace(labels []*profilev1.Label, keys []int64) int {
   775  	boundaryIdx := 0
   776  	i := 0 // Pointer to labels
   777  	j := 0 // Pointer to keys
   778  	for i < len(labels) && j < len(keys) {
   779  		if labels[i].Key == keys[j] {
   780  			// If label key matches a key in keys, swap and increment both pointers
   781  			labels[i], labels[boundaryIdx] = labels[boundaryIdx], labels[i]
   782  			boundaryIdx++
   783  			i++
   784  		} else if labels[i].Key < keys[j] {
   785  			i++ // Advance label pointer.
   786  		} else {
   787  			j++ // Advance key pointer.
   788  		}
   789  	}
   790  	return boundaryIdx
   791  }
   792  
   793  func LabelKeysByString(p *profilev1.Profile, keys ...string) []int64 {
   794  	m := LabelKeysMapByString(p, keys...)
   795  	s := make([]int64, len(keys))
   796  	for i, k := range keys {
   797  		s[i] = m[k]
   798  	}
   799  	sort.Slice(s, func(i, j int) bool {
   800  		return s[i] < s[j]
   801  	})
   802  	return s
   803  }
   804  
   805  func LabelKeysMapByString(p *profilev1.Profile, keys ...string) map[string]int64 {
   806  	m := make(map[string]int64, len(keys))
   807  	for _, k := range keys {
   808  		m[k] = 0
   809  	}
   810  	for i, v := range p.StringTable {
   811  		if _, ok := m[v]; ok {
   812  			m[v] = int64(i)
   813  		}
   814  	}
   815  	return m
   816  }
   817  
// SampleExporter builds new profiles from subsets of the samples of a
// source profile; see ExportSamples.
type SampleExporter struct {
	// profile is the source profile samples are exported from.
	profile *profilev1.Profile

	// Lookup tables translating source indices to indices in the exported
	// profile; each is sized after the matching source collection.
	locations lookupTable
	functions lookupTable
	mappings  lookupTable
	strings   lookupTable
}
   826  
   827  type lookupTable struct {
   828  	indices  []int32
   829  	resolved int32
   830  }
   831  
   832  func (t *lookupTable) lookupString(idx int64) int64 {
   833  	if idx != 0 {
   834  		return int64(t.lookup(idx))
   835  	}
   836  	return 0
   837  }
   838  
   839  func (t *lookupTable) lookup(idx int64) int32 {
   840  	x := t.indices[idx]
   841  	if x != 0 {
   842  		return x
   843  	}
   844  	t.resolved++
   845  	t.indices[idx] = t.resolved
   846  	return t.resolved
   847  }
   848  
   849  func (t *lookupTable) reset() {
   850  	t.resolved = 0
   851  	for i := 0; i < len(t.indices); i++ {
   852  		t.indices[i] = 0
   853  	}
   854  }
   855  
   856  func NewSampleExporter(p *profilev1.Profile) *SampleExporter {
   857  	return &SampleExporter{
   858  		profile:   p,
   859  		locations: lookupTable{indices: make([]int32, len(p.Location))},
   860  		functions: lookupTable{indices: make([]int32, len(p.Function))},
   861  		mappings:  lookupTable{indices: make([]int32, len(p.Mapping))},
   862  		strings:   lookupTable{indices: make([]int32, len(p.StringTable))},
   863  	}
   864  }
   865  
// ExportSamples creates a new complete profile with the subset
// of samples provided. It is assumed that those are part of the
// source profile. Provided samples are modified in place.
//
// The same exporter instance can be used to export non-overlapping
// sample sets from a single profile.
//
// The result is written to dst, which is also returned. Locations,
// mappings, functions and strings are copied only if they are reachable
// from the exported samples or the profile header, and are renumbered
// densely in order of first use. Source location, mapping and function IDs
// are used as 1-based indices into the corresponding source slices.
func (e *SampleExporter) ExportSamples(dst *profilev1.Profile, samples []*profilev1.Sample) *profilev1.Profile {
	// Forget the assignments of any previous export.
	e.reset()

	dst.Sample = samples
	dst.TimeNanos = e.profile.TimeNanos
	dst.DurationNanos = e.profile.DurationNanos
	dst.Period = e.profile.Period
	dst.DefaultSampleType = e.profile.DefaultSampleType

	dst.SampleType = slices.GrowLen(dst.SampleType, len(e.profile.SampleType))
	for i, v := range e.profile.SampleType {
		dst.SampleType[i] = &profilev1.ValueType{
			Type: e.strings.lookupString(v.Type),
			Unit: e.strings.lookupString(v.Unit),
		}
	}
	dst.DropFrames = e.strings.lookupString(e.profile.DropFrames)
	dst.KeepFrames = e.strings.lookupString(e.profile.KeepFrames)
	if c := len(e.profile.Comment); c > 0 {
		dst.Comment = slices.GrowLen(dst.Comment, c)
		for i, comment := range e.profile.Comment {
			dst.Comment[i] = e.strings.lookupString(comment)
		}
	}

	// Rewrite sample stack traces and labels.
	// Note that the provided samples are modified in-place.
	for _, sample := range dst.Sample {
		for i, location := range sample.LocationId {
			// Source location IDs are 1-based, the lookup table is 0-based.
			sample.LocationId[i] = uint64(e.locations.lookup(int64(location - 1)))
		}
		for _, label := range sample.Label {
			label.Key = e.strings.lookupString(label.Key)
			// A label with a string value has its value remapped;
			// otherwise its numeric unit is remapped.
			if label.Str != 0 {
				label.Str = e.strings.lookupString(label.Str)
			} else {
				label.NumUnit = e.strings.lookupString(label.NumUnit)
			}
		}
	}

	// Copy locations.
	dst.Location = slices.GrowLen(dst.Location, int(e.locations.resolved))
	for i, j := range e.locations.indices {
		// i points to the location in the source profile.
		// j point to the location in the new profile.
		if j == 0 {
			// The location is not referenced by any of the samples.
			continue
		}
		loc := e.profile.Location[i]
		newLoc := &profilev1.Location{
			Id:        uint64(j),
			MappingId: uint64(e.mappings.lookup(int64(loc.MappingId - 1))),
			Address:   loc.Address,
			Line:      make([]*profilev1.Line, len(loc.Line)),
			IsFolded:  loc.IsFolded,
		}
		dst.Location[j-1] = newLoc
		for l, line := range loc.Line {
			newLoc.Line[l] = &profilev1.Line{
				FunctionId: uint64(e.functions.lookup(int64(line.FunctionId - 1))),
				Line:       line.Line,
			}
		}
	}

	// Copy mappings.
	dst.Mapping = slices.GrowLen(dst.Mapping, int(e.mappings.resolved))
	for i, j := range e.mappings.indices {
		if j == 0 {
			// The mapping is not referenced by any exported location.
			continue
		}
		m := e.profile.Mapping[i]
		dst.Mapping[j-1] = &profilev1.Mapping{
			Id:              uint64(j),
			MemoryStart:     m.MemoryStart,
			MemoryLimit:     m.MemoryLimit,
			FileOffset:      m.FileOffset,
			Filename:        e.strings.lookupString(m.Filename),
			BuildId:         e.strings.lookupString(m.BuildId),
			HasFunctions:    m.HasFunctions,
			HasFilenames:    m.HasFilenames,
			HasLineNumbers:  m.HasLineNumbers,
			HasInlineFrames: m.HasInlineFrames,
		}
	}

	// Copy functions.
	dst.Function = slices.GrowLen(dst.Function, int(e.functions.resolved))
	for i, j := range e.functions.indices {
		if j == 0 {
			// The function is not referenced by any exported location.
			continue
		}
		fn := e.profile.Function[i]
		dst.Function[j-1] = &profilev1.Function{
			Id:         uint64(j),
			Name:       e.strings.lookupString(fn.Name),
			SystemName: e.strings.lookupString(fn.SystemName),
			Filename:   e.strings.lookupString(fn.Filename),
			StartLine:  fn.StartLine,
		}
	}

	if e.profile.PeriodType != nil {
		dst.PeriodType = &profilev1.ValueType{
			Type: e.strings.lookupString(e.profile.PeriodType.Type),
			Unit: e.strings.lookupString(e.profile.PeriodType.Unit),
		}
	}

	// Copy strings. This must happen last, after every string reference
	// above has been resolved. Slot 0 is never assigned by the loop below,
	// hence one extra element.
	dst.StringTable = slices.GrowLen(dst.StringTable, int(e.strings.resolved)+1)
	for i, j := range e.strings.indices {
		if j == 0 {
			continue
		}
		dst.StringTable[j] = e.profile.StringTable[i]
	}

	return dst
}
   994  
   995  func (e *SampleExporter) reset() {
   996  	e.locations.reset()
   997  	e.functions.reset()
   998  	e.mappings.reset()
   999  	e.strings.reset()
  1000  }
  1001  
// uint32SlicePool is a pool of []uint32 scratch slices. ZeroLabelStrings
// takes its per-string marker slice from this pool and puts it back when
// it returns.
var uint32SlicePool zeropool.Pool[[]uint32]
  1003  
// Well-known sample label names. SpanIDLabelName is the label key that
// ProfileSpans looks up in the profile string table.
const (
	ProfileIDLabelName = "profile_id" // For compatibility with the existing clients.
	SpanIDLabelName    = "span_id"    // Will be supported in the future.
)
  1008  
  1009  func LabelID(p *profilev1.Profile, name string) int64 {
  1010  	for i, s := range p.StringTable {
  1011  		if s == name {
  1012  			return int64(i)
  1013  		}
  1014  	}
  1015  	return -1
  1016  }
  1017  
  1018  func ProfileSpans(p *profilev1.Profile) []uint64 {
  1019  	if i := LabelID(p, SpanIDLabelName); i > 0 {
  1020  		return Spans(p, i)
  1021  	}
  1022  	return nil
  1023  }
  1024  
  1025  func Spans(p *profilev1.Profile, spanIDLabelIdx int64) []uint64 {
  1026  	tmp := make([]byte, 8)
  1027  	s := make([]uint64, len(p.Sample))
  1028  	for i, sample := range p.Sample {
  1029  		s[i] = spanIDFromLabels(tmp, spanIDLabelIdx, p.StringTable, sample.Label)
  1030  	}
  1031  	return s
  1032  }
  1033  
  1034  func spanIDFromLabels(tmp []byte, labelIdx int64, stringTable []string, labels []*profilev1.Label) uint64 {
  1035  	for _, x := range labels {
  1036  		if x.Key != labelIdx {
  1037  			continue
  1038  		}
  1039  		if s := stringTable[x.Str]; decodeSpanID(tmp, s) {
  1040  			return binary.LittleEndian.Uint64(tmp)
  1041  		}
  1042  	}
  1043  	return 0
  1044  }
  1045  
  1046  func decodeSpanID(tmp []byte, s string) bool {
  1047  	if len(s) != 16 {
  1048  		return false
  1049  	}
  1050  	_, err := hex.Decode(tmp, util.YoloBuf(s))
  1051  	return err == nil
  1052  }
  1053  
  1054  func RenameLabel(p *profilev1.Profile, oldName, newName string) {
  1055  	var oi, ni int64
  1056  	for i, s := range p.StringTable {
  1057  		if s == oldName {
  1058  			oi = int64(i)
  1059  			break
  1060  		}
  1061  	}
  1062  	if oi == 0 {
  1063  		return
  1064  	}
  1065  	for i, s := range p.StringTable {
  1066  		if s == newName {
  1067  			ni = int64(i)
  1068  			break
  1069  		}
  1070  	}
  1071  	if ni == 0 {
  1072  		ni = int64(len(p.StringTable))
  1073  		p.StringTable = append(p.StringTable, newName)
  1074  	}
  1075  	for _, s := range p.Sample {
  1076  		for _, l := range s.Label {
  1077  			if l.Key == oi {
  1078  				l.Key = ni
  1079  			}
  1080  		}
  1081  	}
  1082  }
  1083  
  1084  func ZeroLabelStrings(p *profilev1.Profile) {
  1085  	// TODO: A true bitmap should be used instead.
  1086  	st := slices.GrowLen(uint32SlicePool.Get(), len(p.StringTable))
  1087  	slices.Clear(st)
  1088  	defer uint32SlicePool.Put(st)
  1089  	for _, t := range p.SampleType {
  1090  		st[t.Type] = 1
  1091  		st[t.Unit] = 1
  1092  	}
  1093  	for _, f := range p.Function {
  1094  		st[f.Filename] = 1
  1095  		st[f.SystemName] = 1
  1096  		st[f.Name] = 1
  1097  	}
  1098  	for _, m := range p.Mapping {
  1099  		st[m.Filename] = 1
  1100  		st[m.BuildId] = 1
  1101  	}
  1102  	for _, c := range p.Comment {
  1103  		st[c] = 1
  1104  	}
  1105  	st[p.KeepFrames] = 1
  1106  	st[p.DropFrames] = 1
  1107  	var zeroString string
  1108  	for i, v := range st {
  1109  		if v == 0 {
  1110  			p.StringTable[i] = zeroString
  1111  		}
  1112  	}
  1113  }
  1114  
// languageMatchers maps a language name to the patterns GetLanguage tests
// against each string of a profile: a string matches a language if it
// starts or ends with any of that language's patterns.
var languageMatchers = map[string][]string{
	"go":     {".go", "/usr/local/go/"},
	"java":   {"java/", "sun/"},
	"ruby":   {".rb", "gems/"},
	"nodejs": {"./node_modules/", ".js"},
	"dotnet": {"System.", "Microsoft."},
	"python": {".py"},
	"rust":   {"main.rs", "core.rs"},
}
  1124  
  1125  func GetLanguage(profile *Profile) string {
  1126  	for _, symbol := range profile.StringTable {
  1127  		for lang, matcherPatterns := range languageMatchers {
  1128  			for _, pattern := range matcherPatterns {
  1129  				if strings.HasPrefix(symbol, pattern) || strings.HasSuffix(symbol, pattern) {
  1130  					return lang
  1131  				}
  1132  			}
  1133  		}
  1134  	}
  1135  	return "unknown"
  1136  }
  1137  
  1138  // SetProfileMetadata sets the metadata on the profile.
  1139  func SetProfileMetadata(p *profilev1.Profile, ty *typesv1.ProfileType, timeNanos int64, period int64) {
  1140  	m := map[string]int64{
  1141  		ty.SampleUnit: -1,
  1142  		ty.SampleType: -1,
  1143  		ty.PeriodType: -1,
  1144  		ty.PeriodUnit: -1,
  1145  	}
  1146  	for i, s := range p.StringTable {
  1147  		if _, ok := m[s]; ok {
  1148  			m[s] = int64(i)
  1149  		}
  1150  	}
  1151  	for _, k := range []string{
  1152  		ty.SampleUnit,
  1153  		ty.SampleType,
  1154  		ty.PeriodType,
  1155  		ty.PeriodUnit,
  1156  	} {
  1157  		if m[k] == -1 {
  1158  			i := int64(len(p.StringTable))
  1159  			p.StringTable = append(p.StringTable, k)
  1160  			m[k] = i
  1161  		}
  1162  	}
  1163  
  1164  	p.SampleType = []*profilev1.ValueType{{Type: m[ty.SampleType], Unit: m[ty.SampleUnit]}}
  1165  	p.DefaultSampleType = m[ty.SampleType]
  1166  	p.PeriodType = &profilev1.ValueType{Type: m[ty.PeriodType], Unit: m[ty.PeriodUnit]}
  1167  	p.TimeNanos = timeNanos
  1168  
  1169  	if period != 0 {
  1170  		p.Period = period
  1171  	}
  1172  
  1173  	// Try to guess period based on the profile type.
  1174  	// TODO: This should be encoded into the profile type.
  1175  	switch ty.Name {
  1176  	case "process_cpu":
  1177  		p.Period = 1000000000
  1178  	case "memory":
  1179  		p.Period = 512 * 1024
  1180  	default:
  1181  		p.Period = 1
  1182  	}
  1183  }
  1184  
  1185  func Marshal(p *profilev1.Profile, compress bool) ([]byte, error) {
  1186  	b, err := p.MarshalVT()
  1187  	if err != nil {
  1188  		return nil, err
  1189  	}
  1190  	if !compress {
  1191  		return b, nil
  1192  	}
  1193  	var buf bytes.Buffer
  1194  	buf.Grow(len(b) / 2)
  1195  	gw := gzipWriterPool.Get().(*gzip.Writer)
  1196  	gw.Reset(&buf)
  1197  	defer func() {
  1198  		gw.Reset(io.Discard)
  1199  		gzipWriterPool.Put(gw)
  1200  	}()
  1201  	if _, err = gw.Write(b); err != nil {
  1202  		return nil, err
  1203  	}
  1204  	if err = gw.Flush(); err != nil {
  1205  		return nil, err
  1206  	}
  1207  	if err = gw.Close(); err != nil {
  1208  		return nil, err
  1209  	}
  1210  	return buf.Bytes(), nil
  1211  }
  1212  
  1213  func MustMarshal(p *profilev1.Profile, compress bool) []byte {
  1214  	b, err := Marshal(p, compress)
  1215  	if err != nil {
  1216  		panic(err)
  1217  	}
  1218  	return b
  1219  }
  1220  
  1221  func Unmarshal(data []byte, p *profilev1.Profile) error {
  1222  	return UnmarshalWithLimit(data, p, 0)
  1223  }
  1224  
  1225  // UnmarshalWithLimit unmarshals a profile from bytes with an optional size limit.
  1226  // maxSize limits the decompressed size in bytes. Use 0 for no limit.
  1227  // This prevents zip bomb attacks where small compressed data expands to huge sizes.
  1228  func UnmarshalWithLimit(data []byte, p *profilev1.Profile, maxSize int64) error {
  1229  	gr := gzipReaderPool.Get().(*gzipReader)
  1230  	defer gzipReaderPool.Put(gr)
  1231  	r, err := gr.openBytes(data)
  1232  	if err != nil {
  1233  		return err
  1234  	}
  1235  	buf := bufPool.Get().(*bytes.Buffer)
  1236  	defer func() {
  1237  		buf.Reset()
  1238  		bufPool.Put(buf)
  1239  	}()
  1240  	buf.Grow(len(data) * 2)
  1241  
  1242  	// Apply size limit if specified (maxSize >= 0)
  1243  	// maxSize == 0 means no limit (unlimited decompression)
  1244  	if maxSize > 0 {
  1245  		r = io.LimitReader(r, maxSize+1) // +1 to detect if limit is exceeded
  1246  	}
  1247  
  1248  	if _, err = io.Copy(buf, r); err != nil {
  1249  		return err
  1250  	}
  1251  
  1252  	// Check if we hit the size limit
  1253  	if maxSize > 0 && int64(buf.Len()) > maxSize {
  1254  		return &ErrDecompressedSizeExceedsLimit{Limit: maxSize}
  1255  	}
  1256  
  1257  	return p.UnmarshalVT(buf.Bytes())
  1258  }
  1259  
  1260  func sanitizeProfile(p *profilev1.Profile, stats *sanitizeStats) {
  1261  	if p == nil {
  1262  		return
  1263  	}
  1264  	if stats.samplesTotal == 0 {
  1265  		stats.samplesTotal = len(p.Sample)
  1266  	}
  1267  	ms := int64(len(p.StringTable))
  1268  	// Handle the case when "" is not present,
  1269  	// or is not at string_table[0].
  1270  	z := int64(-1)
  1271  	for i, s := range p.StringTable {
  1272  		if s == "" {
  1273  			z = int64(i)
  1274  			break
  1275  		}
  1276  	}
  1277  	if z == -1 {
  1278  		// No empty string found in the table.
  1279  		// Reduce number of invariants by adding one.
  1280  		z = ms
  1281  		p.StringTable = append(p.StringTable, "")
  1282  		ms++
  1283  	}
  1284  	// Swap zero string.
  1285  	p.StringTable[z], p.StringTable[0] = p.StringTable[0], p.StringTable[z]
  1286  	// Now we need to update references to strings:
  1287  	// invalid references (>= len(string_table)) are set to 0.
  1288  	// references to empty string are set to 0.
  1289  	str := func(i int64) int64 {
  1290  		if i == 0 && z > 0 {
  1291  			// z > 0 indicates that "" is not at string_table[0].
  1292  			// This means that element that used to be at 0 has
  1293  			// been moved to z.
  1294  			return z
  1295  		}
  1296  		if i == z || i >= ms || i < 0 {
  1297  			// The reference to empty string, or a string that is
  1298  			// not present in the table.
  1299  			return 0
  1300  		}
  1301  		return i
  1302  	}
  1303  
  1304  	p.SampleType = slices.RemoveInPlace(p.SampleType, func(x *profilev1.ValueType, _ int) bool {
  1305  		if x == nil {
  1306  			stats.sampleTypeNil++
  1307  			return true
  1308  		}
  1309  		x.Type = str(x.Type)
  1310  		x.Unit = str(x.Unit)
  1311  		return false
  1312  	})
  1313  	if p.PeriodType != nil {
  1314  		p.PeriodType.Type = str(p.PeriodType.Type)
  1315  		p.PeriodType.Unit = str(p.PeriodType.Unit)
  1316  	}
  1317  
  1318  	p.DefaultSampleType = str(p.DefaultSampleType)
  1319  	p.DropFrames = str(p.DropFrames)
  1320  	p.KeepFrames = str(p.KeepFrames)
  1321  	for i := range p.Comment {
  1322  		p.Comment[i] = str(p.Comment[i])
  1323  	}
  1324  
  1325  	// Sanitize mappings and references to them.
  1326  	// Locations with invalid references are removed.
  1327  	t := make(map[uint64]uint64, len(p.Location))
  1328  	j := uint64(1)
  1329  	p.Mapping = slices.RemoveInPlace(p.Mapping, func(x *profilev1.Mapping, _ int) bool {
  1330  		if x == nil {
  1331  			stats.mappingNil++
  1332  			return true
  1333  		}
  1334  		x.BuildId = str(x.BuildId)
  1335  		x.Filename = str(x.Filename)
  1336  		x.Id, t[x.Id] = j, j
  1337  		j++
  1338  		return false
  1339  	})
  1340  
  1341  	// Rewrite references to mappings, removing invalid ones.
  1342  	// Locations with mapping ID 0 are allowed: in this case,
  1343  	// a mapping stub is created.
  1344  	var mapping *profilev1.Mapping
  1345  	p.Location = slices.RemoveInPlace(p.Location, func(x *profilev1.Location, _ int) bool {
  1346  		if x == nil {
  1347  			stats.locationNil++
  1348  			return true
  1349  		}
  1350  		if len(x.Line) == 0 && x.Address == 0 {
  1351  			stats.locationEmpty++
  1352  			return true
  1353  		}
  1354  		if x.MappingId == 0 {
  1355  			if mapping == nil {
  1356  				mapping = &profilev1.Mapping{Id: uint64(len(p.Mapping) + 1)}
  1357  				p.Mapping = append(p.Mapping, mapping)
  1358  			}
  1359  			x.MappingId = mapping.Id
  1360  			return false
  1361  		}
  1362  		x.MappingId = t[x.MappingId]
  1363  		if x.MappingId == 0 {
  1364  			stats.locationMappingInvalid++
  1365  			return true
  1366  		}
  1367  		return false
  1368  	})
  1369  
  1370  	// Sanitize functions and references to them.
  1371  	// Locations with invalid references are removed.
  1372  	clear(t)
  1373  	j = 1
  1374  	p.Function = slices.RemoveInPlace(p.Function, func(x *profilev1.Function, _ int) bool {
  1375  		if x == nil {
  1376  			stats.functionNil++
  1377  			return true
  1378  		}
  1379  		x.Name = str(x.Name)
  1380  		x.SystemName = str(x.SystemName)
  1381  		x.Filename = str(x.Filename)
  1382  		x.Id, t[x.Id] = j, j
  1383  		j++
  1384  		return false
  1385  	})
  1386  	// Check locations again, verifying that all functions are valid.
  1387  	p.Location = slices.RemoveInPlace(p.Location, func(x *profilev1.Location, _ int) bool {
  1388  		for _, line := range x.Line {
  1389  			if line.FunctionId = t[line.FunctionId]; line.FunctionId == 0 {
  1390  				stats.locationFunctionInvalid++
  1391  				return true
  1392  			}
  1393  		}
  1394  		return false
  1395  	})
  1396  
  1397  	// Sanitize locations and references to them.
  1398  	// Samples with invalid references are removed.
  1399  	clear(t)
  1400  	j = 1
  1401  	for _, x := range p.Location {
  1402  		x.Id, t[x.Id] = j, j
  1403  		j++
  1404  	}
  1405  
  1406  	vs := len(p.SampleType)
  1407  	p.Sample = slices.RemoveInPlace(p.Sample, func(x *profilev1.Sample, _ int) bool {
  1408  		if x == nil {
  1409  			stats.sampleNil++
  1410  			return true
  1411  		}
  1412  		if len(x.Value) != vs {
  1413  			stats.sampleValueMismatch++
  1414  			return true
  1415  		}
  1416  		for i := range x.LocationId {
  1417  			if x.LocationId[i] = t[x.LocationId[i]]; x.LocationId[i] == 0 {
  1418  				stats.sampleLocationInvalid++
  1419  				return true
  1420  			}
  1421  		}
  1422  		for _, l := range x.Label {
  1423  			if l == nil {
  1424  				stats.sampleLabelNil++
  1425  				return true
  1426  			}
  1427  			l.Key = str(l.Key)
  1428  			l.Str = str(l.Str)
  1429  			l.NumUnit = str(l.NumUnit)
  1430  		}
  1431  		return false
  1432  	})
  1433  }
  1434  
  1435  type sanitizeStats struct {
  1436  	samplesTotal  int
  1437  	sampleTypeNil int
  1438  
  1439  	mappingNil              int
  1440  	functionNil             int
  1441  	locationNil             int
  1442  	locationEmpty           int
  1443  	locationMappingInvalid  int
  1444  	locationFunctionInvalid int
  1445  
  1446  	sampleNil             int
  1447  	sampleLabelNil        int
  1448  	sampleLocationInvalid int
  1449  	sampleValueMismatch   int
  1450  	sampleValueNegative   int
  1451  	sampleValueZero       int
  1452  	sampleDuplicate       int
  1453  }
  1454  
  1455  func (s *sanitizeStats) pretty() string {
  1456  	var b strings.Builder
  1457  	b.WriteString("samples_total=")
  1458  	b.WriteString(strconv.Itoa(s.samplesTotal))
  1459  	put := func(k string, v int) {
  1460  		if v > 0 {
  1461  			b.WriteString(" ")
  1462  			b.WriteString(k)
  1463  			b.WriteString("=")
  1464  			b.WriteString(strconv.Itoa(v))
  1465  		}
  1466  	}
  1467  	put("sample_type_nil", s.sampleTypeNil)
  1468  	put("mapping_nil", s.mappingNil)
  1469  	put("function_nil", s.functionNil)
  1470  	put("location_nil", s.locationNil)
  1471  	put("location_empty", s.locationEmpty)
  1472  	put("location_mapping_invalid", s.locationMappingInvalid)
  1473  	put("location_function_invalid", s.locationFunctionInvalid)
  1474  	put("sample_nil", s.sampleNil)
  1475  	put("sample_label_nil", s.sampleLabelNil)
  1476  	put("sample_location_invalid", s.sampleLocationInvalid)
  1477  	put("sample_value_mismatch", s.sampleValueMismatch)
  1478  	put("sample_value_negative", s.sampleValueNegative)
  1479  	put("sample_value_zero", s.sampleValueZero)
  1480  	put("sample_duplicate", s.sampleDuplicate)
  1481  	return b.String()
  1482  }
  1483  
  1484  func (p *Profile) DebugString() string {
  1485  	bs, _ := p.MarshalVT()
  1486  	gp, _ := profile.ParseData(bs)
  1487  	if gp == nil {
  1488  		return "<nil>"
  1489  	}
  1490  	return gp.String()
  1491  }