github.com/pyroscope-io/pyroscope@v0.37.3-0.20230725203016-5f6947968bd0/pkg/convert/pprof/parser.go (about)

     1  package pprof
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"fmt"
     7  	"time"
     8  
     9  	"github.com/pyroscope-io/pyroscope/pkg/storage"
    10  	"github.com/pyroscope-io/pyroscope/pkg/storage/metadata"
    11  	"github.com/pyroscope-io/pyroscope/pkg/storage/segment"
    12  	"github.com/pyroscope-io/pyroscope/pkg/storage/tree"
    13  )
    14  
// ParserInterface is the single-method abstraction over profile ingestion.
// *Parser in this file satisfies it.
type ParserInterface interface {
	// ParsePprof consumes the serialized profile in bs. startTime and endTime
	// delimit the period the profile covers; cumulativeOnly restricts
	// processing to cumulative sample types.
	ParsePprof(ctx context.Context, startTime, endTime time.Time, bs []byte, cumulativeOnly bool) error
}
    18  
    19  type Parser struct {
    20  	putter              storage.Putter
    21  	spyName             string
    22  	labels              map[string]string
    23  	skipExemplars       bool
    24  	sampleTypes         map[string]*tree.SampleTypeConfig
    25  	stackFrameFormatter StackFrameFormatter
    26  
    27  	cache             tree.LabelsCache
    28  	sampleTypesFilter func(string) bool
    29  }
    30  
// ParserConfig carries the settings NewParser copies into a Parser.
type ParserConfig struct {
	// Putter receives one storage.PutInput per tree produced by Convert.
	Putter storage.Putter
	// SpyName is copied into every storage.PutInput.
	SpyName string
	// Labels are merged into the labels of every stored key, overriding
	// sample labels that share the same name.
	Labels map[string]string
	// SkipExemplars, when set, makes samples that carry the profile ID label
	// contribute only to the baseline tree (the one without that label).
	SkipExemplars bool
	// SampleTypes maps sample type names, as they appear in the profile string
	// table, to their configuration. Sample types missing from the map are
	// not read.
	SampleTypes map[string]*tree.SampleTypeConfig
	// StackFrameFormatter renders stack frames. NewParser falls back to
	// UnsafeFunctionNameFormatter when it is nil.
	StackFrameFormatter StackFrameFormatter
}
    39  
    40  func NewParser(config ParserConfig) *Parser {
    41  	if config.StackFrameFormatter == nil {
    42  		config.StackFrameFormatter = &UnsafeFunctionNameFormatter{}
    43  	}
    44  	return &Parser{
    45  		putter:              config.Putter,
    46  		spyName:             config.SpyName,
    47  		labels:              config.Labels,
    48  		sampleTypes:         config.SampleTypes,
    49  		skipExemplars:       config.SkipExemplars,
    50  		stackFrameFormatter: config.StackFrameFormatter,
    51  
    52  		cache:             make(tree.LabelsCache),
    53  		sampleTypesFilter: filterKnownSamples(config.SampleTypes),
    54  	}
    55  }
    56  
    57  func filterKnownSamples(sampleTypes map[string]*tree.SampleTypeConfig) func(string) bool {
    58  	return func(s string) bool {
    59  		_, ok := sampleTypes[s]
    60  		return ok
    61  	}
    62  }
    63  
    64  func (p *Parser) Reset() { p.cache = make(tree.LabelsCache) }
    65  
    66  func (p *Parser) ParsePprof(ctx context.Context, startTime, endTime time.Time, bs []byte, cumulativeOnly bool) error {
    67  	b := bytes.NewReader(bs)
    68  	return DecodePool(b, func(profile *tree.Profile) error {
    69  		return p.Convert(ctx, startTime, endTime, profile, cumulativeOnly)
    70  	})
    71  }
    72  
    73  func (p *Parser) Convert(ctx context.Context, startTime, endTime time.Time, profile *tree.Profile, cumulativeOnly bool) error {
    74  	return p.iterate(profile, cumulativeOnly, func(vt *tree.ValueType, l tree.Labels, t *tree.Tree) (keep bool, err error) {
    75  		if vt.Type >= int64(len(profile.StringTable)) {
    76  			return false, fmt.Errorf("sample value type is invalid: %d", vt.Type)
    77  		}
    78  		sampleType := profile.StringTable[vt.Type]
    79  		sampleTypeConfig, ok := p.sampleTypes[sampleType]
    80  		if !ok {
    81  			return false, fmt.Errorf("sample value type is unknown")
    82  		}
    83  		pi := storage.PutInput{
    84  			StartTime: startTime,
    85  			EndTime:   endTime,
    86  			SpyName:   p.spyName,
    87  			Val:       t,
    88  		}
    89  		// Cumulative profiles require two consecutive samples,
    90  		// therefore we have to cache this trie.
    91  		if sampleTypeConfig.Cumulative {
    92  			prev, found := p.load(vt.Type, l)
    93  			if !found {
    94  				// Keep the current entry in cache.
    95  				return true, nil
    96  			}
    97  			// Take diff with the previous tree.
    98  			// The result is written to prev, t is not changed.
    99  			pi.Val = prev.Diff(t)
   100  		}
   101  		pi.AggregationType = sampleTypeConfig.Aggregation
   102  		if sampleTypeConfig.Sampled {
   103  			pi.SampleRate = sampleRate(profile)
   104  		}
   105  		if sampleTypeConfig.DisplayName != "" {
   106  			sampleType = sampleTypeConfig.DisplayName
   107  		}
   108  		if sampleTypeConfig.Units != "" {
   109  			pi.Units = sampleTypeConfig.Units
   110  		} else {
   111  			// TODO(petethepig): this conversion is questionable
   112  			pi.Units = metadata.Units(profile.StringTable[vt.Unit])
   113  		}
   114  		pi.Key = p.buildName(sampleType, profile.ResolveLabels(l))
   115  		err = p.putter.Put(ctx, &pi)
   116  		return sampleTypeConfig.Cumulative, err
   117  	})
   118  }
   119  
   120  func sampleRate(p *tree.Profile) uint32 {
   121  	if p.Period <= 0 || p.PeriodType == nil {
   122  		return 0
   123  	}
   124  	sampleUnit := time.Nanosecond
   125  	switch p.StringTable[p.PeriodType.Unit] {
   126  	case "microseconds":
   127  		sampleUnit = time.Microsecond
   128  	case "milliseconds":
   129  		sampleUnit = time.Millisecond
   130  	case "seconds":
   131  		sampleUnit = time.Second
   132  	}
   133  	return uint32(time.Second / (sampleUnit * time.Duration(p.Period)))
   134  }
   135  
   136  func (p *Parser) buildName(sampleTypeName string, labels map[string]string) *segment.Key {
   137  	for k, v := range p.labels {
   138  		labels[k] = v
   139  	}
   140  	labels["__name__"] += "." + sampleTypeName
   141  	return segment.NewKey(labels)
   142  }
   143  
   144  func (p *Parser) load(sampleType int64, labels tree.Labels) (*tree.Tree, bool) {
   145  	e, ok := p.cache.Get(sampleType, labels.Hash())
   146  	if !ok {
   147  		return nil, false
   148  	}
   149  	return e.Tree, true
   150  }
   151  
   152  func (p *Parser) iterate(x *tree.Profile, cumulativeOnly bool, fn func(vt *tree.ValueType, l tree.Labels, t *tree.Tree) (keep bool, err error)) error {
   153  	c := make(tree.LabelsCache)
   154  	p.readTrees(x, c, tree.NewFinder(x), cumulativeOnly)
   155  	for sampleType, entries := range c {
   156  		if t, ok := x.ResolveSampleType(sampleType); ok {
   157  			for h, e := range entries {
   158  				keep, err := fn(t, e.Labels, e.Tree)
   159  				if err != nil {
   160  					return err
   161  				}
   162  				if !keep {
   163  					c.Remove(sampleType, h)
   164  				}
   165  			}
   166  		}
   167  	}
   168  	p.cache = c
   169  	return nil
   170  }
   171  
   172  // readTrees generates trees from the profile populating c.
   173  func (p *Parser) readTrees(x *tree.Profile, c tree.LabelsCache, f tree.Finder, cumulativeOnly bool) {
   174  	// SampleType value indexes.
   175  	indexes := make([]int, 0, len(x.SampleType))
   176  	// Corresponding type IDs used as the main cache keys.
   177  	types := make([]int64, 0, len(x.SampleType))
   178  	for i, s := range x.SampleType {
   179  		st := x.StringTable[s.Type]
   180  		if p.sampleTypesFilter != nil && p.sampleTypesFilter(st) {
   181  			if !cumulativeOnly || (cumulativeOnly && p.sampleTypes[st].Cumulative) {
   182  				indexes = append(indexes, i)
   183  				types = append(types, s.Type)
   184  			}
   185  		}
   186  	}
   187  	if len(indexes) == 0 {
   188  		return
   189  	}
   190  	stack := make([][]byte, 0, 16)
   191  	for _, s := range x.Sample {
   192  		for i := len(s.LocationId) - 1; i >= 0; i-- {
   193  			// Resolve stack.
   194  			loc, ok := f.FindLocation(s.LocationId[i])
   195  			if !ok {
   196  				continue
   197  			}
   198  			// Multiple line indicates this location has inlined functions,
   199  			// where the last entry represents the caller into which the
   200  			// preceding entries were inlined.
   201  			//
   202  			// E.g., if memcpy() is inlined into printf:
   203  			//    line[0].function_name == "memcpy"
   204  			//    line[1].function_name == "printf"
   205  			//
   206  			// Therefore iteration goes in reverse order.
   207  			for j := len(loc.Line) - 1; j >= 0; j-- {
   208  				fn, ok := f.FindFunction(loc.Line[j].FunctionId)
   209  				if !ok || x.StringTable[fn.Name] == "" {
   210  					continue
   211  				}
   212  				sf := p.stackFrameFormatter.format(x, fn, loc.Line[j])
   213  				stack = append(stack, sf)
   214  			}
   215  		}
   216  		// Insert tree nodes.
   217  		for i, vi := range indexes {
   218  			v := uint64(s.Value[vi])
   219  			if v == 0 {
   220  				continue
   221  			}
   222  			// If the sample has ProfileID label, it belongs to an exemplar.
   223  			if j := labelIndex(x, s.Label, segment.ProfileIDLabelName); j >= 0 {
   224  				// Regardless of whether we should skip exemplars or not, the value
   225  				// should be appended to the exemplar baseline profile (w/o ProfileID label).
   226  				c.GetOrCreateTree(types[i], tree.CutLabel(s.Label, j)).InsertStack(stack, v)
   227  				if p.skipExemplars {
   228  					continue
   229  				}
   230  			}
   231  			c.GetOrCreateTree(types[i], s.Label).InsertStack(stack, v)
   232  		}
   233  		stack = stack[:0]
   234  	}
   235  }
   236  
   237  func labelIndex(p *tree.Profile, labels tree.Labels, key string) int {
   238  	for i, label := range labels {
   239  		if n, ok := p.ResolveLabelName(label); ok && n == key {
   240  			return i
   241  		}
   242  	}
   243  	return -1
   244  }