github.com/pyroscope-io/pyroscope@v0.37.3-0.20230725203016-5f6947968bd0/pkg/convert/pprof/streaming/parser_streaming.go (about)

     1  package streaming
     2  
     3  import (
     4  	"bytes"
     5  	"compress/gzip"
     6  	"context"
     7  	"fmt"
     8  	"github.com/pyroscope-io/pyroscope/pkg/stackbuilder"
     9  	"github.com/pyroscope-io/pyroscope/pkg/storage"
    10  	"github.com/pyroscope-io/pyroscope/pkg/storage/metadata"
    11  	"github.com/pyroscope-io/pyroscope/pkg/storage/segment"
    12  	"github.com/pyroscope-io/pyroscope/pkg/storage/tree"
    13  	"github.com/pyroscope-io/pyroscope/pkg/util/arenahelper"
    14  	"github.com/valyala/bytebufferpool"
    15  	"io"
    16  	"runtime/debug"
    17  	"strings"
    18  	"sync"
    19  	"time"
    20  )
    21  
    22  type StackFormatter int
    23  
    24  const (
    25  	// StackFrameFormatterGo use only function name
    26  	StackFrameFormatterGo = 0
    27  	// StackFrameFormatterRuby use function name, line number, function name
    28  	StackFrameFormatterRuby = 1
    29  )
    30  
    31  var PPROFBufPool = bytebufferpool.Pool{}
    32  
// ParserConfig carries the settings that Reset copies into a
// VTStreamingParser.
//
//   - Putter receives one storage.PutInput for every tree the parser emits.
//   - SpyName is copied into every PutInput and into app metadata.
//   - Labels are merged into the key of every emitted tree (see buildName);
//     the "__name__" entry is used as the application name prefix.
//   - SampleTypes maps a pprof sample type name to its configuration; sample
//     types that are not keys of this map are skipped.
//   - Formatter selects how stack frames are rendered (see addStackFrame).
//   - ArenasEnabled makes Reset create a new arena wrapper for the parser and
//     its label caches.
type ParserConfig struct {
	Putter        storage.Putter
	SpyName       string
	Labels        map[string]string
	SampleTypes   map[string]*tree.SampleTypeConfig
	Formatter     StackFormatter
	ArenasEnabled bool
}
    41  
// VTStreamingParser parses a pprof profile in several passes over the raw
// bytes (count structs, parse functions/locations, parse samples) and hands
// the resulting trees to a storage.Putter. An instance keeps its tables and
// caches between calls so it can be reused, optionally through
// vtStreamingParserPool.
type VTStreamingParser struct {
	// putter, spyName and labels are copied from ParserConfig by Reset.
	// wbf is not referenced in this section of the file.
	putter  storage.Putter
	wbf     stackbuilder.WriteBatchFactory
	spyName string
	labels  map[string]string

	// Per-sample-type configuration and formatting options, set by Reset.
	sampleTypesConfig map[string]*tree.SampleTypeConfig
	Formatter         StackFormatter
	ArenasEnabled     bool

	// sampleTypesFilter reports whether a sample type name is a key of the
	// configured sample types (built by filterKnownSamples in Reset).
	sampleTypesFilter func(string) bool

	// Per-call state set by ParsePprof; ctx and profile are cleared again
	// before ParsePprof returns. prev and cumulative are not referenced in
	// this section of the file.
	startTime      time.Time
	endTime        time.Time
	ctx            context.Context
	profile        []byte
	prev           bool
	cumulative     bool
	cumulativeOnly bool

	// Counters and tables for the profile being parsed. countStructs sizes the
	// slices from the n* counters (presumably filled in by UnmarshalVTProfile —
	// confirm there). Each strings entry packs a byte range of profile: start
	// offset in the high 32 bits, end offset in the low 32 bits.
	nStrings            int
	profileIDLabelIndex int64
	nFunctions          int
	nLocations          int
	nSampleTypes        int
	period              int64
	periodType          valueType
	sampleTypes         []valueType
	strings             []istr
	functions           []function
	locations           []location

	// lineRefs stores the lines of all locations; a location's linesRef packs
	// the start/end indexes of its range within lineRefs.lines.
	lineRefs locationFunctions

	// indexes lists the positions in sampleTypes selected by
	// haveKnownSampleTypes; types holds the matching Type string indexes.
	indexes []int
	types   []int64

	// tmpSample is scratch space for the sample currently being processed
	// (its stack, values and labels).
	tmpSample sample

	// finder looks up functions and locations by ID. newCache collects the
	// trees of the current parse and is swapped into previousCache by iterate.
	// arena is assigned by Reset when arenas are enabled and is passed to the
	// arena-aware allocation helpers.
	finder        finder
	previousCache LabelsCache
	newCache      LabelsCache
	wbCache       writeBatchCache
	arena         arenahelper.ArenaWrapper
}
    87  
    88  func NewStreamingParser(config ParserConfig) *VTStreamingParser {
    89  	res := &VTStreamingParser{}
    90  	res.Reset(config)
    91  	return res
    92  }
// FreeArena passes the parser's arena to arenahelper.Free.
//
// NOTE(review): the parser keeps its arena field and any slices grown from
// that arena after this call — presumably the parser must not be used again
// afterwards; confirm against callers.
func (p *VTStreamingParser) FreeArena() {
	arenahelper.Free(p.arena)
}
    96  func (p *VTStreamingParser) ParsePprof(ctx context.Context, startTime, endTime time.Time, bs []byte, cumulativeOnly bool) (err error) {
    97  	p.startTime = startTime
    98  	p.endTime = endTime
    99  	p.ctx = ctx
   100  	p.cumulativeOnly = cumulativeOnly
   101  
   102  	err = decompress(bs, func(profile []byte) error {
   103  		p.profile = profile
   104  		err := p.parsePprofDecompressed()
   105  		p.profile = nil
   106  		return err
   107  	})
   108  	p.ctx = nil
   109  	return err
   110  }
   111  
   112  func (p *VTStreamingParser) parsePprofDecompressed() (err error) {
   113  	defer func() {
   114  		if recover() != nil {
   115  			err = fmt.Errorf(fmt.Sprintf("parse panic %s", debug.Stack()))
   116  		}
   117  	}()
   118  
   119  	if err = p.countStructs(); err != nil {
   120  		return err
   121  	}
   122  	if err = p.parseFunctionsAndLocations(); err != nil {
   123  		return err
   124  	}
   125  	if !p.haveKnownSampleTypes() {
   126  		return nil
   127  	}
   128  
   129  	p.newCache.Reset()
   130  	if err = p.parseSamples(); err != nil {
   131  		return err
   132  	}
   133  	return p.iterate(p.put)
   134  }
   135  
   136  // step 1
   137  // - parse periodType
   138  // - parse sampleType
   139  // - count number of locations, functions, strings
   140  func (p *VTStreamingParser) countStructs() error {
   141  	err := p.UnmarshalVTProfile(p.profile, opFlagCountStructs)
   142  	if err == nil {
   143  		p.functions = grow(p.arena, p.functions, p.nFunctions)
   144  		p.locations = grow(p.arena, p.locations, p.nLocations)
   145  		p.strings = grow(p.arena, p.strings, p.nStrings)
   146  		p.sampleTypes = grow(p.arena, p.sampleTypes, p.nSampleTypes)
   147  		p.profileIDLabelIndex = 0
   148  	}
   149  	return err
   150  }
   151  
   152  func (p *VTStreamingParser) parseFunctionsAndLocations() error {
   153  	p.lineRefs.reset(p.arena, p.nLocations)
   154  	err := p.UnmarshalVTProfile(p.profile, opFlagParseStructs)
   155  	if err == nil {
   156  		p.finder = newFinder(p.functions, p.locations)
   157  		for i := range p.sampleTypes {
   158  			p.sampleTypes[i].resolvedType = string(p.string(p.sampleTypes[i].Type))
   159  			p.sampleTypes[i].resolvedUnit = string(p.string(p.sampleTypes[i].unit))
   160  		}
   161  		p.periodType.resolvedType = string(p.string(p.periodType.Type))
   162  		p.periodType.resolvedUnit = string(p.string(p.periodType.unit))
   163  	}
   164  	return err
   165  }
   166  
   167  func (p *VTStreamingParser) haveKnownSampleTypes() bool {
   168  	p.indexes = grow(p.arena, p.indexes, len(p.sampleTypes))
   169  	p.types = grow(p.arena, p.types, len(p.sampleTypes))
   170  	for i, s := range p.sampleTypes {
   171  		ssType := p.string(s.Type)
   172  
   173  		st := string(ssType)
   174  		if p.sampleTypesFilter(st) {
   175  			if !p.cumulativeOnly || (p.cumulativeOnly && p.sampleTypesConfig[st].Cumulative) {
   176  				p.indexes = arenahelper.AppendA(p.indexes, i, p.arena)
   177  				p.types = arenahelper.AppendA(p.types, s.Type, p.arena)
   178  			}
   179  		}
   180  	}
   181  	if len(p.indexes) == 0 {
   182  		return false
   183  	}
   184  	return true
   185  }
   186  
// parseSamples runs the opFlagParseSamples pass over the profile.
//
// NOTE(review): presumably this pass decodes each sample and fills newCache
// via createTrees — confirm in UnmarshalVTProfile.
func (p *VTStreamingParser) parseSamples() error {
	return p.UnmarshalVTProfile(p.profile, opFlagParseSamples)
}
   190  
   191  func (p *VTStreamingParser) addStackLocation(lID uint64) error {
   192  	loc, ok := p.finder.FindLocation(lID)
   193  	if ok {
   194  		ref := loc.linesRef
   195  		lines := p.lineRefs.lines[(ref >> 32):(ref & 0xffffffff)]
   196  		for i := len(lines) - 1; i >= 0; i-- {
   197  			if err := p.addStackFrame(&lines[i]); err != nil {
   198  				return err
   199  			}
   200  		}
   201  	}
   202  	return nil
   203  }
   204  
   205  func (p *VTStreamingParser) addStackFrame(l *line) error {
   206  	fID := l.functionID
   207  	f, ok := p.finder.FindFunction(fID)
   208  	if !ok {
   209  		return nil
   210  	}
   211  	var frame []byte
   212  	switch p.Formatter {
   213  	case StackFrameFormatterRuby:
   214  		pFuncName := p.strings[f.name]
   215  		pFileName := p.strings[f.filename]
   216  		frame = []byte(fmt.Sprintf("%s:%d - %s",
   217  			p.profile[(pFileName>>32):(pFileName&0xffffffff)],
   218  			l.line,
   219  			p.profile[(pFuncName>>32):(pFuncName&0xffffffff)]))
   220  	default:
   221  	case StackFrameFormatterGo:
   222  		pFuncName := p.strings[f.name]
   223  		frame = p.profile[(pFuncName >> 32):(pFuncName & 0xffffffff)]
   224  	}
   225  	pSample := &p.tmpSample
   226  	if len(pSample.tmpStack) < cap(pSample.tmpStack) {
   227  		pSample.tmpStack = append(pSample.tmpStack, frame)
   228  	} else {
   229  		pSample.tmpStack = arenahelper.AppendA(pSample.tmpStack, frame, p.arena)
   230  	}
   231  	return nil
   232  }
   233  
   234  func (p *VTStreamingParser) string(i int64) []byte {
   235  	ps := p.strings[i]
   236  	return p.profile[(ps >> 32):(ps & 0xffffffff)]
   237  }
   238  
   239  func (p *VTStreamingParser) resolveSampleType(v int64) (*valueType, bool) {
   240  	for i := range p.sampleTypes {
   241  		if p.sampleTypes[i].Type == v {
   242  			return &p.sampleTypes[i], true
   243  		}
   244  	}
   245  	return nil, false
   246  }
   247  
   248  func (p *VTStreamingParser) iterate(fn func(stIndex int, st *valueType, l Labels, tr *tree.Tree) (keep bool, err error)) error {
   249  	err := p.newCache.iterate(func(stIndex int, l Labels, lh uint64, tr *tree.Tree) error {
   250  		t := &p.sampleTypes[stIndex]
   251  		keep, err := fn(stIndex, t, l, tr)
   252  		if err != nil {
   253  			return err
   254  		}
   255  		if !keep {
   256  			p.newCache.Remove(stIndex, lh)
   257  		}
   258  		return nil
   259  	})
   260  	if err != nil {
   261  		return err
   262  	}
   263  	p.previousCache, p.newCache = p.newCache, p.previousCache
   264  	p.newCache.Reset()
   265  	return nil
   266  }
   267  
   268  func (p *VTStreamingParser) createTrees() {
   269  	for _, vi := range p.indexes {
   270  		v := uint64(p.tmpSample.tmpValues[vi])
   271  		if v == 0 {
   272  			continue
   273  		}
   274  		s := p.tmpSample.tmpStack
   275  		if j := findLabelIndex(p.tmpSample.tmpLabels, p.profileIDLabelIndex); j >= 0 {
   276  			p.newCache.GetOrCreateTree(vi, CutLabel(p.arena, p.tmpSample.tmpLabels, j)).InsertStackA(s, v)
   277  		}
   278  		p.newCache.GetOrCreateTree(vi, p.tmpSample.tmpLabels).InsertStackA(s, v)
   279  	}
   280  }
   281  
   282  func (p *VTStreamingParser) put(stIndex int, st *valueType, l Labels, t *tree.Tree) (keep bool, err error) {
   283  	sampleTypeBytes := st.resolvedType
   284  	sampleType := sampleTypeBytes
   285  	sampleTypeConfig, ok := p.sampleTypesConfig[sampleType]
   286  	if !ok {
   287  		return false, fmt.Errorf("sample value type is unknown")
   288  	}
   289  	pi := storage.PutInput{
   290  		StartTime: p.startTime,
   291  		EndTime:   p.endTime,
   292  		SpyName:   p.spyName,
   293  		Val:       t,
   294  	}
   295  	// Cumulative profiles require two consecutive samples,
   296  	// therefore we have to cache this trie.
   297  	if sampleTypeConfig.Cumulative {
   298  		prev, found := p.previousCache.Get(stIndex, l.Hash())
   299  		if !found {
   300  			// Keep the current entry in cache.
   301  			return true, nil
   302  		}
   303  		// Take diff with the previous tree.
   304  		// The result is written to prev, t is not changed.
   305  		pi.Val = prev.Diff(t)
   306  	}
   307  	pi.AggregationType = sampleTypeConfig.Aggregation
   308  	if sampleTypeConfig.Sampled {
   309  		pi.SampleRate = p.sampleRate()
   310  	}
   311  	if sampleTypeConfig.DisplayName != "" {
   312  		sampleType = sampleTypeConfig.DisplayName
   313  	}
   314  	if sampleTypeConfig.Units != "" {
   315  		pi.Units = sampleTypeConfig.Units
   316  	} else {
   317  		// TODO(petethepig): this conversion is questionable
   318  		unitsBytes := st.resolvedUnit
   319  		pi.Units = metadata.Units(unitsBytes)
   320  		if err != nil {
   321  			return false, err
   322  		}
   323  	}
   324  	pi.Key = p.buildName(sampleType, p.ResolveLabels(l))
   325  	err = p.putter.Put(p.ctx, &pi)
   326  	return sampleTypeConfig.Cumulative, err
   327  }
   328  
   329  var vtStreamingParserPool = sync.Pool{New: func() any {
   330  	return &VTStreamingParser{}
   331  }}
   332  
   333  func VTStreamingParserFromPool(config ParserConfig) *VTStreamingParser {
   334  	res := vtStreamingParserPool.Get().(*VTStreamingParser)
   335  	res.Reset(config)
   336  	return res
   337  }
   338  
// ResetCache resets both label caches, dropping the trees kept from the
// previous parse as well as any collected for the current one.
func (p *VTStreamingParser) ResetCache() {
	p.previousCache.Reset()
	p.newCache.Reset()
}
   343  
   344  func (p *VTStreamingParser) ReturnToPool() {
   345  	if p != nil {
   346  		vtStreamingParserPool.Put(p)
   347  	}
   348  }
   349  
   350  func (p *VTStreamingParser) ResolveLabels(l Labels) map[string]string {
   351  	m := make(map[string]string, len(l))
   352  	for _, label := range l {
   353  		k := label >> 32
   354  		if k != 0 {
   355  			v := label & 0xffffffff
   356  			sk := p.string(int64(k))
   357  			sv := p.string(int64(v))
   358  			m[string(sk)] = string(sv)
   359  		}
   360  	}
   361  	return m
   362  }
   363  
   364  func (p *VTStreamingParser) buildName(sampleTypeName string, labels map[string]string) *segment.Key {
   365  	for k, v := range p.labels {
   366  		labels[k] = v
   367  	}
   368  	labels["__name__"] += "." + sampleTypeName
   369  	return segment.NewKey(labels)
   370  }
   371  
   372  func (p *VTStreamingParser) getAppMetadata(sampleTypeIndex int) (string, metadata.Metadata) {
   373  	st := &p.sampleTypes[sampleTypeIndex]
   374  	sampleType := st.resolvedType
   375  	sampleTypeConfig, ok := p.sampleTypesConfig[sampleType]
   376  	if !ok {
   377  		return "", metadata.Metadata{}
   378  	}
   379  	if sampleTypeConfig.DisplayName != "" {
   380  		sampleType = sampleTypeConfig.DisplayName
   381  	}
   382  	name := p.labels["__name__"]
   383  	if name == "" {
   384  		return "", metadata.Metadata{}
   385  	}
   386  	md := metadata.Metadata{SpyName: p.spyName}
   387  	if sampleTypeConfig.Sampled {
   388  		md.SampleRate = p.sampleRate()
   389  	}
   390  	if sampleTypeConfig.DisplayName != "" {
   391  		sampleType = sampleTypeConfig.DisplayName
   392  	}
   393  	if sampleTypeConfig.Units != "" {
   394  		md.Units = sampleTypeConfig.Units
   395  	} else {
   396  		// TODO(petethepig): this conversion is questionable
   397  		unitsBytes := st.resolvedUnit
   398  		md.Units = metadata.Units(unitsBytes)
   399  	}
   400  	md.AggregationType = sampleTypeConfig.Aggregation
   401  	return name + "." + sampleType, md
   402  }
   403  
   404  func (p *VTStreamingParser) sampleRate() uint32 {
   405  	if p.period <= 0 || p.periodType.unit <= 0 {
   406  		return 0
   407  	}
   408  	sampleUnit := time.Nanosecond
   409  	u := p.periodType.resolvedUnit
   410  
   411  	switch u {
   412  	case "microseconds":
   413  		sampleUnit = time.Microsecond
   414  	case "milliseconds":
   415  		sampleUnit = time.Millisecond
   416  	case "seconds":
   417  		sampleUnit = time.Second
   418  	}
   419  
   420  	return uint32(time.Second / (sampleUnit * time.Duration(p.period)))
   421  }
   422  
   423  func (p *VTStreamingParser) Reset(config ParserConfig) {
   424  	p.putter = config.Putter
   425  	p.spyName = config.SpyName
   426  	p.labels = config.Labels
   427  	p.sampleTypesConfig = config.SampleTypes
   428  	p.previousCache.Reset()
   429  	p.newCache.Reset()
   430  	p.wbCache.reset()
   431  
   432  	p.sampleTypesFilter = filterKnownSamples(config.SampleTypes)
   433  	p.Formatter = config.Formatter
   434  	p.ArenasEnabled = config.ArenasEnabled
   435  	if config.ArenasEnabled {
   436  		p.arena = arenahelper.NewArenaWrapper()
   437  		p.previousCache.arena = p.arena
   438  		p.newCache.arena = p.arena
   439  	}
   440  }
   441  
   442  func filterKnownSamples(sampleTypes map[string]*tree.SampleTypeConfig) func(string) bool {
   443  	return func(s string) bool {
   444  		_, ok := sampleTypes[s]
   445  		return ok
   446  	}
   447  }
   448  
   449  func findLabelIndex(tmpLabels []uint64, k int64) int {
   450  	for i, l := range tmpLabels {
   451  		lk := int64(l >> 32)
   452  		if lk == k {
   453  			return i
   454  		}
   455  	}
   456  	return -1
   457  }
   458  
   459  func grow[T any](a arenahelper.ArenaWrapper, it []T, n int) []T {
   460  	if it == nil || n > cap(it) {
   461  		return arenahelper.MakeSlice[T](a, 0, n)
   462  	}
   463  	return it[:0]
   464  }
   465  
   466  func StackFrameFormatterForSpyName(spyName string) StackFormatter {
   467  	if spyName == "rbspy" || spyName == "pyspy" {
   468  		return StackFrameFormatterRuby
   469  	}
   470  	return StackFrameFormatterGo
   471  }
   472  
   473  func decompress(bs []byte, f func([]byte) error) error {
   474  	var err error
   475  	if len(bs) < 2 {
   476  		err = fmt.Errorf("failed to read pprof profile header")
   477  	} else if bs[0] == 0x1f && bs[1] == 0x8b {
   478  		var gzipr *gzip.Reader
   479  		gzipr, err = gzip.NewReader(bytes.NewReader(bs))
   480  		if err != nil {
   481  			err = fmt.Errorf("failed to create pprof profile zip reader: %w", err)
   482  		} else {
   483  			buf := PPROFBufPool.Get()
   484  			if _, err = io.Copy(buf, gzipr); err != nil {
   485  				err = fmt.Errorf("failed to decompress gzip: %w", err)
   486  			} else {
   487  				err = f(buf.Bytes())
   488  			}
   489  			PPROFBufPool.Put(buf)
   490  			_ = gzipr.Close()
   491  		}
   492  	} else {
   493  		err = f(bs)
   494  	}
   495  	return err
   496  }
   497  
   498  func stack2string(stack [][]byte, sep string) string {
   499  	sb := strings.Builder{}
   500  	for i, frame := range stack {
   501  		if i != 0 {
   502  			sb.WriteString(sep)
   503  		}
   504  		sb.Write(frame)
   505  	}
   506  	return sb.String()
   507  }