github.com/pyroscope-io/pyroscope@v0.37.3-0.20230725203016-5f6947968bd0/pkg/convert/jfr/parser.go (about)

     1  package jfr
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"github.com/hashicorp/go-multierror"
     7  	"github.com/pyroscope-io/jfr-parser/parser"
     8  	"github.com/pyroscope-io/pyroscope/pkg/storage"
     9  	"github.com/pyroscope-io/pyroscope/pkg/storage/metadata"
    10  	"github.com/pyroscope-io/pyroscope/pkg/storage/segment"
    11  	"github.com/pyroscope-io/pyroscope/pkg/storage/tree"
    12  	"io"
    13  	"regexp"
    14  )
    15  
    16  const (
    17  	_ = iota
    18  	sampleTypeCPU
    19  	sampleTypeWall
    20  	sampleTypeInTLABObjects
    21  	sampleTypeInTLABBytes
    22  	sampleTypeOutTLABObjects
    23  	sampleTypeOutTLABBytes
    24  	sampleTypeLockSamples
    25  	sampleTypeLockDuration
    26  	sampleTypeLiveObject
    27  )
    28  
    29  func ParseJFR(ctx context.Context, s storage.Putter, body io.Reader, pi *storage.PutInput, jfrLabels *LabelsSnapshot) (err error) {
    30  	chunks, err := parser.ParseWithOptions(body, &parser.ChunkParseOptions{
    31  		CPoolProcessor: processSymbols,
    32  	})
    33  	if err != nil {
    34  		return fmt.Errorf("unable to parse JFR format: %w", err)
    35  	}
    36  	for _, c := range chunks {
    37  		if pErr := parse(ctx, c, s, pi, jfrLabels); pErr != nil {
    38  			err = multierror.Append(err, pErr)
    39  		}
    40  	}
    41  	return err
    42  }
    43  
    44  // revive:disable-next-line:cognitive-complexity necessary complexity
    45  func parse(ctx context.Context, c parser.Chunk, s storage.Putter, piOriginal *storage.PutInput, jfrLabels *LabelsSnapshot) (err error) {
    46  	var event string
    47  	for _, e := range c.Events {
    48  		if as, ok := e.(*parser.ActiveSetting); ok {
    49  			if as.Name == "event" {
    50  				event = as.Value
    51  			}
    52  		}
    53  	}
    54  	cache := make(tree.LabelsCache)
    55  	for contextID, events := range groupEventsByContextID(c.Events) {
    56  		labels := getContextLabels(contextID, jfrLabels)
    57  		lh := labels.Hash()
    58  		for _, e := range events {
    59  			switch e.(type) {
    60  			case *parser.ExecutionSample:
    61  				es := e.(*parser.ExecutionSample)
    62  				if fs := frames(es.StackTrace); fs != nil {
    63  					if es.State.Name == "STATE_RUNNABLE" {
    64  						cache.GetOrCreateTreeByHash(sampleTypeCPU, labels, lh).InsertStackString(fs, 1)
    65  					}
    66  					cache.GetOrCreateTreeByHash(sampleTypeWall, labels, lh).InsertStackString(fs, 1)
    67  				}
    68  			case *parser.ObjectAllocationInNewTLAB:
    69  				oa := e.(*parser.ObjectAllocationInNewTLAB)
    70  				if fs := frames(oa.StackTrace); fs != nil {
    71  					cache.GetOrCreateTreeByHash(sampleTypeInTLABObjects, labels, lh).InsertStackString(fs, 1)
    72  					cache.GetOrCreateTreeByHash(sampleTypeInTLABBytes, labels, lh).InsertStackString(fs, uint64(oa.TLABSize))
    73  				}
    74  			case *parser.ObjectAllocationOutsideTLAB:
    75  				oa := e.(*parser.ObjectAllocationOutsideTLAB)
    76  				if fs := frames(oa.StackTrace); fs != nil {
    77  					cache.GetOrCreateTreeByHash(sampleTypeOutTLABObjects, labels, lh).InsertStackString(fs, 1)
    78  					cache.GetOrCreateTreeByHash(sampleTypeOutTLABBytes, labels, lh).InsertStackString(fs, uint64(oa.AllocationSize))
    79  				}
    80  			case *parser.JavaMonitorEnter:
    81  				jme := e.(*parser.JavaMonitorEnter)
    82  				if fs := frames(jme.StackTrace); fs != nil {
    83  					cache.GetOrCreateTreeByHash(sampleTypeLockSamples, labels, lh).InsertStackString(fs, 1)
    84  					cache.GetOrCreateTreeByHash(sampleTypeLockDuration, labels, lh).InsertStackString(fs, uint64(jme.Duration))
    85  				}
    86  			case *parser.ThreadPark:
    87  				tp := e.(*parser.ThreadPark)
    88  				if fs := frames(tp.StackTrace); fs != nil {
    89  					cache.GetOrCreateTreeByHash(sampleTypeLockSamples, labels, lh).InsertStackString(fs, 1)
    90  					cache.GetOrCreateTreeByHash(sampleTypeLockDuration, labels, lh).InsertStackString(fs, uint64(tp.Duration))
    91  				}
    92  			case *parser.LiveObject:
    93  				lo := e.(*parser.LiveObject)
    94  				if fs := frames(lo.StackTrace); fs != nil {
    95  					cache.GetOrCreateTreeByHash(sampleTypeLiveObject, labels, lh).InsertStackString(fs, 1)
    96  				}
    97  			}
    98  		}
    99  	}
   100  	for sampleType, entries := range cache {
   101  		for _, e := range entries {
   102  			if i := labelIndex(jfrLabels, e.Labels, segment.ProfileIDLabelName); i != -1 {
   103  				cutLabels := tree.CutLabel(e.Labels, i)
   104  				cache.GetOrCreateTree(sampleType, cutLabels).Merge(e.Tree)
   105  			}
   106  		}
   107  	}
   108  	cb := func(n string, labels tree.Labels, t *tree.Tree, u metadata.Units, at metadata.AggregationType) {
   109  		key := buildKey(n, piOriginal.Key.Labels(), labels, jfrLabels)
   110  		pi := &storage.PutInput{
   111  			StartTime:       piOriginal.StartTime,
   112  			EndTime:         piOriginal.EndTime,
   113  			Key:             key,
   114  			Val:             t,
   115  			SpyName:         piOriginal.SpyName,
   116  			SampleRate:      piOriginal.SampleRate,
   117  			Units:           u,
   118  			AggregationType: at,
   119  		}
   120  		if putErr := s.Put(ctx, pi); putErr != nil {
   121  			err = multierror.Append(err, putErr)
   122  		}
   123  	}
   124  	for sampleType, entries := range cache {
   125  		if sampleType == sampleTypeWall && event != "wall" {
   126  			continue
   127  		}
   128  		n := getName(sampleType, event)
   129  		units := getUnits(sampleType)
   130  		at := aggregationType(sampleType)
   131  		for _, e := range entries {
   132  			cb(n, e.Labels, e.Tree, units, at)
   133  		}
   134  	}
   135  	return err
   136  }
   137  
   138  func getName(sampleType int64, event string) string {
   139  	switch sampleType {
   140  	case sampleTypeCPU:
   141  		if event == "cpu" || event == "itimer" || event == "wall" {
   142  			profile := event
   143  			if event == "wall" {
   144  				profile = "cpu"
   145  			}
   146  			return profile
   147  		}
   148  	case sampleTypeWall:
   149  		return "wall"
   150  	case sampleTypeInTLABObjects:
   151  		return "alloc_in_new_tlab_objects"
   152  	case sampleTypeInTLABBytes:
   153  		return "alloc_in_new_tlab_bytes"
   154  	case sampleTypeOutTLABObjects:
   155  		return "alloc_outside_tlab_objects"
   156  	case sampleTypeOutTLABBytes:
   157  		return "alloc_outside_tlab_bytes"
   158  	case sampleTypeLockSamples:
   159  		return "lock_count"
   160  	case sampleTypeLockDuration:
   161  		return "lock_duration"
   162  	case sampleTypeLiveObject:
   163  		return "live"
   164  	}
   165  	return "unknown"
   166  }
   167  
   168  func aggregationType(sampleType int64) metadata.AggregationType {
   169  	switch sampleType {
   170  	case sampleTypeLiveObject:
   171  		return metadata.AverageAggregationType
   172  	default:
   173  		return metadata.SumAggregationType
   174  	}
   175  }
   176  
   177  func getUnits(sampleType int64) metadata.Units {
   178  	switch sampleType {
   179  	case sampleTypeCPU:
   180  		return metadata.SamplesUnits
   181  	case sampleTypeWall:
   182  		return metadata.SamplesUnits
   183  	case sampleTypeInTLABObjects:
   184  		return metadata.ObjectsUnits
   185  	case sampleTypeInTLABBytes:
   186  		return metadata.BytesUnits
   187  	case sampleTypeOutTLABObjects:
   188  		return metadata.ObjectsUnits
   189  	case sampleTypeOutTLABBytes:
   190  		return metadata.BytesUnits
   191  	case sampleTypeLockSamples:
   192  		return metadata.LockSamplesUnits
   193  	case sampleTypeLockDuration:
   194  		return metadata.LockNanosecondsUnits
   195  	case sampleTypeLiveObject:
   196  		return metadata.ObjectsUnits
   197  	}
   198  	return metadata.SamplesUnits
   199  }
   200  
   201  func buildKey(n string, appLabels map[string]string, labels tree.Labels, snapshot *LabelsSnapshot) *segment.Key {
   202  	finalLabels := map[string]string{}
   203  	for k, v := range appLabels {
   204  		finalLabels[k] = v
   205  	}
   206  	for _, v := range labels {
   207  		ks, ok := snapshot.Strings[v.Key]
   208  		if !ok {
   209  			continue
   210  		}
   211  		vs, ok := snapshot.Strings[v.Str]
   212  		finalLabels[ks] = vs
   213  	}
   214  
   215  	finalLabels["__name__"] += "." + n
   216  	return segment.NewKey(finalLabels)
   217  }
   218  
   219  func getContextLabels(contextID int64, labels *LabelsSnapshot) tree.Labels {
   220  	if contextID == 0 {
   221  		return nil
   222  	}
   223  	var ctx *Context
   224  	var ok bool
   225  	if ctx, ok = labels.Contexts[contextID]; !ok {
   226  		return nil
   227  	}
   228  	res := make(tree.Labels, 0, len(ctx.Labels))
   229  	for k, v := range ctx.Labels {
   230  		res = append(res, &tree.Label{Key: k, Str: v})
   231  	}
   232  	return res
   233  }
   234  func labelIndex(s *LabelsSnapshot, labels tree.Labels, key string) int {
   235  	for i, label := range labels {
   236  		if n, ok := s.Strings[label.Key]; ok {
   237  			if n == key {
   238  				return i
   239  			}
   240  		}
   241  	}
   242  	return -1
   243  }
   244  
   245  func groupEventsByContextID(events []parser.Parseable) map[int64][]parser.Parseable {
   246  	res := make(map[int64][]parser.Parseable)
   247  	for _, e := range events {
   248  		switch e.(type) {
   249  		case *parser.ExecutionSample:
   250  			es := e.(*parser.ExecutionSample)
   251  			res[es.ContextId] = append(res[es.ContextId], e)
   252  		case *parser.ObjectAllocationInNewTLAB:
   253  			oa := e.(*parser.ObjectAllocationInNewTLAB)
   254  			res[oa.ContextId] = append(res[oa.ContextId], e)
   255  		case *parser.ObjectAllocationOutsideTLAB:
   256  			oa := e.(*parser.ObjectAllocationOutsideTLAB)
   257  			res[oa.ContextId] = append(res[oa.ContextId], e)
   258  		case *parser.JavaMonitorEnter:
   259  			jme := e.(*parser.JavaMonitorEnter)
   260  			res[jme.ContextId] = append(res[jme.ContextId], e)
   261  		case *parser.ThreadPark:
   262  			tp := e.(*parser.ThreadPark)
   263  			res[tp.ContextId] = append(res[tp.ContextId], e)
   264  		case *parser.LiveObject:
   265  			res[0] = append(res[0], e)
   266  		}
   267  	}
   268  	return res
   269  }
   270  
   271  func frames(st *parser.StackTrace) []string {
   272  	if st == nil {
   273  		return nil
   274  	}
   275  	frames := make([]string, 0, len(st.Frames))
   276  	for i := len(st.Frames) - 1; i >= 0; i-- {
   277  		f := st.Frames[i]
   278  		// TODO(abeaumont): Add support for line numbers.
   279  		if f.Method != nil && f.Method.Type != nil && f.Method.Type.Name != nil && f.Method.Name != nil {
   280  			frames = append(frames, f.Method.Type.Name.String+"."+f.Method.Name.String)
   281  		}
   282  	}
   283  	return frames
   284  }
   285  
   286  // jdk/internal/reflect/GeneratedMethodAccessor31
   287  var generatedMethodAccessor = regexp.MustCompile("^(jdk/internal/reflect/GeneratedMethodAccessor)(\\d+)$")
   288  
   289  // org/example/rideshare/OrderService$$Lambda$669.0x0000000800fd7318.run
   290  var lambdaGeneratedEnclosingClass = regexp.MustCompile("^(.+\\$\\$Lambda\\$)\\d+[./](0x[\\da-f]+|\\d+)$")
   291  
   292  // libzstd-jni-1.5.1-16931311898282279136.so.Java_com_github_luben_zstd_ZstdInputStreamNoFinalizer_decompressStream
   293  var zstdJniSoLibName = regexp.MustCompile("^(\\.?/tmp/)?(libzstd-jni-\\d+\\.\\d+\\.\\d+-)(\\d+)(\\.so)( \\(deleted\\))?$")
   294  
   295  // ./tmp/libamazonCorrettoCryptoProvider109b39cf33c563eb.so
   296  // ./tmp/amazonCorrettoCryptoProviderNativeLibraries.7382c2f79097f415/libcrypto.so (deleted)
   297  var amazonCorrettoCryptoProvider = regexp.MustCompile("^(\\.?/tmp/)?(lib)?(amazonCorrettoCryptoProvider)(NativeLibraries\\.)?([0-9a-f]{16})" +
   298  	"(/libcrypto|/libamazonCorrettoCryptoProvider)?(\\.so)( \\(deleted\\))?$")
   299  
   300  // libasyncProfiler-linux-arm64-17b9a1d8156277a98ccc871afa9a8f69215f92.so
   301  var pyroscopeAsyncProfiler = regexp.MustCompile(
   302  	"^(\\.?/tmp/)?(libasyncProfiler)-(linux-arm64|linux-musl-x64|linux-x64|macos)-(17b9a1d8156277a98ccc871afa9a8f69215f92)(\\.so)( \\(deleted\\))?$")
   303  
   304  func mergeJVMGeneratedClasses(frame string) string {
   305  	frame = generatedMethodAccessor.ReplaceAllString(frame, "${1}_")
   306  	frame = lambdaGeneratedEnclosingClass.ReplaceAllString(frame, "${1}_")
   307  	frame = zstdJniSoLibName.ReplaceAllString(frame, "libzstd-jni-_.so")
   308  	frame = amazonCorrettoCryptoProvider.ReplaceAllString(frame, "libamazonCorrettoCryptoProvider_.so")
   309  	frame = pyroscopeAsyncProfiler.ReplaceAllString(frame, "libasyncProfiler-_.so")
   310  	return frame
   311  }
   312  
   313  func processSymbols(meta parser.ClassMetadata, cpool *parser.CPool) {
   314  	if meta.Name == "jdk.types.Symbol" {
   315  		for _, v := range cpool.Pool {
   316  			sym := v.(*parser.Symbol)
   317  			sym.String = mergeJVMGeneratedClasses(sym.String)
   318  		}
   319  	}
   320  }