github.com/alibaba/ilogtail/pkg@v0.0.0-20250526110833-c53b480d046c/helper/profile/pyroscope/jfr/parser.go (about)

     1  // Copyright 2023 iLogtail Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package jfr
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"io"
    21  	"regexp"
    22  	"strconv"
    23  	"strings"
    24  	"time"
    25  
    26  	"github.com/cespare/xxhash"
    27  	"github.com/pyroscope-io/jfr-parser/parser"
    28  	"github.com/pyroscope-io/pyroscope/pkg/storage/segment"
    29  	"github.com/pyroscope-io/pyroscope/pkg/storage/tree"
    30  
    31  	"github.com/alibaba/ilogtail/pkg/helper/profile"
    32  	"github.com/alibaba/ilogtail/pkg/logger"
    33  )
    34  
    35  const (
    36  	_ = iota
    37  	sampleTypeCPU
    38  	sampleTypeWall
    39  	sampleTypeInTLABObjects
    40  	sampleTypeInTLABBytes
    41  	sampleTypeOutTLABObjects
    42  	sampleTypeOutTLABBytes
    43  	sampleTypeLockSamples
    44  	sampleTypeLockDuration
    45  )
    46  
    47  func (r *RawProfile) ParseJFR(ctx context.Context, meta *profile.Meta, body io.Reader, jfrLabels *LabelsSnapshot, cb profile.CallbackFunc) (err error) {
    48  	if meta.SampleRate > 0 {
    49  		meta.Tags["_sample_rate_"] = strconv.FormatUint(uint64(meta.SampleRate), 10)
    50  	}
    51  	chunks, err := parser.ParseWithOptions(body, &parser.ChunkParseOptions{
    52  		CPoolProcessor: processSymbols,
    53  	})
    54  	if err != nil {
    55  		return fmt.Errorf("unable to parse JFR format: %w", err)
    56  	}
    57  	for _, c := range chunks {
    58  		r.parseChunk(ctx, meta, c, jfrLabels, cb)
    59  	}
    60  	return nil
    61  }
    62  
    63  // revive:disable-next-line:cognitive-complexity necessary complexity
    64  func (r *RawProfile) parseChunk(ctx context.Context, meta *profile.Meta, c parser.Chunk, jfrLabels *LabelsSnapshot, convertCb profile.CallbackFunc) {
    65  	stackMap := make(map[uint64]*profile.Stack)
    66  	valMap := make(map[uint64][]uint64)
    67  	labelMap := make(map[uint64]map[string]string)
    68  	typeMap := make(map[uint64][]string)
    69  	unitMap := make(map[uint64][]string)
    70  	aggtypeMap := make(map[uint64][]string)
    71  
    72  	var event string
    73  	for _, e := range c.Events {
    74  		if as, ok := e.(*parser.ActiveSetting); ok {
    75  			if as.Name == "event" {
    76  				event = as.Value
    77  			}
    78  		}
    79  	}
    80  	cache := make(tree.LabelsCache)
    81  	for contextID, events := range groupEventsByContextID(c.Events) {
    82  		labels := getContextLabels(contextID, jfrLabels)
    83  		lh := labels.Hash()
    84  		for _, e := range events {
    85  			switch obj := e.(type) {
    86  			case *parser.ExecutionSample:
    87  				if fs := frames(obj.StackTrace); fs != nil {
    88  					if obj.State.Name == "STATE_RUNNABLE" {
    89  						cache.GetOrCreateTreeByHash(sampleTypeCPU, labels, lh).InsertStackString(fs, 1)
    90  					}
    91  					cache.GetOrCreateTreeByHash(sampleTypeWall, labels, lh).InsertStackString(fs, 1)
    92  				}
    93  			case *parser.ObjectAllocationInNewTLAB:
    94  				if fs := frames(obj.StackTrace); fs != nil {
    95  					cache.GetOrCreateTreeByHash(sampleTypeInTLABObjects, labels, lh).InsertStackString(fs, 1)
    96  					cache.GetOrCreateTreeByHash(sampleTypeInTLABBytes, labels, lh).InsertStackString(fs, uint64(obj.TLABSize))
    97  				}
    98  			case *parser.ObjectAllocationOutsideTLAB:
    99  				if fs := frames(obj.StackTrace); fs != nil {
   100  					cache.GetOrCreateTreeByHash(sampleTypeOutTLABObjects, labels, lh).InsertStackString(fs, 1)
   101  					cache.GetOrCreateTreeByHash(sampleTypeOutTLABBytes, labels, lh).InsertStackString(fs, uint64(obj.AllocationSize))
   102  				}
   103  			case *parser.JavaMonitorEnter:
   104  				if fs := frames(obj.StackTrace); fs != nil {
   105  					cache.GetOrCreateTreeByHash(sampleTypeLockSamples, labels, lh).InsertStackString(fs, 1)
   106  					cache.GetOrCreateTreeByHash(sampleTypeLockDuration, labels, lh).InsertStackString(fs, uint64(obj.Duration))
   107  				}
   108  			case *parser.ThreadPark:
   109  				if fs := frames(obj.StackTrace); fs != nil {
   110  					cache.GetOrCreateTreeByHash(sampleTypeLockSamples, labels, lh).InsertStackString(fs, 1)
   111  					cache.GetOrCreateTreeByHash(sampleTypeLockDuration, labels, lh).InsertStackString(fs, uint64(obj.Duration))
   112  				}
   113  			}
   114  		}
   115  	}
   116  	for sampleType, entries := range cache {
   117  		for _, e := range entries {
   118  			if i := labelIndex(jfrLabels, e.Labels, segment.ProfileIDLabelName); i != -1 {
   119  				cutLabels := tree.CutLabel(e.Labels, i)
   120  				cache.GetOrCreateTree(sampleType, cutLabels).Merge(e.Tree)
   121  			}
   122  		}
   123  	}
   124  	cb := func(n string, labels tree.Labels, t *tree.Tree, u profile.Units) {
   125  		t.IterateStacks(func(name string, self uint64, stack []string) {
   126  			unit := u
   127  			if u == profile.SamplesUnits {
   128  				unit = profile.NanosecondsUnit
   129  				self *= uint64(time.Second.Nanoseconds() / int64(meta.SampleRate))
   130  			}
   131  
   132  			id := xxhash.Sum64String(strings.Join(stack, ""))
   133  			stackMap[id] = &profile.Stack{
   134  				Name:  profile.FormatPositionAndName(name, profile.FormatType(meta.SpyName)),
   135  				Stack: profile.FormatPostionAndNames(stack[1:], profile.FormatType(meta.SpyName)),
   136  			}
   137  			aggtypeMap[id] = append(aggtypeMap[id], string(meta.AggregationType))
   138  			typeMap[id] = append(typeMap[id], n)
   139  			unitMap[id] = append(unitMap[id], string(unit))
   140  			valMap[id] = append(valMap[id], self)
   141  			labelMap[id] = buildKey(meta.Tags, labels, jfrLabels).Labels()
   142  		})
   143  	}
   144  	for sampleType, entries := range cache {
   145  		if sampleType == sampleTypeWall && event != "wall" {
   146  			continue
   147  		}
   148  		n := getName(sampleType, event)
   149  		units := getUnits(sampleType)
   150  		for _, e := range entries {
   151  			cb(n, e.Labels, e.Tree, units)
   152  		}
   153  	}
   154  
   155  	for id, fs := range stackMap {
   156  		if len(valMap[id]) == 0 || len(typeMap[id]) == 0 || len(unitMap[id]) == 0 || len(aggtypeMap[id]) == 0 || len(labelMap[id]) == 0 {
   157  			logger.Warning(ctx, "PPROF_PROFILE_ALARM", "stack don't have enough meta or values", fs)
   158  			continue
   159  		}
   160  		convertCb(id, fs, valMap[id], typeMap[id], unitMap[id], aggtypeMap[id], meta.StartTime.UnixNano(), meta.EndTime.UnixNano(), labelMap[id])
   161  	}
   162  }
   163  
   164  func getName(sampleType int64, event string) string {
   165  	switch sampleType {
   166  	case sampleTypeCPU:
   167  		if event == "cpu" || event == "itimer" || event == "wall" {
   168  			profile := event
   169  			if event == "wall" {
   170  				profile = "cpu"
   171  			}
   172  			return profile
   173  		}
   174  	case sampleTypeWall:
   175  		return "wall"
   176  	case sampleTypeInTLABObjects:
   177  		return "alloc_in_new_tlab_objects"
   178  	case sampleTypeInTLABBytes:
   179  		return "alloc_in_new_tlab_bytes"
   180  	case sampleTypeOutTLABObjects:
   181  		return "alloc_outside_tlab_objects"
   182  	case sampleTypeOutTLABBytes:
   183  		return "alloc_outside_tlab_bytes"
   184  	case sampleTypeLockSamples:
   185  		return "lock_count"
   186  	case sampleTypeLockDuration:
   187  		return "lock_duration"
   188  	}
   189  	return "unknown"
   190  }
   191  
   192  func getUnits(sampleType int64) profile.Units {
   193  	switch sampleType {
   194  	case sampleTypeCPU:
   195  		return profile.SamplesUnits
   196  	case sampleTypeWall:
   197  		return profile.SamplesUnits
   198  	case sampleTypeInTLABObjects:
   199  		return profile.ObjectsUnit
   200  	case sampleTypeInTLABBytes:
   201  		return profile.BytesUnit
   202  	case sampleTypeOutTLABObjects:
   203  		return profile.ObjectsUnit
   204  	case sampleTypeOutTLABBytes:
   205  		return profile.BytesUnit
   206  	case sampleTypeLockSamples:
   207  		return profile.LockSamplesUnits
   208  	case sampleTypeLockDuration:
   209  		return profile.LockNanosecondsUnits
   210  	}
   211  	return profile.SamplesUnits
   212  }
   213  
   214  func buildKey(appLabels map[string]string, labels tree.Labels, snapshot *LabelsSnapshot) *segment.Key {
   215  	finalLabels := map[string]string{}
   216  	for k, v := range appLabels {
   217  		finalLabels[k] = v
   218  	}
   219  	for _, v := range labels {
   220  		ks, ok := snapshot.Strings[v.Key]
   221  		if !ok {
   222  			continue
   223  		}
   224  		vs, ok := snapshot.Strings[v.Str]
   225  		if !ok {
   226  			continue
   227  		}
   228  		finalLabels[ks] = vs
   229  	}
   230  	return segment.NewKey(finalLabels)
   231  }
   232  
   233  func getContextLabels(contextID int64, labels *LabelsSnapshot) tree.Labels {
   234  	if contextID == 0 {
   235  		return nil
   236  	}
   237  	var ctx *Context
   238  	var ok bool
   239  	if ctx, ok = labels.Contexts[contextID]; !ok {
   240  		return nil
   241  	}
   242  	res := make(tree.Labels, 0, len(ctx.Labels))
   243  	for k, v := range ctx.Labels {
   244  		res = append(res, &tree.Label{Key: k, Str: v})
   245  	}
   246  	return res
   247  }
   248  func labelIndex(s *LabelsSnapshot, labels tree.Labels, key string) int {
   249  	for i, label := range labels {
   250  		if n, ok := s.Strings[label.Key]; ok {
   251  			if n == key {
   252  				return i
   253  			}
   254  		}
   255  	}
   256  	return -1
   257  }
   258  
   259  func groupEventsByContextID(events []parser.Parseable) map[int64][]parser.Parseable {
   260  	res := make(map[int64][]parser.Parseable)
   261  	for _, e := range events {
   262  		switch obj := e.(type) {
   263  		case *parser.ExecutionSample:
   264  			res[obj.ContextId] = append(res[obj.ContextId], e)
   265  		case *parser.ObjectAllocationInNewTLAB:
   266  			res[obj.ContextId] = append(res[obj.ContextId], e)
   267  		case *parser.ObjectAllocationOutsideTLAB:
   268  			res[obj.ContextId] = append(res[obj.ContextId], e)
   269  		case *parser.JavaMonitorEnter:
   270  			res[obj.ContextId] = append(res[obj.ContextId], e)
   271  		case *parser.ThreadPark:
   272  			res[obj.ContextId] = append(res[obj.ContextId], e)
   273  		}
   274  	}
   275  	return res
   276  }
   277  
   278  func frames(st *parser.StackTrace) []string {
   279  	if st == nil {
   280  		return nil
   281  	}
   282  	frames := make([]string, 0, len(st.Frames))
   283  	for i := len(st.Frames) - 1; i >= 0; i-- {
   284  		f := st.Frames[i]
   285  		// TODO(abeaumont): Add support for line numbers.
   286  		if f.Method != nil && f.Method.Type != nil && f.Method.Type.Name != nil && f.Method.Name != nil {
   287  			frames = append(frames, f.Method.Type.Name.String+"."+f.Method.Name.String)
   288  		}
   289  	}
   290  	return frames
   291  }
   292  
   293  // jdk/internal/reflect/GeneratedMethodAccessor31
   294  var generatedMethodAccessor = regexp.MustCompile(`^(jdk/internal/reflect/GeneratedMethodAccessor)(\d+)$`)
   295  
   296  // org/example/rideshare/OrderService$$Lambda$669.0x0000000800fd7318.run
   297  var lambdaGeneratedEnclosingClass = regexp.MustCompile(`^(.+\$\$Lambda\$)\d+[./](0x[\da-f]+|\d+)$`)
   298  
   299  // libzstd-jni-1.5.1-16931311898282279136.so.Java_com_github_luben_zstd_ZstdInputStreamNoFinalizer_decompressStream
   300  var zstdJniSoLibName = regexp.MustCompile(`^(\.?/tmp/)?(libzstd-jni-\d+\.\d+\.\d+-)(\d+)(\.so)( \(deleted\))?$`)
   301  
   302  // ./tmp/libamazonCorrettoCryptoProvider109b39cf33c563eb.so
   303  var amazonCorrettoCryptoProvider = regexp.MustCompile(`^(\.?/tmp/)?(libamazonCorrettoCryptoProvider)([0-9a-f]{16})(\.so)( \(deleted\))?$`)
   304  
   305  // libasyncProfiler-linux-arm64-17b9a1d8156277a98ccc871afa9a8f69215f92.so
   306  var pyroscopeAsyncProfiler = regexp.MustCompile(
   307  	`^(\.?/tmp/)?(libasyncProfiler)-(linux-arm64|linux-musl-x64|linux-x64|macos)-(17b9a1d8156277a98ccc871afa9a8f69215f92)(\.so)( \(deleted\))?$`)
   308  
   309  func mergeJVMGeneratedClasses(frame string) string {
   310  	frame = generatedMethodAccessor.ReplaceAllString(frame, "${1}_")
   311  	frame = lambdaGeneratedEnclosingClass.ReplaceAllString(frame, "${1}_")
   312  	frame = zstdJniSoLibName.ReplaceAllString(frame, "libzstd-jni-_.so")
   313  	frame = amazonCorrettoCryptoProvider.ReplaceAllString(frame, "libamazonCorrettoCryptoProvider_.so")
   314  	frame = pyroscopeAsyncProfiler.ReplaceAllString(frame, "libasyncProfiler-_.so")
   315  	return frame
   316  }
   317  
   318  func processSymbols(meta parser.ClassMetadata, cpool *parser.CPool) {
   319  	if meta.Name == "jdk.types.Symbol" {
   320  		for _, v := range cpool.Pool {
   321  			sym := v.(*parser.Symbol)
   322  			sym.String = mergeJVMGeneratedClasses(sym.String)
   323  		}
   324  	}
   325  }