github.com/pyroscope-io/pyroscope@v0.37.3-0.20230725203016-5f6947968bd0/pkg/convert/jfr/parser.go (about) 1 package jfr 2 3 import ( 4 "context" 5 "fmt" 6 "github.com/hashicorp/go-multierror" 7 "github.com/pyroscope-io/jfr-parser/parser" 8 "github.com/pyroscope-io/pyroscope/pkg/storage" 9 "github.com/pyroscope-io/pyroscope/pkg/storage/metadata" 10 "github.com/pyroscope-io/pyroscope/pkg/storage/segment" 11 "github.com/pyroscope-io/pyroscope/pkg/storage/tree" 12 "io" 13 "regexp" 14 ) 15 16 const ( 17 _ = iota 18 sampleTypeCPU 19 sampleTypeWall 20 sampleTypeInTLABObjects 21 sampleTypeInTLABBytes 22 sampleTypeOutTLABObjects 23 sampleTypeOutTLABBytes 24 sampleTypeLockSamples 25 sampleTypeLockDuration 26 sampleTypeLiveObject 27 ) 28 29 func ParseJFR(ctx context.Context, s storage.Putter, body io.Reader, pi *storage.PutInput, jfrLabels *LabelsSnapshot) (err error) { 30 chunks, err := parser.ParseWithOptions(body, &parser.ChunkParseOptions{ 31 CPoolProcessor: processSymbols, 32 }) 33 if err != nil { 34 return fmt.Errorf("unable to parse JFR format: %w", err) 35 } 36 for _, c := range chunks { 37 if pErr := parse(ctx, c, s, pi, jfrLabels); pErr != nil { 38 err = multierror.Append(err, pErr) 39 } 40 } 41 return err 42 } 43 44 // revive:disable-next-line:cognitive-complexity necessary complexity 45 func parse(ctx context.Context, c parser.Chunk, s storage.Putter, piOriginal *storage.PutInput, jfrLabels *LabelsSnapshot) (err error) { 46 var event string 47 for _, e := range c.Events { 48 if as, ok := e.(*parser.ActiveSetting); ok { 49 if as.Name == "event" { 50 event = as.Value 51 } 52 } 53 } 54 cache := make(tree.LabelsCache) 55 for contextID, events := range groupEventsByContextID(c.Events) { 56 labels := getContextLabels(contextID, jfrLabels) 57 lh := labels.Hash() 58 for _, e := range events { 59 switch e.(type) { 60 case *parser.ExecutionSample: 61 es := e.(*parser.ExecutionSample) 62 if fs := frames(es.StackTrace); fs != nil { 63 if es.State.Name == "STATE_RUNNABLE" { 64 cache.GetOrCreateTreeByHash(sampleTypeCPU, labels, lh).InsertStackString(fs, 1) 65 } 66 cache.GetOrCreateTreeByHash(sampleTypeWall, labels, lh).InsertStackString(fs, 1) 67 } 68 case *parser.ObjectAllocationInNewTLAB: 69 oa := e.(*parser.ObjectAllocationInNewTLAB) 70 if fs := frames(oa.StackTrace); fs != nil { 71 cache.GetOrCreateTreeByHash(sampleTypeInTLABObjects, labels, lh).InsertStackString(fs, 1) 72 cache.GetOrCreateTreeByHash(sampleTypeInTLABBytes, labels, lh).InsertStackString(fs, uint64(oa.TLABSize)) 73 } 74 case *parser.ObjectAllocationOutsideTLAB: 75 oa := e.(*parser.ObjectAllocationOutsideTLAB) 76 if fs := frames(oa.StackTrace); fs != nil { 77 cache.GetOrCreateTreeByHash(sampleTypeOutTLABObjects, labels, lh).InsertStackString(fs, 1) 78 cache.GetOrCreateTreeByHash(sampleTypeOutTLABBytes, labels, lh).InsertStackString(fs, uint64(oa.AllocationSize)) 79 } 80 case *parser.JavaMonitorEnter: 81 jme := e.(*parser.JavaMonitorEnter) 82 if fs := frames(jme.StackTrace); fs != nil { 83 cache.GetOrCreateTreeByHash(sampleTypeLockSamples, labels, lh).InsertStackString(fs, 1) 84 cache.GetOrCreateTreeByHash(sampleTypeLockDuration, labels, lh).InsertStackString(fs, uint64(jme.Duration)) 85 } 86 case *parser.ThreadPark: 87 tp := e.(*parser.ThreadPark) 88 if fs := frames(tp.StackTrace); fs != nil { 89 cache.GetOrCreateTreeByHash(sampleTypeLockSamples, labels, lh).InsertStackString(fs, 1) 90 cache.GetOrCreateTreeByHash(sampleTypeLockDuration, labels, lh).InsertStackString(fs, uint64(tp.Duration)) 91 } 92 case *parser.LiveObject: 93 lo := e.(*parser.LiveObject) 94 if fs := frames(lo.StackTrace); fs != nil { 95 cache.GetOrCreateTreeByHash(sampleTypeLiveObject, labels, lh).InsertStackString(fs, 1) 96 } 97 } 98 } 99 } 100 for sampleType, entries := range cache { 101 for _, e := range entries { 102 if i := labelIndex(jfrLabels, e.Labels, segment.ProfileIDLabelName); i != -1 { 103 cutLabels := tree.CutLabel(e.Labels, i) 104 cache.GetOrCreateTree(sampleType, cutLabels).Merge(e.Tree) 105 } 106 } 107 } 108 cb := func(n string, labels tree.Labels, t *tree.Tree, u metadata.Units, at metadata.AggregationType) { 109 key := buildKey(n, piOriginal.Key.Labels(), labels, jfrLabels) 110 pi := &storage.PutInput{ 111 StartTime: piOriginal.StartTime, 112 EndTime: piOriginal.EndTime, 113 Key: key, 114 Val: t, 115 SpyName: piOriginal.SpyName, 116 SampleRate: piOriginal.SampleRate, 117 Units: u, 118 AggregationType: at, 119 } 120 if putErr := s.Put(ctx, pi); putErr != nil { 121 err = multierror.Append(err, putErr) 122 } 123 } 124 for sampleType, entries := range cache { 125 if sampleType == sampleTypeWall && event != "wall" { 126 continue 127 } 128 n := getName(sampleType, event) 129 units := getUnits(sampleType) 130 at := aggregationType(sampleType) 131 for _, e := range entries { 132 cb(n, e.Labels, e.Tree, units, at) 133 } 134 } 135 return err 136 } 137 138 func getName(sampleType int64, event string) string { 139 switch sampleType { 140 case sampleTypeCPU: 141 if event == "cpu" || event == "itimer" || event == "wall" { 142 profile := event 143 if event == "wall" { 144 profile = "cpu" 145 } 146 return profile 147 } 148 case sampleTypeWall: 149 return "wall" 150 case sampleTypeInTLABObjects: 151 return "alloc_in_new_tlab_objects" 152 case sampleTypeInTLABBytes: 153 return "alloc_in_new_tlab_bytes" 154 case sampleTypeOutTLABObjects: 155 return "alloc_outside_tlab_objects" 156 case sampleTypeOutTLABBytes: 157 return "alloc_outside_tlab_bytes" 158 case sampleTypeLockSamples: 159 return "lock_count" 160 case sampleTypeLockDuration: 161 return "lock_duration" 162 case sampleTypeLiveObject: 163 return "live" 164 } 165 return "unknown" 166 } 167 168 func aggregationType(sampleType int64) metadata.AggregationType { 169 switch sampleType { 170 case sampleTypeLiveObject: 171 return metadata.AverageAggregationType 172 default: 173 return metadata.SumAggregationType 174 } 175 } 176 177 func getUnits(sampleType int64) metadata.Units { 178 switch sampleType { 179 case sampleTypeCPU: 180 return metadata.SamplesUnits 181 case sampleTypeWall: 182 return metadata.SamplesUnits 183 case sampleTypeInTLABObjects: 184 return metadata.ObjectsUnits 185 case sampleTypeInTLABBytes: 186 return metadata.BytesUnits 187 case sampleTypeOutTLABObjects: 188 return metadata.ObjectsUnits 189 case sampleTypeOutTLABBytes: 190 return metadata.BytesUnits 191 case sampleTypeLockSamples: 192 return metadata.LockSamplesUnits 193 case sampleTypeLockDuration: 194 return metadata.LockNanosecondsUnits 195 case sampleTypeLiveObject: 196 return metadata.ObjectsUnits 197 } 198 return metadata.SamplesUnits 199 } 200 201 func buildKey(n string, appLabels map[string]string, labels tree.Labels, snapshot *LabelsSnapshot) *segment.Key { 202 finalLabels := map[string]string{} 203 for k, v := range appLabels { 204 finalLabels[k] = v 205 } 206 for _, v := range labels { 207 ks, ok := snapshot.Strings[v.Key] 208 if !ok { 209 continue 210 } 211 vs, ok := snapshot.Strings[v.Str] 212 finalLabels[ks] = vs 213 } 214 215 finalLabels["__name__"] += "." + n 216 return segment.NewKey(finalLabels) 217 } 218 219 func getContextLabels(contextID int64, labels *LabelsSnapshot) tree.Labels { 220 if contextID == 0 { 221 return nil 222 } 223 var ctx *Context 224 var ok bool 225 if ctx, ok = labels.Contexts[contextID]; !ok { 226 return nil 227 } 228 res := make(tree.Labels, 0, len(ctx.Labels)) 229 for k, v := range ctx.Labels { 230 res = append(res, &tree.Label{Key: k, Str: v}) 231 } 232 return res 233 } 234 func labelIndex(s *LabelsSnapshot, labels tree.Labels, key string) int { 235 for i, label := range labels { 236 if n, ok := s.Strings[label.Key]; ok { 237 if n == key { 238 return i 239 } 240 } 241 } 242 return -1 243 } 244 245 func groupEventsByContextID(events []parser.Parseable) map[int64][]parser.Parseable { 246 res := make(map[int64][]parser.Parseable) 247 for _, e := range events { 248 switch e.(type) { 249 case *parser.ExecutionSample: 250 es := e.(*parser.ExecutionSample) 251 res[es.ContextId] = append(res[es.ContextId], e) 252 case *parser.ObjectAllocationInNewTLAB: 253 oa := e.(*parser.ObjectAllocationInNewTLAB) 254 res[oa.ContextId] = append(res[oa.ContextId], e) 255 case *parser.ObjectAllocationOutsideTLAB: 256 oa := e.(*parser.ObjectAllocationOutsideTLAB) 257 res[oa.ContextId] = append(res[oa.ContextId], e) 258 case *parser.JavaMonitorEnter: 259 jme := e.(*parser.JavaMonitorEnter) 260 res[jme.ContextId] = append(res[jme.ContextId], e) 261 case *parser.ThreadPark: 262 tp := e.(*parser.ThreadPark) 263 res[tp.ContextId] = append(res[tp.ContextId], e) 264 case *parser.LiveObject: 265 res[0] = append(res[0], e) 266 } 267 } 268 return res 269 } 270 271 func frames(st *parser.StackTrace) []string { 272 if st == nil { 273 return nil 274 } 275 frames := make([]string, 0, len(st.Frames)) 276 for i := len(st.Frames) - 1; i >= 0; i-- { 277 f := st.Frames[i] 278 // TODO(abeaumont): Add support for line numbers. 279 if f.Method != nil && f.Method.Type != nil && f.Method.Type.Name != nil && f.Method.Name != nil { 280 frames = append(frames, f.Method.Type.Name.String+"."+f.Method.Name.String) 281 } 282 } 283 return frames 284 } 285 286 // jdk/internal/reflect/GeneratedMethodAccessor31 287 var generatedMethodAccessor = regexp.MustCompile("^(jdk/internal/reflect/GeneratedMethodAccessor)(\\d+)$") 288 289 // org/example/rideshare/OrderService$$Lambda$669.0x0000000800fd7318.run 290 var lambdaGeneratedEnclosingClass = regexp.MustCompile("^(.+\\$\\$Lambda\\$)\\d+[./](0x[\\da-f]+|\\d+)$") 291 292 // libzstd-jni-1.5.1-16931311898282279136.so.Java_com_github_luben_zstd_ZstdInputStreamNoFinalizer_decompressStream 293 var zstdJniSoLibName = regexp.MustCompile("^(\\.?/tmp/)?(libzstd-jni-\\d+\\.\\d+\\.\\d+-)(\\d+)(\\.so)( \\(deleted\\))?$") 294 295 // ./tmp/libamazonCorrettoCryptoProvider109b39cf33c563eb.so 296 // ./tmp/amazonCorrettoCryptoProviderNativeLibraries.7382c2f79097f415/libcrypto.so (deleted) 297 var amazonCorrettoCryptoProvider = regexp.MustCompile("^(\\.?/tmp/)?(lib)?(amazonCorrettoCryptoProvider)(NativeLibraries\\.)?([0-9a-f]{16})" + 298 "(/libcrypto|/libamazonCorrettoCryptoProvider)?(\\.so)( \\(deleted\\))?$") 299 300 // libasyncProfiler-linux-arm64-17b9a1d8156277a98ccc871afa9a8f69215f92.so 301 var pyroscopeAsyncProfiler = regexp.MustCompile( 302 "^(\\.?/tmp/)?(libasyncProfiler)-(linux-arm64|linux-musl-x64|linux-x64|macos)-(17b9a1d8156277a98ccc871afa9a8f69215f92)(\\.so)( \\(deleted\\))?$") 303 304 func mergeJVMGeneratedClasses(frame string) string { 305 frame = generatedMethodAccessor.ReplaceAllString(frame, "${1}_") 306 frame = lambdaGeneratedEnclosingClass.ReplaceAllString(frame, "${1}_") 307 frame = zstdJniSoLibName.ReplaceAllString(frame, "libzstd-jni-_.so") 308 frame = amazonCorrettoCryptoProvider.ReplaceAllString(frame, "libamazonCorrettoCryptoProvider_.so") 309 frame = pyroscopeAsyncProfiler.ReplaceAllString(frame, "libasyncProfiler-_.so") 310 return frame 311 } 312 313 func processSymbols(meta parser.ClassMetadata, cpool *parser.CPool) { 314 if meta.Name == "jdk.types.Symbol" { 315 for _, v := range cpool.Pool { 316 sym := v.(*parser.Symbol) 317 sym.String = mergeJVMGeneratedClasses(sym.String) 318 } 319 } 320 }