github.com/pyroscope-io/pyroscope@v0.37.3-0.20230725203016-5f6947968bd0/pkg/convert/pprof/streaming/parser_streaming.go (about) 1 package streaming 2 3 import ( 4 "bytes" 5 "compress/gzip" 6 "context" 7 "fmt" 8 "github.com/pyroscope-io/pyroscope/pkg/stackbuilder" 9 "github.com/pyroscope-io/pyroscope/pkg/storage" 10 "github.com/pyroscope-io/pyroscope/pkg/storage/metadata" 11 "github.com/pyroscope-io/pyroscope/pkg/storage/segment" 12 "github.com/pyroscope-io/pyroscope/pkg/storage/tree" 13 "github.com/pyroscope-io/pyroscope/pkg/util/arenahelper" 14 "github.com/valyala/bytebufferpool" 15 "io" 16 "runtime/debug" 17 "strings" 18 "sync" 19 "time" 20 ) 21 22 type StackFormatter int 23 24 const ( 25 // StackFrameFormatterGo use only function name 26 StackFrameFormatterGo = 0 27 // StackFrameFormatterRuby use function name, line number, function name 28 StackFrameFormatterRuby = 1 29 ) 30 31 var PPROFBufPool = bytebufferpool.Pool{} 32 33 type ParserConfig struct { 34 Putter storage.Putter 35 SpyName string 36 Labels map[string]string 37 SampleTypes map[string]*tree.SampleTypeConfig 38 Formatter StackFormatter 39 ArenasEnabled bool 40 } 41 42 type VTStreamingParser struct { 43 putter storage.Putter 44 wbf stackbuilder.WriteBatchFactory 45 spyName string 46 labels map[string]string 47 48 sampleTypesConfig map[string]*tree.SampleTypeConfig 49 Formatter StackFormatter 50 ArenasEnabled bool 51 52 sampleTypesFilter func(string) bool 53 54 startTime time.Time 55 endTime time.Time 56 ctx context.Context 57 profile []byte 58 prev bool 59 cumulative bool 60 cumulativeOnly bool 61 62 nStrings int 63 profileIDLabelIndex int64 64 nFunctions int 65 nLocations int 66 nSampleTypes int 67 period int64 68 periodType valueType 69 sampleTypes []valueType 70 strings []istr 71 functions []function 72 locations []location 73 74 lineRefs locationFunctions 75 76 indexes []int 77 types []int64 78 79 tmpSample sample 80 81 finder finder 82 previousCache LabelsCache 83 newCache LabelsCache 84 wbCache writeBatchCache 85 arena arenahelper.ArenaWrapper 86 } 87 88 func NewStreamingParser(config ParserConfig) *VTStreamingParser { 89 res := &VTStreamingParser{} 90 res.Reset(config) 91 return res 92 } 93 func (p *VTStreamingParser) FreeArena() { 94 arenahelper.Free(p.arena) 95 } 96 func (p *VTStreamingParser) ParsePprof(ctx context.Context, startTime, endTime time.Time, bs []byte, cumulativeOnly bool) (err error) { 97 p.startTime = startTime 98 p.endTime = endTime 99 p.ctx = ctx 100 p.cumulativeOnly = cumulativeOnly 101 102 err = decompress(bs, func(profile []byte) error { 103 p.profile = profile 104 err := p.parsePprofDecompressed() 105 p.profile = nil 106 return err 107 }) 108 p.ctx = nil 109 return err 110 } 111 112 func (p *VTStreamingParser) parsePprofDecompressed() (err error) { 113 defer func() { 114 if recover() != nil { 115 err = fmt.Errorf(fmt.Sprintf("parse panic %s", debug.Stack())) 116 } 117 }() 118 119 if err = p.countStructs(); err != nil { 120 return err 121 } 122 if err = p.parseFunctionsAndLocations(); err != nil { 123 return err 124 } 125 if !p.haveKnownSampleTypes() { 126 return nil 127 } 128 129 p.newCache.Reset() 130 if err = p.parseSamples(); err != nil { 131 return err 132 } 133 return p.iterate(p.put) 134 } 135 136 // step 1 137 // - parse periodType 138 // - parse sampleType 139 // - count number of locations, functions, strings 140 func (p *VTStreamingParser) countStructs() error { 141 err := p.UnmarshalVTProfile(p.profile, opFlagCountStructs) 142 if err == nil { 143 p.functions = grow(p.arena, p.functions, p.nFunctions) 144 p.locations = grow(p.arena, p.locations, p.nLocations) 145 p.strings = grow(p.arena, p.strings, p.nStrings) 146 p.sampleTypes = grow(p.arena, p.sampleTypes, p.nSampleTypes) 147 p.profileIDLabelIndex = 0 148 } 149 return err 150 } 151 152 func (p *VTStreamingParser) parseFunctionsAndLocations() error { 153 p.lineRefs.reset(p.arena, p.nLocations) 154 err := p.UnmarshalVTProfile(p.profile, opFlagParseStructs) 155 if err == nil { 156 p.finder = newFinder(p.functions, p.locations) 157 for i := range p.sampleTypes { 158 p.sampleTypes[i].resolvedType = string(p.string(p.sampleTypes[i].Type)) 159 p.sampleTypes[i].resolvedUnit = string(p.string(p.sampleTypes[i].unit)) 160 } 161 p.periodType.resolvedType = string(p.string(p.periodType.Type)) 162 p.periodType.resolvedUnit = string(p.string(p.periodType.unit)) 163 } 164 return err 165 } 166 167 func (p *VTStreamingParser) haveKnownSampleTypes() bool { 168 p.indexes = grow(p.arena, p.indexes, len(p.sampleTypes)) 169 p.types = grow(p.arena, p.types, len(p.sampleTypes)) 170 for i, s := range p.sampleTypes { 171 ssType := p.string(s.Type) 172 173 st := string(ssType) 174 if p.sampleTypesFilter(st) { 175 if !p.cumulativeOnly || (p.cumulativeOnly && p.sampleTypesConfig[st].Cumulative) { 176 p.indexes = arenahelper.AppendA(p.indexes, i, p.arena) 177 p.types = arenahelper.AppendA(p.types, s.Type, p.arena) 178 } 179 } 180 } 181 if len(p.indexes) == 0 { 182 return false 183 } 184 return true 185 } 186 187 func (p *VTStreamingParser) parseSamples() error { 188 return p.UnmarshalVTProfile(p.profile, opFlagParseSamples) 189 } 190 191 func (p *VTStreamingParser) addStackLocation(lID uint64) error { 192 loc, ok := p.finder.FindLocation(lID) 193 if ok { 194 ref := loc.linesRef 195 lines := p.lineRefs.lines[(ref >> 32):(ref & 0xffffffff)] 196 for i := len(lines) - 1; i >= 0; i-- { 197 if err := p.addStackFrame(&lines[i]); err != nil { 198 return err 199 } 200 } 201 } 202 return nil 203 } 204 205 func (p *VTStreamingParser) addStackFrame(l *line) error { 206 fID := l.functionID 207 f, ok := p.finder.FindFunction(fID) 208 if !ok { 209 return nil 210 } 211 var frame []byte 212 switch p.Formatter { 213 case StackFrameFormatterRuby: 214 pFuncName := p.strings[f.name] 215 pFileName := p.strings[f.filename] 216 frame = []byte(fmt.Sprintf("%s:%d - %s", 217 p.profile[(pFileName>>32):(pFileName&0xffffffff)], 218 l.line, 219 p.profile[(pFuncName>>32):(pFuncName&0xffffffff)])) 220 default: 221 case StackFrameFormatterGo: 222 pFuncName := p.strings[f.name] 223 frame = p.profile[(pFuncName >> 32):(pFuncName & 0xffffffff)] 224 } 225 pSample := &p.tmpSample 226 if len(pSample.tmpStack) < cap(pSample.tmpStack) { 227 pSample.tmpStack = append(pSample.tmpStack, frame) 228 } else { 229 pSample.tmpStack = arenahelper.AppendA(pSample.tmpStack, frame, p.arena) 230 } 231 return nil 232 } 233 234 func (p *VTStreamingParser) string(i int64) []byte { 235 ps := p.strings[i] 236 return p.profile[(ps >> 32):(ps & 0xffffffff)] 237 } 238 239 func (p *VTStreamingParser) resolveSampleType(v int64) (*valueType, bool) { 240 for i := range p.sampleTypes { 241 if p.sampleTypes[i].Type == v { 242 return &p.sampleTypes[i], true 243 } 244 } 245 return nil, false 246 } 247 248 func (p *VTStreamingParser) iterate(fn func(stIndex int, st *valueType, l Labels, tr *tree.Tree) (keep bool, err error)) error { 249 err := p.newCache.iterate(func(stIndex int, l Labels, lh uint64, tr *tree.Tree) error { 250 t := &p.sampleTypes[stIndex] 251 keep, err := fn(stIndex, t, l, tr) 252 if err != nil { 253 return err 254 } 255 if !keep { 256 p.newCache.Remove(stIndex, lh) 257 } 258 return nil 259 }) 260 if err != nil { 261 return err 262 } 263 p.previousCache, p.newCache = p.newCache, p.previousCache 264 p.newCache.Reset() 265 return nil 266 } 267 268 func (p *VTStreamingParser) createTrees() { 269 for _, vi := range p.indexes { 270 v := uint64(p.tmpSample.tmpValues[vi]) 271 if v == 0 { 272 continue 273 } 274 s := p.tmpSample.tmpStack 275 if j := findLabelIndex(p.tmpSample.tmpLabels, p.profileIDLabelIndex); j >= 0 { 276 p.newCache.GetOrCreateTree(vi, CutLabel(p.arena, p.tmpSample.tmpLabels, j)).InsertStackA(s, v) 277 } 278 p.newCache.GetOrCreateTree(vi, p.tmpSample.tmpLabels).InsertStackA(s, v) 279 } 280 } 281 282 func (p *VTStreamingParser) put(stIndex int, st *valueType, l Labels, t *tree.Tree) (keep bool, err error) { 283 sampleTypeBytes := st.resolvedType 284 sampleType := sampleTypeBytes 285 sampleTypeConfig, ok := p.sampleTypesConfig[sampleType] 286 if !ok { 287 return false, fmt.Errorf("sample value type is unknown") 288 } 289 pi := storage.PutInput{ 290 StartTime: p.startTime, 291 EndTime: p.endTime, 292 SpyName: p.spyName, 293 Val: t, 294 } 295 // Cumulative profiles require two consecutive samples, 296 // therefore we have to cache this trie. 297 if sampleTypeConfig.Cumulative { 298 prev, found := p.previousCache.Get(stIndex, l.Hash()) 299 if !found { 300 // Keep the current entry in cache. 301 return true, nil 302 } 303 // Take diff with the previous tree. 304 // The result is written to prev, t is not changed. 305 pi.Val = prev.Diff(t) 306 } 307 pi.AggregationType = sampleTypeConfig.Aggregation 308 if sampleTypeConfig.Sampled { 309 pi.SampleRate = p.sampleRate() 310 } 311 if sampleTypeConfig.DisplayName != "" { 312 sampleType = sampleTypeConfig.DisplayName 313 } 314 if sampleTypeConfig.Units != "" { 315 pi.Units = sampleTypeConfig.Units 316 } else { 317 // TODO(petethepig): this conversion is questionable 318 unitsBytes := st.resolvedUnit 319 pi.Units = metadata.Units(unitsBytes) 320 if err != nil { 321 return false, err 322 } 323 } 324 pi.Key = p.buildName(sampleType, p.ResolveLabels(l)) 325 err = p.putter.Put(p.ctx, &pi) 326 return sampleTypeConfig.Cumulative, err 327 } 328 329 var vtStreamingParserPool = sync.Pool{New: func() any { 330 return &VTStreamingParser{} 331 }} 332 333 func VTStreamingParserFromPool(config ParserConfig) *VTStreamingParser { 334 res := vtStreamingParserPool.Get().(*VTStreamingParser) 335 res.Reset(config) 336 return res 337 } 338 339 func (p *VTStreamingParser) ResetCache() { 340 p.previousCache.Reset() 341 p.newCache.Reset() 342 } 343 344 func (p *VTStreamingParser) ReturnToPool() { 345 if p != nil { 346 vtStreamingParserPool.Put(p) 347 } 348 } 349 350 func (p *VTStreamingParser) ResolveLabels(l Labels) map[string]string { 351 m := make(map[string]string, len(l)) 352 for _, label := range l { 353 k := label >> 32 354 if k != 0 { 355 v := label & 0xffffffff 356 sk := p.string(int64(k)) 357 sv := p.string(int64(v)) 358 m[string(sk)] = string(sv) 359 } 360 } 361 return m 362 } 363 364 func (p *VTStreamingParser) buildName(sampleTypeName string, labels map[string]string) *segment.Key { 365 for k, v := range p.labels { 366 labels[k] = v 367 } 368 labels["__name__"] += "." + sampleTypeName 369 return segment.NewKey(labels) 370 } 371 372 func (p *VTStreamingParser) getAppMetadata(sampleTypeIndex int) (string, metadata.Metadata) { 373 st := &p.sampleTypes[sampleTypeIndex] 374 sampleType := st.resolvedType 375 sampleTypeConfig, ok := p.sampleTypesConfig[sampleType] 376 if !ok { 377 return "", metadata.Metadata{} 378 } 379 if sampleTypeConfig.DisplayName != "" { 380 sampleType = sampleTypeConfig.DisplayName 381 } 382 name := p.labels["__name__"] 383 if name == "" { 384 return "", metadata.Metadata{} 385 } 386 md := metadata.Metadata{SpyName: p.spyName} 387 if sampleTypeConfig.Sampled { 388 md.SampleRate = p.sampleRate() 389 } 390 if sampleTypeConfig.DisplayName != "" { 391 sampleType = sampleTypeConfig.DisplayName 392 } 393 if sampleTypeConfig.Units != "" { 394 md.Units = sampleTypeConfig.Units 395 } else { 396 // TODO(petethepig): this conversion is questionable 397 unitsBytes := st.resolvedUnit 398 md.Units = metadata.Units(unitsBytes) 399 } 400 md.AggregationType = sampleTypeConfig.Aggregation 401 return name + "." + sampleType, md 402 } 403 404 func (p *VTStreamingParser) sampleRate() uint32 { 405 if p.period <= 0 || p.periodType.unit <= 0 { 406 return 0 407 } 408 sampleUnit := time.Nanosecond 409 u := p.periodType.resolvedUnit 410 411 switch u { 412 case "microseconds": 413 sampleUnit = time.Microsecond 414 case "milliseconds": 415 sampleUnit = time.Millisecond 416 case "seconds": 417 sampleUnit = time.Second 418 } 419 420 return uint32(time.Second / (sampleUnit * time.Duration(p.period))) 421 } 422 423 func (p *VTStreamingParser) Reset(config ParserConfig) { 424 p.putter = config.Putter 425 p.spyName = config.SpyName 426 p.labels = config.Labels 427 p.sampleTypesConfig = config.SampleTypes 428 p.previousCache.Reset() 429 p.newCache.Reset() 430 p.wbCache.reset() 431 432 p.sampleTypesFilter = filterKnownSamples(config.SampleTypes) 433 p.Formatter = config.Formatter 434 p.ArenasEnabled = config.ArenasEnabled 435 if config.ArenasEnabled { 436 p.arena = arenahelper.NewArenaWrapper() 437 p.previousCache.arena = p.arena 438 p.newCache.arena = p.arena 439 } 440 } 441 442 func filterKnownSamples(sampleTypes map[string]*tree.SampleTypeConfig) func(string) bool { 443 return func(s string) bool { 444 _, ok := sampleTypes[s] 445 return ok 446 } 447 } 448 449 func findLabelIndex(tmpLabels []uint64, k int64) int { 450 for i, l := range tmpLabels { 451 lk := int64(l >> 32) 452 if lk == k { 453 return i 454 } 455 } 456 return -1 457 } 458 459 func grow[T any](a arenahelper.ArenaWrapper, it []T, n int) []T { 460 if it == nil || n > cap(it) { 461 return arenahelper.MakeSlice[T](a, 0, n) 462 } 463 return it[:0] 464 } 465 466 func StackFrameFormatterForSpyName(spyName string) StackFormatter { 467 if spyName == "rbspy" || spyName == "pyspy" { 468 return StackFrameFormatterRuby 469 } 470 return StackFrameFormatterGo 471 } 472 473 func decompress(bs []byte, f func([]byte) error) error { 474 var err error 475 if len(bs) < 2 { 476 err = fmt.Errorf("failed to read pprof profile header") 477 } else if bs[0] == 0x1f && bs[1] == 0x8b { 478 var gzipr *gzip.Reader 479 gzipr, err = gzip.NewReader(bytes.NewReader(bs)) 480 if err != nil { 481 err = fmt.Errorf("failed to create pprof profile zip reader: %w", err) 482 } else { 483 buf := PPROFBufPool.Get() 484 if _, err = io.Copy(buf, gzipr); err != nil { 485 err = fmt.Errorf("failed to decompress gzip: %w", err) 486 } else { 487 err = f(buf.Bytes()) 488 } 489 PPROFBufPool.Put(buf) 490 _ = gzipr.Close() 491 } 492 } else { 493 err = f(bs) 494 } 495 return err 496 } 497 498 func stack2string(stack [][]byte, sep string) string { 499 sb := strings.Builder{} 500 for i, frame := range stack { 501 if i != 0 { 502 sb.WriteString(sep) 503 } 504 sb.Write(frame) 505 } 506 return sb.String() 507 }