github.com/instana/go-sensor@v1.62.2-0.20240520081010-4919868049e1/autoprofile/internal/pprof/profile/legacy_profile.go (about) 1 // Copyright 2014 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // This file implements parsers to convert legacy profiles into the 6 // profile.proto format. 7 8 package profile 9 10 import ( 11 "bufio" 12 "bytes" 13 "fmt" 14 "io" 15 "math" 16 "regexp" 17 "strconv" 18 "strings" 19 ) 20 21 var ( 22 countStartRE = regexp.MustCompile(`\A(\w+) profile: total \d+\n\z`) 23 countRE = regexp.MustCompile(`\A(\d+) @(( 0x[0-9a-f]+)+)\n\z`) 24 25 heapHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] *@ *(heap[_a-z0-9]*)/?(\d*)`) 26 heapSampleRE = regexp.MustCompile(`(-?\d+): *(-?\d+) *\[ *(\d+): *(\d+) *] @([ x0-9a-f]*)`) 27 28 contentionSampleRE = regexp.MustCompile(`(\d+) *(\d+) @([ x0-9a-f]*)`) 29 30 hexNumberRE = regexp.MustCompile(`0x[0-9a-f]+`) 31 32 growthHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ growthz`) 33 34 fragmentationHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ fragmentationz`) 35 36 threadzStartRE = regexp.MustCompile(`--- threadz \d+ ---`) 37 threadStartRE = regexp.MustCompile(`--- Thread ([[:xdigit:]]+) \(name: (.*)/(\d+)\) stack: ---`) 38 39 procMapsRE = regexp.MustCompile(`([[:xdigit:]]+)-([[:xdigit:]]+)\s+([-rwxp]+)\s+([[:xdigit:]]+)\s+([[:xdigit:]]+):([[:xdigit:]]+)\s+([[:digit:]]+)\s*(\S+)?`) 40 41 briefMapsRE = regexp.MustCompile(`\s*([[:xdigit:]]+)-([[:xdigit:]]+):\s*(\S+)(\s.*@)?([[:xdigit:]]+)?`) 42 43 // LegacyHeapAllocated instructs the heapz parsers to use the 44 // allocated memory stats instead of the default in-use memory. Note 45 // that tcmalloc doesn't provide all allocated memory, only in-use 46 // stats. 47 LegacyHeapAllocated bool 48 ) 49 50 func isSpaceOrComment(line string) bool { 51 trimmed := strings.TrimSpace(line) 52 return len(trimmed) == 0 || trimmed[0] == '#' 53 } 54 55 // parseGoCount parses a Go count profile (e.g., threadcreate or 56 // goroutine) and returns a new Profile. 57 func parseGoCount(b []byte) (*Profile, error) { 58 r := bytes.NewBuffer(b) 59 60 var line string 61 var err error 62 for { 63 // Skip past comments and empty lines seeking a real header. 64 line, err = r.ReadString('\n') 65 if err != nil { 66 return nil, err 67 } 68 if !isSpaceOrComment(line) { 69 break 70 } 71 } 72 73 m := countStartRE.FindStringSubmatch(line) 74 if m == nil { 75 return nil, errUnrecognized 76 } 77 profileType := m[1] 78 p := &Profile{ 79 PeriodType: &ValueType{Type: profileType, Unit: "count"}, 80 Period: 1, 81 SampleType: []*ValueType{{Type: profileType, Unit: "count"}}, 82 } 83 locations := make(map[uint64]*Location) 84 for { 85 line, err = r.ReadString('\n') 86 if err != nil { 87 if err == io.EOF { 88 break 89 } 90 return nil, err 91 } 92 if isSpaceOrComment(line) { 93 continue 94 } 95 if strings.HasPrefix(line, "---") { 96 break 97 } 98 m := countRE.FindStringSubmatch(line) 99 if m == nil { 100 return nil, errMalformed 101 } 102 n, err := strconv.ParseInt(m[1], 0, 64) 103 if err != nil { 104 return nil, errMalformed 105 } 106 fields := strings.Fields(m[2]) 107 locs := make([]*Location, 0, len(fields)) 108 for _, stk := range fields { 109 addr, err := strconv.ParseUint(stk, 0, 64) 110 if err != nil { 111 return nil, errMalformed 112 } 113 // Adjust all frames by -1 to land on the call instruction. 114 addr-- 115 loc := locations[addr] 116 if loc == nil { 117 loc = &Location{ 118 Address: addr, 119 } 120 locations[addr] = loc 121 p.Location = append(p.Location, loc) 122 } 123 locs = append(locs, loc) 124 } 125 p.Sample = append(p.Sample, &Sample{ 126 Location: locs, 127 Value: []int64{n}, 128 }) 129 } 130 131 if err = parseAdditionalSections(strings.TrimSpace(line), r, p); err != nil { 132 return nil, err 133 } 134 return p, nil 135 } 136 137 // remapLocationIDs ensures there is a location for each address 138 // referenced by a sample, and remaps the samples to point to the new 139 // location ids. 140 func (p *Profile) remapLocationIDs() { 141 seen := make(map[*Location]bool, len(p.Location)) 142 var locs []*Location 143 144 for _, s := range p.Sample { 145 for _, l := range s.Location { 146 if seen[l] { 147 continue 148 } 149 l.ID = uint64(len(locs) + 1) 150 locs = append(locs, l) 151 seen[l] = true 152 } 153 } 154 p.Location = locs 155 } 156 157 func (p *Profile) remapFunctionIDs() { 158 seen := make(map[*Function]bool, len(p.Function)) 159 var fns []*Function 160 161 for _, l := range p.Location { 162 for _, ln := range l.Line { 163 fn := ln.Function 164 if fn == nil || seen[fn] { 165 continue 166 } 167 fn.ID = uint64(len(fns) + 1) 168 fns = append(fns, fn) 169 seen[fn] = true 170 } 171 } 172 p.Function = fns 173 } 174 175 // remapMappingIDs matches location addresses with existing mappings 176 // and updates them appropriately. This is O(N*M), if this ever shows 177 // up as a bottleneck, evaluate sorting the mappings and doing a 178 // binary search, which would make it O(N*log(M)). 179 func (p *Profile) remapMappingIDs() { 180 if len(p.Mapping) == 0 { 181 return 182 } 183 184 // Some profile handlers will incorrectly set regions for the main 185 // executable if its section is remapped. Fix them through heuristics. 186 187 // Remove the initial mapping if named '/anon_hugepage' and has a 188 // consecutive adjacent mapping. 189 if m := p.Mapping[0]; strings.HasPrefix(m.File, "/anon_hugepage") { 190 if len(p.Mapping) > 1 && m.Limit == p.Mapping[1].Start { 191 p.Mapping = p.Mapping[1:] 192 } 193 } 194 195 // Subtract the offset from the start of the main mapping if it 196 // ends up at a recognizable start address. 197 const expectedStart = 0x400000 198 if m := p.Mapping[0]; m.Start-m.Offset == expectedStart { 199 m.Start = expectedStart 200 m.Offset = 0 201 } 202 203 for _, l := range p.Location { 204 if a := l.Address; a != 0 { 205 for _, m := range p.Mapping { 206 if m.Start <= a && a < m.Limit { 207 l.Mapping = m 208 break 209 } 210 } 211 } 212 } 213 214 // Reset all mapping IDs. 215 for i, m := range p.Mapping { 216 m.ID = uint64(i + 1) 217 } 218 } 219 220 var cpuInts = []func([]byte) (uint64, []byte){ 221 get32l, 222 get32b, 223 get64l, 224 get64b, 225 } 226 227 func get32l(b []byte) (uint64, []byte) { 228 if len(b) < 4 { 229 return 0, nil 230 } 231 return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24, b[4:] 232 } 233 234 func get32b(b []byte) (uint64, []byte) { 235 if len(b) < 4 { 236 return 0, nil 237 } 238 return uint64(b[3]) | uint64(b[2])<<8 | uint64(b[1])<<16 | uint64(b[0])<<24, b[4:] 239 } 240 241 func get64l(b []byte) (uint64, []byte) { 242 if len(b) < 8 { 243 return 0, nil 244 } 245 return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56, b[8:] 246 } 247 248 func get64b(b []byte) (uint64, []byte) { 249 if len(b) < 8 { 250 return 0, nil 251 } 252 return uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24 | uint64(b[3])<<32 | uint64(b[2])<<40 | uint64(b[1])<<48 | uint64(b[0])<<56, b[8:] 253 } 254 255 // ParseTracebacks parses a set of tracebacks and returns a newly 256 // populated profile. It will accept any text file and generate a 257 // Profile out of it with any hex addresses it can identify, including 258 // a process map if it can recognize one. Each sample will include a 259 // tag "source" with the addresses recognized in string format. 260 func ParseTracebacks(b []byte) (*Profile, error) { 261 r := bytes.NewBuffer(b) 262 263 p := &Profile{ 264 PeriodType: &ValueType{Type: "trace", Unit: "count"}, 265 Period: 1, 266 SampleType: []*ValueType{ 267 {Type: "trace", Unit: "count"}, 268 }, 269 } 270 271 var sources []string 272 var sloc []*Location 273 274 locs := make(map[uint64]*Location) 275 for { 276 l, err := r.ReadString('\n') 277 if err != nil { 278 if err != io.EOF { 279 return nil, err 280 } 281 if l == "" { 282 break 283 } 284 } 285 if sectionTrigger(l) == memoryMapSection { 286 break 287 } 288 if s, addrs := extractHexAddresses(l); len(s) > 0 { 289 for _, addr := range addrs { 290 // Addresses from stack traces point to the next instruction after 291 // each call. Adjust by -1 to land somewhere on the actual call. 292 addr-- 293 loc := locs[addr] 294 if locs[addr] == nil { 295 loc = &Location{ 296 Address: addr, 297 } 298 p.Location = append(p.Location, loc) 299 locs[addr] = loc 300 } 301 sloc = append(sloc, loc) 302 } 303 304 sources = append(sources, s...) 305 } else { 306 if len(sources) > 0 || len(sloc) > 0 { 307 addTracebackSample(sloc, sources, p) 308 sloc, sources = nil, nil 309 } 310 } 311 } 312 313 // Add final sample to save any leftover data. 314 if len(sources) > 0 || len(sloc) > 0 { 315 addTracebackSample(sloc, sources, p) 316 } 317 318 if err := p.ParseMemoryMap(r); err != nil { 319 return nil, err 320 } 321 return p, nil 322 } 323 324 func addTracebackSample(l []*Location, s []string, p *Profile) { 325 p.Sample = append(p.Sample, 326 &Sample{ 327 Value: []int64{1}, 328 Location: l, 329 Label: map[string][]string{"source": s}, 330 }) 331 } 332 333 // parseCPU parses a profilez legacy profile and returns a newly 334 // populated Profile. 335 // 336 // The general format for profilez samples is a sequence of words in 337 // binary format. The first words are a header with the following data: 338 // 339 // 1st word -- 0 340 // 2nd word -- 3 341 // 3rd word -- 0 if a c++ application, 1 if a java application. 342 // 4th word -- Sampling period (in microseconds). 343 // 5th word -- Padding. 344 func parseCPU(b []byte) (*Profile, error) { 345 var parse func([]byte) (uint64, []byte) 346 var n1, n2, n3, n4, n5 uint64 347 for _, parse = range cpuInts { 348 var tmp []byte 349 n1, tmp = parse(b) 350 n2, tmp = parse(tmp) 351 n3, tmp = parse(tmp) 352 n4, tmp = parse(tmp) 353 n5, tmp = parse(tmp) 354 355 if tmp != nil && n1 == 0 && n2 == 3 && n3 == 0 && n4 > 0 && n5 == 0 { 356 b = tmp 357 return cpuProfile(b, int64(n4), parse) 358 } 359 } 360 return nil, errUnrecognized 361 } 362 363 // cpuProfile returns a new Profile from C++ profilez data. 364 // b is the profile bytes after the header, period is the profiling 365 // period, and parse is a function to parse 8-byte chunks from the 366 // profile in its native endianness. 367 func cpuProfile(b []byte, period int64, parse func(b []byte) (uint64, []byte)) (*Profile, error) { 368 p := &Profile{ 369 Period: period * 1000, 370 PeriodType: &ValueType{Type: "cpu", Unit: "nanoseconds"}, 371 SampleType: []*ValueType{ 372 {Type: "samples", Unit: "count"}, 373 {Type: "cpu", Unit: "nanoseconds"}, 374 }, 375 } 376 var err error 377 if b, _, err = parseCPUSamples(b, parse, true, p); err != nil { 378 return nil, err 379 } 380 381 // If all samples have the same second-to-the-bottom frame, it 382 // strongly suggests that it is an uninteresting artifact of 383 // measurement -- a stack frame pushed by the signal handler. The 384 // bottom frame is always correct as it is picked up from the signal 385 // structure, not the stack. Check if this is the case and if so, 386 // remove. 387 if len(p.Sample) > 1 && len(p.Sample[0].Location) > 1 { 388 allSame := true 389 id1 := p.Sample[0].Location[1].Address 390 for _, s := range p.Sample { 391 if len(s.Location) < 2 || id1 != s.Location[1].Address { 392 allSame = false 393 break 394 } 395 } 396 if allSame { 397 for _, s := range p.Sample { 398 s.Location = append(s.Location[:1], s.Location[2:]...) 399 } 400 } 401 } 402 403 if err := p.ParseMemoryMap(bytes.NewBuffer(b)); err != nil { 404 return nil, err 405 } 406 return p, nil 407 } 408 409 // parseCPUSamples parses a collection of profilez samples from a 410 // profile. 411 // 412 // profilez samples are a repeated sequence of stack frames of the 413 // form: 414 // 415 // 1st word -- The number of times this stack was encountered. 416 // 2nd word -- The size of the stack (StackSize). 417 // 3rd word -- The first address on the stack. 418 // ... 419 // StackSize + 2 -- The last address on the stack 420 // 421 // The last stack trace is of the form: 422 // 423 // 1st word -- 0 424 // 2nd word -- 1 425 // 3rd word -- 0 426 // 427 // Addresses from stack traces may point to the next instruction after 428 // each call. Optionally adjust by -1 to land somewhere on the actual 429 // call (except for the leaf, which is not a call). 430 func parseCPUSamples(b []byte, parse func(b []byte) (uint64, []byte), adjust bool, p *Profile) ([]byte, map[uint64]*Location, error) { 431 locs := make(map[uint64]*Location) 432 for len(b) > 0 { 433 var count, nstk uint64 434 count, b = parse(b) 435 nstk, b = parse(b) 436 if b == nil || nstk > uint64(len(b)/4) { 437 return nil, nil, errUnrecognized 438 } 439 var sloc []*Location 440 addrs := make([]uint64, nstk) 441 for i := 0; i < int(nstk); i++ { 442 addrs[i], b = parse(b) 443 } 444 445 if count == 0 && nstk == 1 && addrs[0] == 0 { 446 // End of data marker 447 break 448 } 449 for i, addr := range addrs { 450 if adjust && i > 0 { 451 addr-- 452 } 453 loc := locs[addr] 454 if loc == nil { 455 loc = &Location{ 456 Address: addr, 457 } 458 locs[addr] = loc 459 p.Location = append(p.Location, loc) 460 } 461 sloc = append(sloc, loc) 462 } 463 p.Sample = append(p.Sample, 464 &Sample{ 465 Value: []int64{int64(count), int64(count) * p.Period}, 466 Location: sloc, 467 }) 468 } 469 // Reached the end without finding the EOD marker. 470 return b, locs, nil 471 } 472 473 // parseHeap parses a heapz legacy or a growthz profile and 474 // returns a newly populated Profile. 475 func parseHeap(b []byte) (p *Profile, err error) { 476 r := bytes.NewBuffer(b) 477 l, err := r.ReadString('\n') 478 if err != nil { 479 return nil, errUnrecognized 480 } 481 482 sampling := "" 483 484 if header := heapHeaderRE.FindStringSubmatch(l); header != nil { 485 p = &Profile{ 486 SampleType: []*ValueType{ 487 {Type: "objects", Unit: "count"}, 488 {Type: "space", Unit: "bytes"}, 489 }, 490 PeriodType: &ValueType{Type: "objects", Unit: "bytes"}, 491 } 492 493 var period int64 494 if len(header[6]) > 0 { 495 if period, err = strconv.ParseInt(header[6], 10, 64); err != nil { 496 return nil, errUnrecognized 497 } 498 } 499 500 switch header[5] { 501 case "heapz_v2", "heap_v2": 502 sampling, p.Period = "v2", period 503 case "heapprofile": 504 sampling, p.Period = "", 1 505 case "heap": 506 sampling, p.Period = "v2", period/2 507 default: 508 return nil, errUnrecognized 509 } 510 } else if header = growthHeaderRE.FindStringSubmatch(l); header != nil { 511 p = &Profile{ 512 SampleType: []*ValueType{ 513 {Type: "objects", Unit: "count"}, 514 {Type: "space", Unit: "bytes"}, 515 }, 516 PeriodType: &ValueType{Type: "heapgrowth", Unit: "count"}, 517 Period: 1, 518 } 519 } else if header = fragmentationHeaderRE.FindStringSubmatch(l); header != nil { 520 p = &Profile{ 521 SampleType: []*ValueType{ 522 {Type: "objects", Unit: "count"}, 523 {Type: "space", Unit: "bytes"}, 524 }, 525 PeriodType: &ValueType{Type: "allocations", Unit: "count"}, 526 Period: 1, 527 } 528 } else { 529 return nil, errUnrecognized 530 } 531 532 if LegacyHeapAllocated { 533 for _, st := range p.SampleType { 534 st.Type = "alloc_" + st.Type 535 } 536 } else { 537 for _, st := range p.SampleType { 538 st.Type = "inuse_" + st.Type 539 } 540 } 541 542 locs := make(map[uint64]*Location) 543 for { 544 l, err = r.ReadString('\n') 545 if err != nil { 546 if err != io.EOF { 547 return nil, err 548 } 549 550 if l == "" { 551 break 552 } 553 } 554 555 if isSpaceOrComment(l) { 556 continue 557 } 558 l = strings.TrimSpace(l) 559 560 if sectionTrigger(l) != unrecognizedSection { 561 break 562 } 563 564 value, blocksize, addrs, err := parseHeapSample(l, p.Period, sampling) 565 if err != nil { 566 return nil, err 567 } 568 var sloc []*Location 569 for _, addr := range addrs { 570 // Addresses from stack traces point to the next instruction after 571 // each call. Adjust by -1 to land somewhere on the actual call. 572 addr-- 573 loc := locs[addr] 574 if locs[addr] == nil { 575 loc = &Location{ 576 Address: addr, 577 } 578 p.Location = append(p.Location, loc) 579 locs[addr] = loc 580 } 581 sloc = append(sloc, loc) 582 } 583 584 p.Sample = append(p.Sample, &Sample{ 585 Value: value, 586 Location: sloc, 587 NumLabel: map[string][]int64{"bytes": {blocksize}}, 588 }) 589 } 590 591 if err = parseAdditionalSections(l, r, p); err != nil { 592 return nil, err 593 } 594 return p, nil 595 } 596 597 // parseHeapSample parses a single row from a heap profile into a new Sample. 598 func parseHeapSample(line string, rate int64, sampling string) (value []int64, blocksize int64, addrs []uint64, err error) { 599 sampleData := heapSampleRE.FindStringSubmatch(line) 600 if len(sampleData) != 6 { 601 return value, blocksize, addrs, fmt.Errorf("unexpected number of sample values: got %d, want 6", len(sampleData)) 602 } 603 604 // Use first two values by default; tcmalloc sampling generates the 605 // same value for both, only the older heap-profile collect separate 606 // stats for in-use and allocated objects. 607 valueIndex := 1 608 if LegacyHeapAllocated { 609 valueIndex = 3 610 } 611 612 var v1, v2 int64 613 if v1, err = strconv.ParseInt(sampleData[valueIndex], 10, 64); err != nil { 614 return value, blocksize, addrs, fmt.Errorf("malformed sample: %s: %v", line, err) 615 } 616 if v2, err = strconv.ParseInt(sampleData[valueIndex+1], 10, 64); err != nil { 617 return value, blocksize, addrs, fmt.Errorf("malformed sample: %s: %v", line, err) 618 } 619 620 if v1 == 0 { 621 if v2 != 0 { 622 return value, blocksize, addrs, fmt.Errorf("allocation count was 0 but allocation bytes was %d", v2) 623 } 624 } else { 625 blocksize = v2 / v1 626 if sampling == "v2" { 627 v1, v2 = scaleHeapSample(v1, v2, rate) 628 } 629 } 630 631 value = []int64{v1, v2} 632 addrs = parseHexAddresses(sampleData[5]) 633 634 return value, blocksize, addrs, nil 635 } 636 637 // extractHexAddresses extracts hex numbers from a string and returns 638 // them, together with their numeric value, in a slice. 639 func extractHexAddresses(s string) ([]string, []uint64) { 640 hexStrings := hexNumberRE.FindAllString(s, -1) 641 var ids []uint64 642 for _, s := range hexStrings { 643 if id, err := strconv.ParseUint(s, 0, 64); err == nil { 644 ids = append(ids, id) 645 } else { 646 // Do not expect any parsing failures due to the regexp matching. 647 panic("failed to parse hex value:" + s) 648 } 649 } 650 return hexStrings, ids 651 } 652 653 // parseHexAddresses parses hex numbers from a string and returns them 654 // in a slice. 655 func parseHexAddresses(s string) []uint64 { 656 _, ids := extractHexAddresses(s) 657 return ids 658 } 659 660 // scaleHeapSample adjusts the data from a heapz Sample to 661 // account for its probability of appearing in the collected 662 // data. heapz profiles are a sampling of the memory allocations 663 // requests in a program. We estimate the unsampled value by dividing 664 // each collected sample by its probability of appearing in the 665 // profile. heapz v2 profiles rely on a poisson process to determine 666 // which samples to collect, based on the desired average collection 667 // rate R. The probability of a sample of size S to appear in that 668 // profile is 1-exp(-S/R). 669 func scaleHeapSample(count, size, rate int64) (int64, int64) { 670 if count == 0 || size == 0 { 671 return 0, 0 672 } 673 674 if rate <= 1 { 675 // if rate==1 all samples were collected so no adjustment is needed. 676 // if rate<1 treat as unknown and skip scaling. 677 return count, size 678 } 679 680 avgSize := float64(size) / float64(count) 681 scale := 1 / (1 - math.Exp(-avgSize/float64(rate))) 682 683 return int64(float64(count) * scale), int64(float64(size) * scale) 684 } 685 686 // parseContention parses a mutex or contention profile. There are 2 cases: 687 // "--- contentionz " for legacy C++ profiles (and backwards compatibility) 688 // "--- mutex:" or "--- contention:" for profiles generated by the Go runtime. 689 // This code converts the text output from runtime into a *Profile. (In the future 690 // the runtime might write a serialized Profile directly making this unnecessary.) 691 func parseContention(b []byte) (*Profile, error) { 692 r := bytes.NewBuffer(b) 693 var l string 694 var err error 695 for { 696 // Skip past comments and empty lines seeking a real header. 697 l, err = r.ReadString('\n') 698 if err != nil { 699 return nil, err 700 } 701 if !isSpaceOrComment(l) { 702 break 703 } 704 } 705 706 if strings.HasPrefix(l, "--- contentionz ") { 707 return parseCppContention(r) 708 } else if strings.HasPrefix(l, "--- mutex:") { 709 return parseCppContention(r) 710 } else if strings.HasPrefix(l, "--- contention:") { 711 return parseCppContention(r) 712 } 713 return nil, errUnrecognized 714 } 715 716 // parseCppContention parses the output from synchronization_profiling.cc 717 // for backward compatibility, and the compatible (non-debug) block profile 718 // output from the Go runtime. 719 func parseCppContention(r *bytes.Buffer) (*Profile, error) { 720 p := &Profile{ 721 PeriodType: &ValueType{Type: "contentions", Unit: "count"}, 722 Period: 1, 723 SampleType: []*ValueType{ 724 {Type: "contentions", Unit: "count"}, 725 {Type: "delay", Unit: "nanoseconds"}, 726 }, 727 } 728 729 var cpuHz int64 730 var l string 731 var err error 732 // Parse text of the form "attribute = value" before the samples. 733 const delimiter = "=" 734 for { 735 l, err = r.ReadString('\n') 736 if err != nil { 737 if err != io.EOF { 738 return nil, err 739 } 740 741 if l == "" { 742 break 743 } 744 } 745 if isSpaceOrComment(l) { 746 continue 747 } 748 749 if l = strings.TrimSpace(l); l == "" { 750 continue 751 } 752 753 if strings.HasPrefix(l, "---") { 754 break 755 } 756 757 attr := strings.SplitN(l, delimiter, 2) 758 if len(attr) != 2 { 759 break 760 } 761 key, val := strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1]) 762 var err error 763 switch key { 764 case "cycles/second": 765 if cpuHz, err = strconv.ParseInt(val, 0, 64); err != nil { 766 return nil, errUnrecognized 767 } 768 case "sampling period": 769 if p.Period, err = strconv.ParseInt(val, 0, 64); err != nil { 770 return nil, errUnrecognized 771 } 772 case "ms since reset": 773 ms, err := strconv.ParseInt(val, 0, 64) 774 if err != nil { 775 return nil, errUnrecognized 776 } 777 p.DurationNanos = ms * 1000 * 1000 778 case "format": 779 // CPP contentionz profiles don't have format. 780 return nil, errUnrecognized 781 case "resolution": 782 // CPP contentionz profiles don't have resolution. 783 return nil, errUnrecognized 784 case "discarded samples": 785 default: 786 return nil, errUnrecognized 787 } 788 } 789 790 locs := make(map[uint64]*Location) 791 for { 792 if !isSpaceOrComment(l) { 793 if l = strings.TrimSpace(l); strings.HasPrefix(l, "---") { 794 break 795 } 796 value, addrs, err := parseContentionSample(l, p.Period, cpuHz) 797 if err != nil { 798 return nil, err 799 } 800 var sloc []*Location 801 for _, addr := range addrs { 802 // Addresses from stack traces point to the next instruction after 803 // each call. Adjust by -1 to land somewhere on the actual call. 804 addr-- 805 loc := locs[addr] 806 if locs[addr] == nil { 807 loc = &Location{ 808 Address: addr, 809 } 810 p.Location = append(p.Location, loc) 811 locs[addr] = loc 812 } 813 sloc = append(sloc, loc) 814 } 815 p.Sample = append(p.Sample, &Sample{ 816 Value: value, 817 Location: sloc, 818 }) 819 } 820 821 if l, err = r.ReadString('\n'); err != nil { 822 if err != io.EOF { 823 return nil, err 824 } 825 if l == "" { 826 break 827 } 828 } 829 } 830 831 if err = parseAdditionalSections(l, r, p); err != nil { 832 return nil, err 833 } 834 835 return p, nil 836 } 837 838 // parseContentionSample parses a single row from a contention profile 839 // into a new Sample. 840 func parseContentionSample(line string, period, cpuHz int64) (value []int64, addrs []uint64, err error) { 841 sampleData := contentionSampleRE.FindStringSubmatch(line) 842 if sampleData == nil { 843 return value, addrs, errUnrecognized 844 } 845 846 v1, err := strconv.ParseInt(sampleData[1], 10, 64) 847 if err != nil { 848 return value, addrs, fmt.Errorf("malformed sample: %s: %v", line, err) 849 } 850 v2, err := strconv.ParseInt(sampleData[2], 10, 64) 851 if err != nil { 852 return value, addrs, fmt.Errorf("malformed sample: %s: %v", line, err) 853 } 854 855 // Unsample values if period and cpuHz are available. 856 // - Delays are scaled to cycles and then to nanoseconds. 857 // - Contentions are scaled to cycles. 858 if period > 0 { 859 if cpuHz > 0 { 860 cpuGHz := float64(cpuHz) / 1e9 861 v1 = int64(float64(v1) * float64(period) / cpuGHz) 862 } 863 v2 = v2 * period 864 } 865 866 value = []int64{v2, v1} 867 addrs = parseHexAddresses(sampleData[3]) 868 869 return value, addrs, nil 870 } 871 872 // parseThread parses a Threadz profile and returns a new Profile. 873 func parseThread(b []byte) (*Profile, error) { 874 r := bytes.NewBuffer(b) 875 876 var line string 877 var err error 878 for { 879 // Skip past comments and empty lines seeking a real header. 880 line, err = r.ReadString('\n') 881 if err != nil { 882 return nil, err 883 } 884 if !isSpaceOrComment(line) { 885 break 886 } 887 } 888 889 if m := threadzStartRE.FindStringSubmatch(line); m != nil { 890 // Advance over initial comments until first stack trace. 891 for { 892 line, err = r.ReadString('\n') 893 if err != nil { 894 if err != io.EOF { 895 return nil, err 896 } 897 898 if line == "" { 899 break 900 } 901 } 902 if sectionTrigger(line) != unrecognizedSection || line[0] == '-' { 903 break 904 } 905 } 906 } else if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 { 907 return nil, errUnrecognized 908 } 909 910 p := &Profile{ 911 SampleType: []*ValueType{{Type: "thread", Unit: "count"}}, 912 PeriodType: &ValueType{Type: "thread", Unit: "count"}, 913 Period: 1, 914 } 915 916 locs := make(map[uint64]*Location) 917 // Recognize each thread and populate profile samples. 918 for sectionTrigger(line) == unrecognizedSection { 919 if strings.HasPrefix(line, "---- no stack trace for") { 920 line = "" 921 break 922 } 923 if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 { 924 return nil, errUnrecognized 925 } 926 927 var addrs []uint64 928 line, addrs, err = parseThreadSample(r) 929 if err != nil { 930 return nil, errUnrecognized 931 } 932 if len(addrs) == 0 { 933 // We got a --same as previous threads--. Bump counters. 934 if len(p.Sample) > 0 { 935 s := p.Sample[len(p.Sample)-1] 936 s.Value[0]++ 937 } 938 continue 939 } 940 941 var sloc []*Location 942 for _, addr := range addrs { 943 // Addresses from stack traces point to the next instruction after 944 // each call. Adjust by -1 to land somewhere on the actual call. 945 addr-- 946 loc := locs[addr] 947 if locs[addr] == nil { 948 loc = &Location{ 949 Address: addr, 950 } 951 p.Location = append(p.Location, loc) 952 locs[addr] = loc 953 } 954 sloc = append(sloc, loc) 955 } 956 957 p.Sample = append(p.Sample, &Sample{ 958 Value: []int64{1}, 959 Location: sloc, 960 }) 961 } 962 963 if err = parseAdditionalSections(line, r, p); err != nil { 964 return nil, err 965 } 966 967 return p, nil 968 } 969 970 // parseThreadSample parses a symbolized or unsymbolized stack trace. 971 // Returns the first line after the traceback, the sample (or nil if 972 // it hits a 'same-as-previous' marker) and an error. 973 func parseThreadSample(b *bytes.Buffer) (nextl string, addrs []uint64, err error) { 974 var l string 975 sameAsPrevious := false 976 for { 977 if l, err = b.ReadString('\n'); err != nil { 978 if err != io.EOF { 979 return "", nil, err 980 } 981 if l == "" { 982 break 983 } 984 } 985 if l = strings.TrimSpace(l); l == "" { 986 continue 987 } 988 989 if strings.HasPrefix(l, "---") { 990 break 991 } 992 if strings.Contains(l, "same as previous thread") { 993 sameAsPrevious = true 994 continue 995 } 996 997 addrs = append(addrs, parseHexAddresses(l)...) 998 } 999 1000 if sameAsPrevious { 1001 return l, nil, nil 1002 } 1003 return l, addrs, nil 1004 } 1005 1006 // parseAdditionalSections parses any additional sections in the 1007 // profile, ignoring any unrecognized sections. 1008 func parseAdditionalSections(l string, b *bytes.Buffer, p *Profile) (err error) { 1009 for { 1010 if sectionTrigger(l) == memoryMapSection { 1011 break 1012 } 1013 // Ignore any unrecognized sections. 1014 if l, err := b.ReadString('\n'); err != nil { 1015 if err != io.EOF { 1016 return err 1017 } 1018 if l == "" { 1019 break 1020 } 1021 } 1022 } 1023 return p.ParseMemoryMap(b) 1024 } 1025 1026 // ParseMemoryMap parses a memory map in the format of 1027 // /proc/self/maps, and overrides the mappings in the current profile. 1028 // It renumbers the samples and locations in the profile correspondingly. 1029 func (p *Profile) ParseMemoryMap(rd io.Reader) error { 1030 b := bufio.NewReader(rd) 1031 1032 var attrs []string 1033 var r *strings.Replacer 1034 const delimiter = "=" 1035 for { 1036 l, err := b.ReadString('\n') 1037 if err != nil { 1038 if err != io.EOF { 1039 return err 1040 } 1041 if l == "" { 1042 break 1043 } 1044 } 1045 if l = strings.TrimSpace(l); l == "" { 1046 continue 1047 } 1048 1049 if r != nil { 1050 l = r.Replace(l) 1051 } 1052 m, err := parseMappingEntry(l) 1053 if err != nil { 1054 if err == errUnrecognized { 1055 // Recognize assignments of the form: attr=value, and replace 1056 // $attr with value on subsequent mappings. 1057 if attr := strings.SplitN(l, delimiter, 2); len(attr) == 2 { 1058 attrs = append(attrs, "$"+strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1])) 1059 r = strings.NewReplacer(attrs...) 1060 } 1061 // Ignore any unrecognized entries 1062 continue 1063 } 1064 return err 1065 } 1066 if m == nil || (m.File == "" && len(p.Mapping) != 0) { 1067 // In some cases the first entry may include the address range 1068 // but not the name of the file. It should be followed by 1069 // another entry with the name. 1070 continue 1071 } 1072 if len(p.Mapping) == 1 && p.Mapping[0].File == "" { 1073 // Update the name if this is the entry following that empty one. 1074 p.Mapping[0].File = m.File 1075 continue 1076 } 1077 p.Mapping = append(p.Mapping, m) 1078 } 1079 p.remapLocationIDs() 1080 p.remapFunctionIDs() 1081 p.remapMappingIDs() 1082 return nil 1083 } 1084 1085 func parseMappingEntry(l string) (*Mapping, error) { 1086 mapping := &Mapping{} 1087 var err error 1088 if me := procMapsRE.FindStringSubmatch(l); len(me) == 9 { 1089 if !strings.Contains(me[3], "x") { 1090 // Skip non-executable entries. 1091 return nil, nil 1092 } 1093 if mapping.Start, err = strconv.ParseUint(me[1], 16, 64); err != nil { 1094 return nil, errUnrecognized 1095 } 1096 if mapping.Limit, err = strconv.ParseUint(me[2], 16, 64); err != nil { 1097 return nil, errUnrecognized 1098 } 1099 if me[4] != "" { 1100 if mapping.Offset, err = strconv.ParseUint(me[4], 16, 64); err != nil { 1101 return nil, errUnrecognized 1102 } 1103 } 1104 mapping.File = me[8] 1105 return mapping, nil 1106 } 1107 1108 if me := briefMapsRE.FindStringSubmatch(l); len(me) == 6 { 1109 if mapping.Start, err = strconv.ParseUint(me[1], 16, 64); err != nil { 1110 return nil, errUnrecognized 1111 } 1112 if mapping.Limit, err = strconv.ParseUint(me[2], 16, 64); err != nil { 1113 return nil, errUnrecognized 1114 } 1115 mapping.File = me[3] 1116 if me[5] != "" { 1117 if mapping.Offset, err = strconv.ParseUint(me[5], 16, 64); err != nil { 1118 return nil, errUnrecognized 1119 } 1120 } 1121 return mapping, nil 1122 } 1123 1124 return nil, errUnrecognized 1125 } 1126 1127 type sectionType int 1128 1129 const ( 1130 unrecognizedSection sectionType = iota 1131 memoryMapSection 1132 ) 1133 1134 var memoryMapTriggers = []string{ 1135 "--- Memory map: ---", 1136 "MAPPED_LIBRARIES:", 1137 } 1138 1139 func sectionTrigger(line string) sectionType { 1140 for _, trigger := range memoryMapTriggers { 1141 if strings.Contains(line, trigger) { 1142 return memoryMapSection 1143 } 1144 } 1145 return unrecognizedSection 1146 } 1147 1148 func (p *Profile) addLegacyFrameInfo() { 1149 switch { 1150 case isProfileType(p, heapzSampleTypes) || 1151 isProfileType(p, heapzInUseSampleTypes) || 1152 isProfileType(p, heapzAllocSampleTypes): 1153 p.DropFrames, p.KeepFrames = allocRxStr, allocSkipRxStr 1154 case isProfileType(p, contentionzSampleTypes): 1155 p.DropFrames, p.KeepFrames = lockRxStr, "" 1156 default: 1157 p.DropFrames, p.KeepFrames = cpuProfilerRxStr, "" 1158 } 1159 } 1160 1161 var heapzSampleTypes = []string{"allocations", "size"} // early Go pprof profiles 1162 var heapzInUseSampleTypes = []string{"inuse_objects", "inuse_space"} 1163 var heapzAllocSampleTypes = []string{"alloc_objects", "alloc_space"} 1164 var contentionzSampleTypes = []string{"contentions", "delay"} 1165 1166 func isProfileType(p *Profile, t []string) bool { 1167 st := p.SampleType 1168 if len(st) != len(t) { 1169 return false 1170 } 1171 1172 for i := range st { 1173 if st[i].Type != t[i] { 1174 return false 1175 } 1176 } 1177 return true 1178 } 1179 1180 var allocRxStr = strings.Join([]string{ 1181 // POSIX entry points. 1182 `calloc`, 1183 `cfree`, 1184 `malloc`, 1185 `free`, 1186 `memalign`, 1187 `do_memalign`, 1188 `(__)?posix_memalign`, 1189 `pvalloc`, 1190 `valloc`, 1191 `realloc`, 1192 1193 // TC malloc. 1194 `tcmalloc::.*`, 1195 `tc_calloc`, 1196 `tc_cfree`, 1197 `tc_malloc`, 1198 `tc_free`, 1199 `tc_memalign`, 1200 `tc_posix_memalign`, 1201 `tc_pvalloc`, 1202 `tc_valloc`, 1203 `tc_realloc`, 1204 `tc_new`, 1205 `tc_delete`, 1206 `tc_newarray`, 1207 `tc_deletearray`, 1208 `tc_new_nothrow`, 1209 `tc_newarray_nothrow`, 1210 1211 // Memory-allocation routines on OS X. 1212 `malloc_zone_malloc`, 1213 `malloc_zone_calloc`, 1214 `malloc_zone_valloc`, 1215 `malloc_zone_realloc`, 1216 `malloc_zone_memalign`, 1217 `malloc_zone_free`, 1218 1219 // Go runtime 1220 `runtime\..*`, 1221 1222 // Other misc. memory allocation routines 1223 `BaseArena::.*`, 1224 `(::)?do_malloc_no_errno`, 1225 `(::)?do_malloc_pages`, 1226 `(::)?do_malloc`, 1227 `DoSampledAllocation`, 1228 `MallocedMemBlock::MallocedMemBlock`, 1229 `_M_allocate`, 1230 `__builtin_(vec_)?delete`, 1231 `__builtin_(vec_)?new`, 1232 `__gnu_cxx::new_allocator::allocate`, 1233 `__libc_malloc`, 1234 `__malloc_alloc_template::allocate`, 1235 `allocate`, 1236 `cpp_alloc`, 1237 `operator new(\[\])?`, 1238 `simple_alloc::allocate`, 1239 }, `|`) 1240 1241 var allocSkipRxStr = strings.Join([]string{ 1242 // Preserve Go runtime frames that appear in the middle/bottom of 1243 // the stack. 1244 `runtime\.panic`, 1245 `runtime\.reflectcall`, 1246 `runtime\.call[0-9]*`, 1247 }, `|`) 1248 1249 var cpuProfilerRxStr = strings.Join([]string{ 1250 `ProfileData::Add`, 1251 `ProfileData::prof_handler`, 1252 `CpuProfiler::prof_handler`, 1253 `__pthread_sighandler`, 1254 `__restore`, 1255 }, `|`) 1256 1257 var lockRxStr = strings.Join([]string{ 1258 `RecordLockProfileData`, 1259 `(base::)?RecordLockProfileData.*`, 1260 `(base::)?SubmitMutexProfileData.*`, 1261 `(base::)?SubmitSpinLockProfileData.*`, 1262 `(Mutex::)?AwaitCommon.*`, 1263 `(Mutex::)?Unlock.*`, 1264 `(Mutex::)?UnlockSlow.*`, 1265 `(Mutex::)?ReaderUnlock.*`, 1266 `(MutexLock::)?~MutexLock.*`, 1267 `(SpinLock::)?Unlock.*`, 1268 `(SpinLock::)?SlowUnlock.*`, 1269 `(SpinLockHolder::)?~SpinLockHolder.*`, 1270 }, `|`)