github.com/4ad/go@v0.0.0-20161219182952-69a12818b605/src/cmd/internal/pprof/profile/legacy_profile.go (about)

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This file implements parsers to convert legacy profiles into the
     6  // profile.proto format.
     7  
     8  package profile
     9  
    10  import (
    11  	"bufio"
    12  	"bytes"
    13  	"fmt"
    14  	"io"
    15  	"math"
    16  	"regexp"
    17  	"strconv"
    18  	"strings"
    19  )
    20  
    21  var (
    22  	countStartRE = regexp.MustCompile(`\A(\w+) profile: total \d+\n\z`)
    23  	countRE      = regexp.MustCompile(`\A(\d+) @(( 0x[0-9a-f]+)+)\n\z`)
    24  
    25  	heapHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] *@ *(heap[_a-z0-9]*)/?(\d*)`)
    26  	heapSampleRE = regexp.MustCompile(`(-?\d+): *(-?\d+) *\[ *(\d+): *(\d+) *] @([ x0-9a-f]*)`)
    27  
    28  	contentionSampleRE = regexp.MustCompile(`(\d+) *(\d+) @([ x0-9a-f]*)`)
    29  
    30  	hexNumberRE = regexp.MustCompile(`0x[0-9a-f]+`)
    31  
    32  	growthHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ growthz`)
    33  
    34  	fragmentationHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ fragmentationz`)
    35  
    36  	threadzStartRE = regexp.MustCompile(`--- threadz \d+ ---`)
    37  	threadStartRE  = regexp.MustCompile(`--- Thread ([[:xdigit:]]+) \(name: (.*)/(\d+)\) stack: ---`)
    38  
    39  	procMapsRE = regexp.MustCompile(`([[:xdigit:]]+)-([[:xdigit:]]+)\s+([-rwxp]+)\s+([[:xdigit:]]+)\s+([[:xdigit:]]+):([[:xdigit:]]+)\s+([[:digit:]]+)\s*(\S+)?`)
    40  
    41  	briefMapsRE = regexp.MustCompile(`\s*([[:xdigit:]]+)-([[:xdigit:]]+):\s*(\S+)(\s.*@)?([[:xdigit:]]+)?`)
    42  
    43  	// LegacyHeapAllocated instructs the heapz parsers to use the
    44  	// allocated memory stats instead of the default in-use memory. Note
    45  	// that tcmalloc doesn't provide all allocated memory, only in-use
    46  	// stats.
    47  	LegacyHeapAllocated bool
    48  )
    49  
    50  func isSpaceOrComment(line string) bool {
    51  	trimmed := strings.TrimSpace(line)
    52  	return len(trimmed) == 0 || trimmed[0] == '#'
    53  }
    54  
    55  // parseGoCount parses a Go count profile (e.g., threadcreate or
    56  // goroutine) and returns a new Profile.
    57  func parseGoCount(b []byte) (*Profile, error) {
    58  	r := bytes.NewBuffer(b)
    59  
    60  	var line string
    61  	var err error
    62  	for {
    63  		// Skip past comments and empty lines seeking a real header.
    64  		line, err = r.ReadString('\n')
    65  		if err != nil {
    66  			return nil, err
    67  		}
    68  		if !isSpaceOrComment(line) {
    69  			break
    70  		}
    71  	}
    72  
    73  	m := countStartRE.FindStringSubmatch(line)
    74  	if m == nil {
    75  		return nil, errUnrecognized
    76  	}
    77  	profileType := m[1]
    78  	p := &Profile{
    79  		PeriodType: &ValueType{Type: profileType, Unit: "count"},
    80  		Period:     1,
    81  		SampleType: []*ValueType{{Type: profileType, Unit: "count"}},
    82  	}
    83  	locations := make(map[uint64]*Location)
    84  	for {
    85  		line, err = r.ReadString('\n')
    86  		if err != nil {
    87  			if err == io.EOF {
    88  				break
    89  			}
    90  			return nil, err
    91  		}
    92  		if isSpaceOrComment(line) {
    93  			continue
    94  		}
    95  		if strings.HasPrefix(line, "---") {
    96  			break
    97  		}
    98  		m := countRE.FindStringSubmatch(line)
    99  		if m == nil {
   100  			return nil, errMalformed
   101  		}
   102  		n, err := strconv.ParseInt(m[1], 0, 64)
   103  		if err != nil {
   104  			return nil, errMalformed
   105  		}
   106  		fields := strings.Fields(m[2])
   107  		locs := make([]*Location, 0, len(fields))
   108  		for _, stk := range fields {
   109  			addr, err := strconv.ParseUint(stk, 0, 64)
   110  			if err != nil {
   111  				return nil, errMalformed
   112  			}
   113  			// Adjust all frames by -1 to land on the call instruction.
   114  			addr--
   115  			loc := locations[addr]
   116  			if loc == nil {
   117  				loc = &Location{
   118  					Address: addr,
   119  				}
   120  				locations[addr] = loc
   121  				p.Location = append(p.Location, loc)
   122  			}
   123  			locs = append(locs, loc)
   124  		}
   125  		p.Sample = append(p.Sample, &Sample{
   126  			Location: locs,
   127  			Value:    []int64{n},
   128  		})
   129  	}
   130  
   131  	if err = parseAdditionalSections(strings.TrimSpace(line), r, p); err != nil {
   132  		return nil, err
   133  	}
   134  	return p, nil
   135  }
   136  
   137  // remapLocationIDs ensures there is a location for each address
   138  // referenced by a sample, and remaps the samples to point to the new
   139  // location ids.
   140  func (p *Profile) remapLocationIDs() {
   141  	seen := make(map[*Location]bool, len(p.Location))
   142  	var locs []*Location
   143  
   144  	for _, s := range p.Sample {
   145  		for _, l := range s.Location {
   146  			if seen[l] {
   147  				continue
   148  			}
   149  			l.ID = uint64(len(locs) + 1)
   150  			locs = append(locs, l)
   151  			seen[l] = true
   152  		}
   153  	}
   154  	p.Location = locs
   155  }
   156  
   157  func (p *Profile) remapFunctionIDs() {
   158  	seen := make(map[*Function]bool, len(p.Function))
   159  	var fns []*Function
   160  
   161  	for _, l := range p.Location {
   162  		for _, ln := range l.Line {
   163  			fn := ln.Function
   164  			if fn == nil || seen[fn] {
   165  				continue
   166  			}
   167  			fn.ID = uint64(len(fns) + 1)
   168  			fns = append(fns, fn)
   169  			seen[fn] = true
   170  		}
   171  	}
   172  	p.Function = fns
   173  }
   174  
   175  // remapMappingIDs matches location addresses with existing mappings
   176  // and updates them appropriately. This is O(N*M), if this ever shows
   177  // up as a bottleneck, evaluate sorting the mappings and doing a
   178  // binary search, which would make it O(N*log(M)).
   179  func (p *Profile) remapMappingIDs() {
   180  	if len(p.Mapping) == 0 {
   181  		return
   182  	}
   183  
   184  	// Some profile handlers will incorrectly set regions for the main
   185  	// executable if its section is remapped. Fix them through heuristics.
   186  
   187  	// Remove the initial mapping if named '/anon_hugepage' and has a
   188  	// consecutive adjacent mapping.
   189  	if m := p.Mapping[0]; strings.HasPrefix(m.File, "/anon_hugepage") {
   190  		if len(p.Mapping) > 1 && m.Limit == p.Mapping[1].Start {
   191  			p.Mapping = p.Mapping[1:]
   192  		}
   193  	}
   194  
   195  	// Subtract the offset from the start of the main mapping if it
   196  	// ends up at a recognizable start address.
   197  	const expectedStart = 0x400000
   198  	if m := p.Mapping[0]; m.Start-m.Offset == expectedStart {
   199  		m.Start = expectedStart
   200  		m.Offset = 0
   201  	}
   202  
   203  	for _, l := range p.Location {
   204  		if a := l.Address; a != 0 {
   205  			for _, m := range p.Mapping {
   206  				if m.Start <= a && a < m.Limit {
   207  					l.Mapping = m
   208  					break
   209  				}
   210  			}
   211  		}
   212  	}
   213  
   214  	// Reset all mapping IDs.
   215  	for i, m := range p.Mapping {
   216  		m.ID = uint64(i + 1)
   217  	}
   218  }
   219  
   220  var cpuInts = []func([]byte) (uint64, []byte){
   221  	get32l,
   222  	get32b,
   223  	get64l,
   224  	get64b,
   225  }
   226  
   227  func get32l(b []byte) (uint64, []byte) {
   228  	if len(b) < 4 {
   229  		return 0, nil
   230  	}
   231  	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24, b[4:]
   232  }
   233  
   234  func get32b(b []byte) (uint64, []byte) {
   235  	if len(b) < 4 {
   236  		return 0, nil
   237  	}
   238  	return uint64(b[3]) | uint64(b[2])<<8 | uint64(b[1])<<16 | uint64(b[0])<<24, b[4:]
   239  }
   240  
   241  func get64l(b []byte) (uint64, []byte) {
   242  	if len(b) < 8 {
   243  		return 0, nil
   244  	}
   245  	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56, b[8:]
   246  }
   247  
   248  func get64b(b []byte) (uint64, []byte) {
   249  	if len(b) < 8 {
   250  		return 0, nil
   251  	}
   252  	return uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24 | uint64(b[3])<<32 | uint64(b[2])<<40 | uint64(b[1])<<48 | uint64(b[0])<<56, b[8:]
   253  }
   254  
   255  // ParseTracebacks parses a set of tracebacks and returns a newly
   256  // populated profile. It will accept any text file and generate a
   257  // Profile out of it with any hex addresses it can identify, including
   258  // a process map if it can recognize one. Each sample will include a
   259  // tag "source" with the addresses recognized in string format.
   260  func ParseTracebacks(b []byte) (*Profile, error) {
   261  	r := bytes.NewBuffer(b)
   262  
   263  	p := &Profile{
   264  		PeriodType: &ValueType{Type: "trace", Unit: "count"},
   265  		Period:     1,
   266  		SampleType: []*ValueType{
   267  			{Type: "trace", Unit: "count"},
   268  		},
   269  	}
   270  
   271  	var sources []string
   272  	var sloc []*Location
   273  
   274  	locs := make(map[uint64]*Location)
   275  	for {
   276  		l, err := r.ReadString('\n')
   277  		if err != nil {
   278  			if err != io.EOF {
   279  				return nil, err
   280  			}
   281  			if l == "" {
   282  				break
   283  			}
   284  		}
   285  		if sectionTrigger(l) == memoryMapSection {
   286  			break
   287  		}
   288  		if s, addrs := extractHexAddresses(l); len(s) > 0 {
   289  			for _, addr := range addrs {
   290  				// Addresses from stack traces point to the next instruction after
   291  				// each call. Adjust by -1 to land somewhere on the actual call.
   292  				addr--
   293  				loc := locs[addr]
   294  				if locs[addr] == nil {
   295  					loc = &Location{
   296  						Address: addr,
   297  					}
   298  					p.Location = append(p.Location, loc)
   299  					locs[addr] = loc
   300  				}
   301  				sloc = append(sloc, loc)
   302  			}
   303  
   304  			sources = append(sources, s...)
   305  		} else {
   306  			if len(sources) > 0 || len(sloc) > 0 {
   307  				addTracebackSample(sloc, sources, p)
   308  				sloc, sources = nil, nil
   309  			}
   310  		}
   311  	}
   312  
   313  	// Add final sample to save any leftover data.
   314  	if len(sources) > 0 || len(sloc) > 0 {
   315  		addTracebackSample(sloc, sources, p)
   316  	}
   317  
   318  	if err := p.ParseMemoryMap(r); err != nil {
   319  		return nil, err
   320  	}
   321  	return p, nil
   322  }
   323  
   324  func addTracebackSample(l []*Location, s []string, p *Profile) {
   325  	p.Sample = append(p.Sample,
   326  		&Sample{
   327  			Value:    []int64{1},
   328  			Location: l,
   329  			Label:    map[string][]string{"source": s},
   330  		})
   331  }
   332  
   333  // parseCPU parses a profilez legacy profile and returns a newly
   334  // populated Profile.
   335  //
   336  // The general format for profilez samples is a sequence of words in
   337  // binary format. The first words are a header with the following data:
   338  //   1st word -- 0
   339  //   2nd word -- 3
   340  //   3rd word -- 0 if a c++ application, 1 if a java application.
   341  //   4th word -- Sampling period (in microseconds).
   342  //   5th word -- Padding.
   343  func parseCPU(b []byte) (*Profile, error) {
   344  	var parse func([]byte) (uint64, []byte)
   345  	var n1, n2, n3, n4, n5 uint64
   346  	for _, parse = range cpuInts {
   347  		var tmp []byte
   348  		n1, tmp = parse(b)
   349  		n2, tmp = parse(tmp)
   350  		n3, tmp = parse(tmp)
   351  		n4, tmp = parse(tmp)
   352  		n5, tmp = parse(tmp)
   353  
   354  		if tmp != nil && n1 == 0 && n2 == 3 && n3 == 0 && n4 > 0 && n5 == 0 {
   355  			b = tmp
   356  			return cpuProfile(b, int64(n4), parse)
   357  		}
   358  	}
   359  	return nil, errUnrecognized
   360  }
   361  
   362  // cpuProfile returns a new Profile from C++ profilez data.
   363  // b is the profile bytes after the header, period is the profiling
   364  // period, and parse is a function to parse 8-byte chunks from the
   365  // profile in its native endianness.
   366  func cpuProfile(b []byte, period int64, parse func(b []byte) (uint64, []byte)) (*Profile, error) {
   367  	p := &Profile{
   368  		Period:     period * 1000,
   369  		PeriodType: &ValueType{Type: "cpu", Unit: "nanoseconds"},
   370  		SampleType: []*ValueType{
   371  			{Type: "samples", Unit: "count"},
   372  			{Type: "cpu", Unit: "nanoseconds"},
   373  		},
   374  	}
   375  	var err error
   376  	if b, _, err = parseCPUSamples(b, parse, true, p); err != nil {
   377  		return nil, err
   378  	}
   379  
   380  	// If all samples have the same second-to-the-bottom frame, it
   381  	// strongly suggests that it is an uninteresting artifact of
   382  	// measurement -- a stack frame pushed by the signal handler. The
   383  	// bottom frame is always correct as it is picked up from the signal
   384  	// structure, not the stack. Check if this is the case and if so,
   385  	// remove.
   386  	if len(p.Sample) > 1 && len(p.Sample[0].Location) > 1 {
   387  		allSame := true
   388  		id1 := p.Sample[0].Location[1].Address
   389  		for _, s := range p.Sample {
   390  			if len(s.Location) < 2 || id1 != s.Location[1].Address {
   391  				allSame = false
   392  				break
   393  			}
   394  		}
   395  		if allSame {
   396  			for _, s := range p.Sample {
   397  				s.Location = append(s.Location[:1], s.Location[2:]...)
   398  			}
   399  		}
   400  	}
   401  
   402  	if err := p.ParseMemoryMap(bytes.NewBuffer(b)); err != nil {
   403  		return nil, err
   404  	}
   405  	return p, nil
   406  }
   407  
   408  // parseCPUSamples parses a collection of profilez samples from a
   409  // profile.
   410  //
   411  // profilez samples are a repeated sequence of stack frames of the
   412  // form:
   413  //    1st word -- The number of times this stack was encountered.
   414  //    2nd word -- The size of the stack (StackSize).
   415  //    3rd word -- The first address on the stack.
   416  //    ...
   417  //    StackSize + 2 -- The last address on the stack
   418  // The last stack trace is of the form:
   419  //   1st word -- 0
   420  //   2nd word -- 1
   421  //   3rd word -- 0
   422  //
   423  // Addresses from stack traces may point to the next instruction after
   424  // each call. Optionally adjust by -1 to land somewhere on the actual
   425  // call (except for the leaf, which is not a call).
   426  func parseCPUSamples(b []byte, parse func(b []byte) (uint64, []byte), adjust bool, p *Profile) ([]byte, map[uint64]*Location, error) {
   427  	locs := make(map[uint64]*Location)
   428  	for len(b) > 0 {
   429  		var count, nstk uint64
   430  		count, b = parse(b)
   431  		nstk, b = parse(b)
   432  		if b == nil || nstk > uint64(len(b)/4) {
   433  			return nil, nil, errUnrecognized
   434  		}
   435  		var sloc []*Location
   436  		addrs := make([]uint64, nstk)
   437  		for i := 0; i < int(nstk); i++ {
   438  			addrs[i], b = parse(b)
   439  		}
   440  
   441  		if count == 0 && nstk == 1 && addrs[0] == 0 {
   442  			// End of data marker
   443  			break
   444  		}
   445  		for i, addr := range addrs {
   446  			if adjust && i > 0 {
   447  				addr--
   448  			}
   449  			loc := locs[addr]
   450  			if loc == nil {
   451  				loc = &Location{
   452  					Address: addr,
   453  				}
   454  				locs[addr] = loc
   455  				p.Location = append(p.Location, loc)
   456  			}
   457  			sloc = append(sloc, loc)
   458  		}
   459  		p.Sample = append(p.Sample,
   460  			&Sample{
   461  				Value:    []int64{int64(count), int64(count) * p.Period},
   462  				Location: sloc,
   463  			})
   464  	}
   465  	// Reached the end without finding the EOD marker.
   466  	return b, locs, nil
   467  }
   468  
   469  // parseHeap parses a heapz legacy or a growthz profile and
   470  // returns a newly populated Profile.
   471  func parseHeap(b []byte) (p *Profile, err error) {
   472  	r := bytes.NewBuffer(b)
   473  	l, err := r.ReadString('\n')
   474  	if err != nil {
   475  		return nil, errUnrecognized
   476  	}
   477  
   478  	sampling := ""
   479  
   480  	if header := heapHeaderRE.FindStringSubmatch(l); header != nil {
   481  		p = &Profile{
   482  			SampleType: []*ValueType{
   483  				{Type: "objects", Unit: "count"},
   484  				{Type: "space", Unit: "bytes"},
   485  			},
   486  			PeriodType: &ValueType{Type: "objects", Unit: "bytes"},
   487  		}
   488  
   489  		var period int64
   490  		if len(header[6]) > 0 {
   491  			if period, err = strconv.ParseInt(header[6], 10, 64); err != nil {
   492  				return nil, errUnrecognized
   493  			}
   494  		}
   495  
   496  		switch header[5] {
   497  		case "heapz_v2", "heap_v2":
   498  			sampling, p.Period = "v2", period
   499  		case "heapprofile":
   500  			sampling, p.Period = "", 1
   501  		case "heap":
   502  			sampling, p.Period = "v2", period/2
   503  		default:
   504  			return nil, errUnrecognized
   505  		}
   506  	} else if header = growthHeaderRE.FindStringSubmatch(l); header != nil {
   507  		p = &Profile{
   508  			SampleType: []*ValueType{
   509  				{Type: "objects", Unit: "count"},
   510  				{Type: "space", Unit: "bytes"},
   511  			},
   512  			PeriodType: &ValueType{Type: "heapgrowth", Unit: "count"},
   513  			Period:     1,
   514  		}
   515  	} else if header = fragmentationHeaderRE.FindStringSubmatch(l); header != nil {
   516  		p = &Profile{
   517  			SampleType: []*ValueType{
   518  				{Type: "objects", Unit: "count"},
   519  				{Type: "space", Unit: "bytes"},
   520  			},
   521  			PeriodType: &ValueType{Type: "allocations", Unit: "count"},
   522  			Period:     1,
   523  		}
   524  	} else {
   525  		return nil, errUnrecognized
   526  	}
   527  
   528  	if LegacyHeapAllocated {
   529  		for _, st := range p.SampleType {
   530  			st.Type = "alloc_" + st.Type
   531  		}
   532  	} else {
   533  		for _, st := range p.SampleType {
   534  			st.Type = "inuse_" + st.Type
   535  		}
   536  	}
   537  
   538  	locs := make(map[uint64]*Location)
   539  	for {
   540  		l, err = r.ReadString('\n')
   541  		if err != nil {
   542  			if err != io.EOF {
   543  				return nil, err
   544  			}
   545  
   546  			if l == "" {
   547  				break
   548  			}
   549  		}
   550  
   551  		if isSpaceOrComment(l) {
   552  			continue
   553  		}
   554  		l = strings.TrimSpace(l)
   555  
   556  		if sectionTrigger(l) != unrecognizedSection {
   557  			break
   558  		}
   559  
   560  		value, blocksize, addrs, err := parseHeapSample(l, p.Period, sampling)
   561  		if err != nil {
   562  			return nil, err
   563  		}
   564  		var sloc []*Location
   565  		for _, addr := range addrs {
   566  			// Addresses from stack traces point to the next instruction after
   567  			// each call. Adjust by -1 to land somewhere on the actual call.
   568  			addr--
   569  			loc := locs[addr]
   570  			if locs[addr] == nil {
   571  				loc = &Location{
   572  					Address: addr,
   573  				}
   574  				p.Location = append(p.Location, loc)
   575  				locs[addr] = loc
   576  			}
   577  			sloc = append(sloc, loc)
   578  		}
   579  
   580  		p.Sample = append(p.Sample, &Sample{
   581  			Value:    value,
   582  			Location: sloc,
   583  			NumLabel: map[string][]int64{"bytes": {blocksize}},
   584  		})
   585  	}
   586  
   587  	if err = parseAdditionalSections(l, r, p); err != nil {
   588  		return nil, err
   589  	}
   590  	return p, nil
   591  }
   592  
   593  // parseHeapSample parses a single row from a heap profile into a new Sample.
   594  func parseHeapSample(line string, rate int64, sampling string) (value []int64, blocksize int64, addrs []uint64, err error) {
   595  	sampleData := heapSampleRE.FindStringSubmatch(line)
   596  	if len(sampleData) != 6 {
   597  		return value, blocksize, addrs, fmt.Errorf("unexpected number of sample values: got %d, want 6", len(sampleData))
   598  	}
   599  
   600  	// Use first two values by default; tcmalloc sampling generates the
   601  	// same value for both, only the older heap-profile collect separate
   602  	// stats for in-use and allocated objects.
   603  	valueIndex := 1
   604  	if LegacyHeapAllocated {
   605  		valueIndex = 3
   606  	}
   607  
   608  	var v1, v2 int64
   609  	if v1, err = strconv.ParseInt(sampleData[valueIndex], 10, 64); err != nil {
   610  		return value, blocksize, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
   611  	}
   612  	if v2, err = strconv.ParseInt(sampleData[valueIndex+1], 10, 64); err != nil {
   613  		return value, blocksize, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
   614  	}
   615  
   616  	if v1 == 0 {
   617  		if v2 != 0 {
   618  			return value, blocksize, addrs, fmt.Errorf("allocation count was 0 but allocation bytes was %d", v2)
   619  		}
   620  	} else {
   621  		blocksize = v2 / v1
   622  		if sampling == "v2" {
   623  			v1, v2 = scaleHeapSample(v1, v2, rate)
   624  		}
   625  	}
   626  
   627  	value = []int64{v1, v2}
   628  	addrs = parseHexAddresses(sampleData[5])
   629  
   630  	return value, blocksize, addrs, nil
   631  }
   632  
   633  // extractHexAddresses extracts hex numbers from a string and returns
   634  // them, together with their numeric value, in a slice.
   635  func extractHexAddresses(s string) ([]string, []uint64) {
   636  	hexStrings := hexNumberRE.FindAllString(s, -1)
   637  	var ids []uint64
   638  	for _, s := range hexStrings {
   639  		if id, err := strconv.ParseUint(s, 0, 64); err == nil {
   640  			ids = append(ids, id)
   641  		} else {
   642  			// Do not expect any parsing failures due to the regexp matching.
   643  			panic("failed to parse hex value:" + s)
   644  		}
   645  	}
   646  	return hexStrings, ids
   647  }
   648  
   649  // parseHexAddresses parses hex numbers from a string and returns them
   650  // in a slice.
   651  func parseHexAddresses(s string) []uint64 {
   652  	_, ids := extractHexAddresses(s)
   653  	return ids
   654  }
   655  
   656  // scaleHeapSample adjusts the data from a heapz Sample to
   657  // account for its probability of appearing in the collected
   658  // data. heapz profiles are a sampling of the memory allocations
   659  // requests in a program. We estimate the unsampled value by dividing
   660  // each collected sample by its probability of appearing in the
   661  // profile. heapz v2 profiles rely on a poisson process to determine
   662  // which samples to collect, based on the desired average collection
   663  // rate R. The probability of a sample of size S to appear in that
   664  // profile is 1-exp(-S/R).
   665  func scaleHeapSample(count, size, rate int64) (int64, int64) {
   666  	if count == 0 || size == 0 {
   667  		return 0, 0
   668  	}
   669  
   670  	if rate <= 1 {
   671  		// if rate==1 all samples were collected so no adjustment is needed.
   672  		// if rate<1 treat as unknown and skip scaling.
   673  		return count, size
   674  	}
   675  
   676  	avgSize := float64(size) / float64(count)
   677  	scale := 1 / (1 - math.Exp(-avgSize/float64(rate)))
   678  
   679  	return int64(float64(count) * scale), int64(float64(size) * scale)
   680  }
   681  
   682  // parseContention parses a contentionz profile and returns a newly
   683  // populated Profile.
   684  func parseContention(b []byte) (p *Profile, err error) {
   685  	r := bytes.NewBuffer(b)
   686  	l, err := r.ReadString('\n')
   687  	if err != nil {
   688  		return nil, errUnrecognized
   689  	}
   690  
   691  	if !strings.HasPrefix(l, "--- contention") {
   692  		return nil, errUnrecognized
   693  	}
   694  
   695  	p = &Profile{
   696  		PeriodType: &ValueType{Type: "contentions", Unit: "count"},
   697  		Period:     1,
   698  		SampleType: []*ValueType{
   699  			{Type: "contentions", Unit: "count"},
   700  			{Type: "delay", Unit: "nanoseconds"},
   701  		},
   702  	}
   703  
   704  	var cpuHz int64
   705  	// Parse text of the form "attribute = value" before the samples.
   706  	const delimiter = "="
   707  	for {
   708  		l, err = r.ReadString('\n')
   709  		if err != nil {
   710  			if err != io.EOF {
   711  				return nil, err
   712  			}
   713  
   714  			if l == "" {
   715  				break
   716  			}
   717  		}
   718  
   719  		if l = strings.TrimSpace(l); l == "" {
   720  			continue
   721  		}
   722  
   723  		if strings.HasPrefix(l, "---") {
   724  			break
   725  		}
   726  
   727  		attr := strings.SplitN(l, delimiter, 2)
   728  		if len(attr) != 2 {
   729  			break
   730  		}
   731  		key, val := strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1])
   732  		var err error
   733  		switch key {
   734  		case "cycles/second":
   735  			if cpuHz, err = strconv.ParseInt(val, 0, 64); err != nil {
   736  				return nil, errUnrecognized
   737  			}
   738  		case "sampling period":
   739  			if p.Period, err = strconv.ParseInt(val, 0, 64); err != nil {
   740  				return nil, errUnrecognized
   741  			}
   742  		case "ms since reset":
   743  			ms, err := strconv.ParseInt(val, 0, 64)
   744  			if err != nil {
   745  				return nil, errUnrecognized
   746  			}
   747  			p.DurationNanos = ms * 1000 * 1000
   748  		case "format":
   749  			// CPP contentionz profiles don't have format.
   750  			return nil, errUnrecognized
   751  		case "resolution":
   752  			// CPP contentionz profiles don't have resolution.
   753  			return nil, errUnrecognized
   754  		case "discarded samples":
   755  		default:
   756  			return nil, errUnrecognized
   757  		}
   758  	}
   759  
   760  	locs := make(map[uint64]*Location)
   761  	for {
   762  		if l = strings.TrimSpace(l); strings.HasPrefix(l, "---") {
   763  			break
   764  		}
   765  		value, addrs, err := parseContentionSample(l, p.Period, cpuHz)
   766  		if err != nil {
   767  			return nil, err
   768  		}
   769  		var sloc []*Location
   770  		for _, addr := range addrs {
   771  			// Addresses from stack traces point to the next instruction after
   772  			// each call. Adjust by -1 to land somewhere on the actual call.
   773  			addr--
   774  			loc := locs[addr]
   775  			if locs[addr] == nil {
   776  				loc = &Location{
   777  					Address: addr,
   778  				}
   779  				p.Location = append(p.Location, loc)
   780  				locs[addr] = loc
   781  			}
   782  			sloc = append(sloc, loc)
   783  		}
   784  		p.Sample = append(p.Sample, &Sample{
   785  			Value:    value,
   786  			Location: sloc,
   787  		})
   788  
   789  		if l, err = r.ReadString('\n'); err != nil {
   790  			if err != io.EOF {
   791  				return nil, err
   792  			}
   793  			if l == "" {
   794  				break
   795  			}
   796  		}
   797  	}
   798  
   799  	if err = parseAdditionalSections(l, r, p); err != nil {
   800  		return nil, err
   801  	}
   802  
   803  	return p, nil
   804  }
   805  
   806  // parseContentionSample parses a single row from a contention profile
   807  // into a new Sample.
   808  func parseContentionSample(line string, period, cpuHz int64) (value []int64, addrs []uint64, err error) {
   809  	sampleData := contentionSampleRE.FindStringSubmatch(line)
   810  	if sampleData == nil {
   811  		return value, addrs, errUnrecognized
   812  	}
   813  
   814  	v1, err := strconv.ParseInt(sampleData[1], 10, 64)
   815  	if err != nil {
   816  		return value, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
   817  	}
   818  	v2, err := strconv.ParseInt(sampleData[2], 10, 64)
   819  	if err != nil {
   820  		return value, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
   821  	}
   822  
   823  	// Unsample values if period and cpuHz are available.
   824  	// - Delays are scaled to cycles and then to nanoseconds.
   825  	// - Contentions are scaled to cycles.
   826  	if period > 0 {
   827  		if cpuHz > 0 {
   828  			cpuGHz := float64(cpuHz) / 1e9
   829  			v1 = int64(float64(v1) * float64(period) / cpuGHz)
   830  		}
   831  		v2 = v2 * period
   832  	}
   833  
   834  	value = []int64{v2, v1}
   835  	addrs = parseHexAddresses(sampleData[3])
   836  
   837  	return value, addrs, nil
   838  }
   839  
   840  // parseThread parses a Threadz profile and returns a new Profile.
   841  func parseThread(b []byte) (*Profile, error) {
   842  	r := bytes.NewBuffer(b)
   843  
   844  	var line string
   845  	var err error
   846  	for {
   847  		// Skip past comments and empty lines seeking a real header.
   848  		line, err = r.ReadString('\n')
   849  		if err != nil {
   850  			return nil, err
   851  		}
   852  		if !isSpaceOrComment(line) {
   853  			break
   854  		}
   855  	}
   856  
   857  	if m := threadzStartRE.FindStringSubmatch(line); m != nil {
   858  		// Advance over initial comments until first stack trace.
   859  		for {
   860  			line, err = r.ReadString('\n')
   861  			if err != nil {
   862  				if err != io.EOF {
   863  					return nil, err
   864  				}
   865  
   866  				if line == "" {
   867  					break
   868  				}
   869  			}
   870  			if sectionTrigger(line) != unrecognizedSection || line[0] == '-' {
   871  				break
   872  			}
   873  		}
   874  	} else if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
   875  		return nil, errUnrecognized
   876  	}
   877  
   878  	p := &Profile{
   879  		SampleType: []*ValueType{{Type: "thread", Unit: "count"}},
   880  		PeriodType: &ValueType{Type: "thread", Unit: "count"},
   881  		Period:     1,
   882  	}
   883  
   884  	locs := make(map[uint64]*Location)
   885  	// Recognize each thread and populate profile samples.
   886  	for sectionTrigger(line) == unrecognizedSection {
   887  		if strings.HasPrefix(line, "---- no stack trace for") {
   888  			line = ""
   889  			break
   890  		}
   891  		if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
   892  			return nil, errUnrecognized
   893  		}
   894  
   895  		var addrs []uint64
   896  		line, addrs, err = parseThreadSample(r)
   897  		if err != nil {
   898  			return nil, errUnrecognized
   899  		}
   900  		if len(addrs) == 0 {
   901  			// We got a --same as previous threads--. Bump counters.
   902  			if len(p.Sample) > 0 {
   903  				s := p.Sample[len(p.Sample)-1]
   904  				s.Value[0]++
   905  			}
   906  			continue
   907  		}
   908  
   909  		var sloc []*Location
   910  		for _, addr := range addrs {
   911  			// Addresses from stack traces point to the next instruction after
   912  			// each call. Adjust by -1 to land somewhere on the actual call.
   913  			addr--
   914  			loc := locs[addr]
   915  			if locs[addr] == nil {
   916  				loc = &Location{
   917  					Address: addr,
   918  				}
   919  				p.Location = append(p.Location, loc)
   920  				locs[addr] = loc
   921  			}
   922  			sloc = append(sloc, loc)
   923  		}
   924  
   925  		p.Sample = append(p.Sample, &Sample{
   926  			Value:    []int64{1},
   927  			Location: sloc,
   928  		})
   929  	}
   930  
   931  	if err = parseAdditionalSections(line, r, p); err != nil {
   932  		return nil, err
   933  	}
   934  
   935  	return p, nil
   936  }
   937  
   938  // parseThreadSample parses a symbolized or unsymbolized stack trace.
   939  // Returns the first line after the traceback, the sample (or nil if
   940  // it hits a 'same-as-previous' marker) and an error.
   941  func parseThreadSample(b *bytes.Buffer) (nextl string, addrs []uint64, err error) {
   942  	var l string
   943  	sameAsPrevious := false
   944  	for {
   945  		if l, err = b.ReadString('\n'); err != nil {
   946  			if err != io.EOF {
   947  				return "", nil, err
   948  			}
   949  			if l == "" {
   950  				break
   951  			}
   952  		}
   953  		if l = strings.TrimSpace(l); l == "" {
   954  			continue
   955  		}
   956  
   957  		if strings.HasPrefix(l, "---") {
   958  			break
   959  		}
   960  		if strings.Contains(l, "same as previous thread") {
   961  			sameAsPrevious = true
   962  			continue
   963  		}
   964  
   965  		addrs = append(addrs, parseHexAddresses(l)...)
   966  	}
   967  
   968  	if sameAsPrevious {
   969  		return l, nil, nil
   970  	}
   971  	return l, addrs, nil
   972  }
   973  
   974  // parseAdditionalSections parses any additional sections in the
   975  // profile, ignoring any unrecognized sections.
   976  func parseAdditionalSections(l string, b *bytes.Buffer, p *Profile) (err error) {
   977  	for {
   978  		if sectionTrigger(l) == memoryMapSection {
   979  			break
   980  		}
   981  		// Ignore any unrecognized sections.
   982  		if l, err := b.ReadString('\n'); err != nil {
   983  			if err != io.EOF {
   984  				return err
   985  			}
   986  			if l == "" {
   987  				break
   988  			}
   989  		}
   990  	}
   991  	return p.ParseMemoryMap(b)
   992  }
   993  
   994  // ParseMemoryMap parses a memory map in the format of
   995  // /proc/self/maps, and overrides the mappings in the current profile.
   996  // It renumbers the samples and locations in the profile correspondingly.
   997  func (p *Profile) ParseMemoryMap(rd io.Reader) error {
   998  	b := bufio.NewReader(rd)
   999  
  1000  	var attrs []string
  1001  	var r *strings.Replacer
  1002  	const delimiter = "="
  1003  	for {
  1004  		l, err := b.ReadString('\n')
  1005  		if err != nil {
  1006  			if err != io.EOF {
  1007  				return err
  1008  			}
  1009  			if l == "" {
  1010  				break
  1011  			}
  1012  		}
  1013  		if l = strings.TrimSpace(l); l == "" {
  1014  			continue
  1015  		}
  1016  
  1017  		if r != nil {
  1018  			l = r.Replace(l)
  1019  		}
  1020  		m, err := parseMappingEntry(l)
  1021  		if err != nil {
  1022  			if err == errUnrecognized {
  1023  				// Recognize assignments of the form: attr=value, and replace
  1024  				// $attr with value on subsequent mappings.
  1025  				if attr := strings.SplitN(l, delimiter, 2); len(attr) == 2 {
  1026  					attrs = append(attrs, "$"+strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1]))
  1027  					r = strings.NewReplacer(attrs...)
  1028  				}
  1029  				// Ignore any unrecognized entries
  1030  				continue
  1031  			}
  1032  			return err
  1033  		}
  1034  		if m == nil || (m.File == "" && len(p.Mapping) != 0) {
  1035  			// In some cases the first entry may include the address range
  1036  			// but not the name of the file. It should be followed by
  1037  			// another entry with the name.
  1038  			continue
  1039  		}
  1040  		if len(p.Mapping) == 1 && p.Mapping[0].File == "" {
  1041  			// Update the name if this is the entry following that empty one.
  1042  			p.Mapping[0].File = m.File
  1043  			continue
  1044  		}
  1045  		p.Mapping = append(p.Mapping, m)
  1046  	}
  1047  	p.remapLocationIDs()
  1048  	p.remapFunctionIDs()
  1049  	p.remapMappingIDs()
  1050  	return nil
  1051  }
  1052  
  1053  func parseMappingEntry(l string) (*Mapping, error) {
  1054  	mapping := &Mapping{}
  1055  	var err error
  1056  	if me := procMapsRE.FindStringSubmatch(l); len(me) == 9 {
  1057  		if !strings.Contains(me[3], "x") {
  1058  			// Skip non-executable entries.
  1059  			return nil, nil
  1060  		}
  1061  		if mapping.Start, err = strconv.ParseUint(me[1], 16, 64); err != nil {
  1062  			return nil, errUnrecognized
  1063  		}
  1064  		if mapping.Limit, err = strconv.ParseUint(me[2], 16, 64); err != nil {
  1065  			return nil, errUnrecognized
  1066  		}
  1067  		if me[4] != "" {
  1068  			if mapping.Offset, err = strconv.ParseUint(me[4], 16, 64); err != nil {
  1069  				return nil, errUnrecognized
  1070  			}
  1071  		}
  1072  		mapping.File = me[8]
  1073  		return mapping, nil
  1074  	}
  1075  
  1076  	if me := briefMapsRE.FindStringSubmatch(l); len(me) == 6 {
  1077  		if mapping.Start, err = strconv.ParseUint(me[1], 16, 64); err != nil {
  1078  			return nil, errUnrecognized
  1079  		}
  1080  		if mapping.Limit, err = strconv.ParseUint(me[2], 16, 64); err != nil {
  1081  			return nil, errUnrecognized
  1082  		}
  1083  		mapping.File = me[3]
  1084  		if me[5] != "" {
  1085  			if mapping.Offset, err = strconv.ParseUint(me[5], 16, 64); err != nil {
  1086  				return nil, errUnrecognized
  1087  			}
  1088  		}
  1089  		return mapping, nil
  1090  	}
  1091  
  1092  	return nil, errUnrecognized
  1093  }
  1094  
  1095  type sectionType int
  1096  
  1097  const (
  1098  	unrecognizedSection sectionType = iota
  1099  	memoryMapSection
  1100  )
  1101  
  1102  var memoryMapTriggers = []string{
  1103  	"--- Memory map: ---",
  1104  	"MAPPED_LIBRARIES:",
  1105  }
  1106  
  1107  func sectionTrigger(line string) sectionType {
  1108  	for _, trigger := range memoryMapTriggers {
  1109  		if strings.Contains(line, trigger) {
  1110  			return memoryMapSection
  1111  		}
  1112  	}
  1113  	return unrecognizedSection
  1114  }
  1115  
  1116  func (p *Profile) addLegacyFrameInfo() {
  1117  	switch {
  1118  	case isProfileType(p, heapzSampleTypes) ||
  1119  		isProfileType(p, heapzInUseSampleTypes) ||
  1120  		isProfileType(p, heapzAllocSampleTypes):
  1121  		p.DropFrames, p.KeepFrames = allocRxStr, allocSkipRxStr
  1122  	case isProfileType(p, contentionzSampleTypes):
  1123  		p.DropFrames, p.KeepFrames = lockRxStr, ""
  1124  	default:
  1125  		p.DropFrames, p.KeepFrames = cpuProfilerRxStr, ""
  1126  	}
  1127  }
  1128  
  1129  var heapzSampleTypes = []string{"allocations", "size"} // early Go pprof profiles
  1130  var heapzInUseSampleTypes = []string{"inuse_objects", "inuse_space"}
  1131  var heapzAllocSampleTypes = []string{"alloc_objects", "alloc_space"}
  1132  var contentionzSampleTypes = []string{"contentions", "delay"}
  1133  
  1134  func isProfileType(p *Profile, t []string) bool {
  1135  	st := p.SampleType
  1136  	if len(st) != len(t) {
  1137  		return false
  1138  	}
  1139  
  1140  	for i := range st {
  1141  		if st[i].Type != t[i] {
  1142  			return false
  1143  		}
  1144  	}
  1145  	return true
  1146  }
  1147  
// allocRxStr is a "|"-separated alternation of regular-expression
// fragments naming memory-allocation routines. addLegacyFrameInfo
// assigns it to DropFrames for heap profiles.
var allocRxStr = strings.Join([]string{
	// POSIX entry points.
	`calloc`,
	`cfree`,
	`malloc`,
	`free`,
	`memalign`,
	`do_memalign`,
	`(__)?posix_memalign`,
	`pvalloc`,
	`valloc`,
	`realloc`,

	// TC malloc.
	`tcmalloc::.*`,
	`tc_calloc`,
	`tc_cfree`,
	`tc_malloc`,
	`tc_free`,
	`tc_memalign`,
	`tc_posix_memalign`,
	`tc_pvalloc`,
	`tc_valloc`,
	`tc_realloc`,
	`tc_new`,
	`tc_delete`,
	`tc_newarray`,
	`tc_deletearray`,
	`tc_new_nothrow`,
	`tc_newarray_nothrow`,

	// Memory-allocation routines on OS X.
	`malloc_zone_malloc`,
	`malloc_zone_calloc`,
	`malloc_zone_valloc`,
	`malloc_zone_realloc`,
	`malloc_zone_memalign`,
	`malloc_zone_free`,

	// Go runtime
	`runtime\..*`,

	// Other misc. memory allocation routines
	`BaseArena::.*`,
	`(::)?do_malloc_no_errno`,
	`(::)?do_malloc_pages`,
	`(::)?do_malloc`,
	`DoSampledAllocation`,
	`MallocedMemBlock::MallocedMemBlock`,
	`_M_allocate`,
	`__builtin_(vec_)?delete`,
	`__builtin_(vec_)?new`,
	`__gnu_cxx::new_allocator::allocate`,
	`__libc_malloc`,
	`__malloc_alloc_template::allocate`,
	`allocate`,
	`cpp_alloc`,
	`operator new(\[\])?`,
	`simple_alloc::allocate`,
}, `|`)
  1208  
// allocSkipRxStr is a "|"-separated alternation of regular-expression
// fragments. addLegacyFrameInfo assigns it to KeepFrames for heap
// profiles, alongside allocRxStr as DropFrames.
var allocSkipRxStr = strings.Join([]string{
	// Preserve Go runtime frames that appear in the middle/bottom of
	// the stack.
	`runtime\.panic`,
}, `|`)
  1214  
// cpuProfilerRxStr is a "|"-separated alternation of regular-expression
// fragments naming profiler and signal-handler routines.
// addLegacyFrameInfo assigns it to DropFrames for every profile that is
// neither a heap nor a contention profile.
var cpuProfilerRxStr = strings.Join([]string{
	`ProfileData::Add`,
	`ProfileData::prof_handler`,
	`CpuProfiler::prof_handler`,
	`__pthread_sighandler`,
	`__restore`,
}, `|`)
  1222  
// lockRxStr is a "|"-separated alternation of regular-expression
// fragments naming lock-profiling and unlock routines.
// addLegacyFrameInfo assigns it to DropFrames for contention profiles.
var lockRxStr = strings.Join([]string{
	`RecordLockProfileData`,
	`(base::)?RecordLockProfileData.*`,
	`(base::)?SubmitMutexProfileData.*`,
	`(base::)?SubmitSpinLockProfileData.*`,
	`(Mutex::)?AwaitCommon.*`,
	`(Mutex::)?Unlock.*`,
	`(Mutex::)?UnlockSlow.*`,
	`(Mutex::)?ReaderUnlock.*`,
	`(MutexLock::)?~MutexLock.*`,
	`(SpinLock::)?Unlock.*`,
	`(SpinLock::)?SlowUnlock.*`,
	`(SpinLockHolder::)?~SpinLockHolder.*`,
}, `|`)