github.com/Kolosok86/http@v0.1.2/internal/profile/legacy_profile.go (about)

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This file implements parsers to convert legacy profiles into the
     6  // profile.proto format.
     7  
     8  package profile
     9  
    10  import (
    11  	"bufio"
    12  	"bytes"
    13  	"fmt"
    14  	"io"
    15  	"math"
    16  	"strconv"
    17  	"strings"
    18  
    19  	"github.com/Kolosok86/http/internal/lazyregexp"
    20  )
    21  
var (
	// countStartRE matches the header line of a Go count profile and
	// captures the profile type. countRE matches one sample line,
	// capturing the count and the space-separated list of hex addresses.
	countStartRE = lazyregexp.New(`\A(\w+) profile: total \d+\n\z`)
	countRE      = lazyregexp.New(`\A(\d+) @(( 0x[0-9a-f]+)+)\n\z`)

	// heapHeaderRE matches the header of a heap profile; group 5 is the
	// format name and group 6 the optional sampling period.
	// heapSampleRE matches one heap sample: two (possibly negative)
	// values, two bracketed values, and the list of addresses.
	heapHeaderRE = lazyregexp.New(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] *@ *(heap[_a-z0-9]*)/?(\d*)`)
	heapSampleRE = lazyregexp.New(`(-?\d+): *(-?\d+) *\[ *(\d+): *(\d+) *] @([ x0-9a-f]*)`)

	// contentionSampleRE matches one contention sample: two integers
	// followed by the list of addresses.
	contentionSampleRE = lazyregexp.New(`(\d+) *(\d+) @([ x0-9a-f]*)`)

	// hexNumberRE matches a single lower-case hex number with 0x prefix.
	hexNumberRE = lazyregexp.New(`0x[0-9a-f]+`)

	// growthHeaderRE matches the header of a growthz profile.
	growthHeaderRE = lazyregexp.New(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ growthz`)

	// fragmentationHeaderRE matches the header of a fragmentationz profile.
	fragmentationHeaderRE = lazyregexp.New(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ fragmentationz`)

	// threadzStartRE matches the first line of a threadz profile.
	// threadStartRE matches the line introducing one thread's stack.
	threadzStartRE = lazyregexp.New(`--- threadz \d+ ---`)
	threadStartRE  = lazyregexp.New(`--- Thread ([[:xdigit:]]+) \(name: (.*)/(\d+)\) stack: ---`)

	// procMapsRE matches a line shaped like an entry of /proc/<pid>/maps:
	// address range, permissions, offset, device, inode and optional path.
	// NOTE(review): not referenced in this part of the file; presumably
	// used by the memory-map parser.
	procMapsRE = lazyregexp.New(`([[:xdigit:]]+)-([[:xdigit:]]+)\s+([-rwxp]+)\s+([[:xdigit:]]+)\s+([[:xdigit:]]+):([[:xdigit:]]+)\s+([[:digit:]]+)\s*(\S+)?`)

	// briefMapsRE matches an abbreviated mapping line of the form
	// "start-limit: name ...".
	// NOTE(review): not referenced in this part of the file; presumably
	// used by the memory-map parser.
	briefMapsRE = lazyregexp.New(`\s*([[:xdigit:]]+)-([[:xdigit:]]+):\s*(\S+)(\s.*@)?([[:xdigit:]]+)?`)

	// LegacyHeapAllocated instructs the heapz parsers to use the
	// allocated memory stats instead of the default in-use memory. Note
	// that tcmalloc doesn't provide all allocated memory, only in-use
	// stats.
	LegacyHeapAllocated bool
)
    50  
    51  func isSpaceOrComment(line string) bool {
    52  	trimmed := strings.TrimSpace(line)
    53  	return len(trimmed) == 0 || trimmed[0] == '#'
    54  }
    55  
    56  // parseGoCount parses a Go count profile (e.g., threadcreate or
    57  // goroutine) and returns a new Profile.
    58  func parseGoCount(b []byte) (*Profile, error) {
    59  	r := bytes.NewBuffer(b)
    60  
    61  	var line string
    62  	var err error
    63  	for {
    64  		// Skip past comments and empty lines seeking a real header.
    65  		line, err = r.ReadString('\n')
    66  		if err != nil {
    67  			return nil, err
    68  		}
    69  		if !isSpaceOrComment(line) {
    70  			break
    71  		}
    72  	}
    73  
    74  	m := countStartRE.FindStringSubmatch(line)
    75  	if m == nil {
    76  		return nil, errUnrecognized
    77  	}
    78  	profileType := m[1]
    79  	p := &Profile{
    80  		PeriodType: &ValueType{Type: profileType, Unit: "count"},
    81  		Period:     1,
    82  		SampleType: []*ValueType{{Type: profileType, Unit: "count"}},
    83  	}
    84  	locations := make(map[uint64]*Location)
    85  	for {
    86  		line, err = r.ReadString('\n')
    87  		if err != nil {
    88  			if err == io.EOF {
    89  				break
    90  			}
    91  			return nil, err
    92  		}
    93  		if isSpaceOrComment(line) {
    94  			continue
    95  		}
    96  		if strings.HasPrefix(line, "---") {
    97  			break
    98  		}
    99  		m := countRE.FindStringSubmatch(line)
   100  		if m == nil {
   101  			return nil, errMalformed
   102  		}
   103  		n, err := strconv.ParseInt(m[1], 0, 64)
   104  		if err != nil {
   105  			return nil, errMalformed
   106  		}
   107  		fields := strings.Fields(m[2])
   108  		locs := make([]*Location, 0, len(fields))
   109  		for _, stk := range fields {
   110  			addr, err := strconv.ParseUint(stk, 0, 64)
   111  			if err != nil {
   112  				return nil, errMalformed
   113  			}
   114  			// Adjust all frames by -1 to land on the call instruction.
   115  			addr--
   116  			loc := locations[addr]
   117  			if loc == nil {
   118  				loc = &Location{
   119  					Address: addr,
   120  				}
   121  				locations[addr] = loc
   122  				p.Location = append(p.Location, loc)
   123  			}
   124  			locs = append(locs, loc)
   125  		}
   126  		p.Sample = append(p.Sample, &Sample{
   127  			Location: locs,
   128  			Value:    []int64{n},
   129  		})
   130  	}
   131  
   132  	if err = parseAdditionalSections(strings.TrimSpace(line), r, p); err != nil {
   133  		return nil, err
   134  	}
   135  	return p, nil
   136  }
   137  
   138  // remapLocationIDs ensures there is a location for each address
   139  // referenced by a sample, and remaps the samples to point to the new
   140  // location ids.
   141  func (p *Profile) remapLocationIDs() {
   142  	seen := make(map[*Location]bool, len(p.Location))
   143  	var locs []*Location
   144  
   145  	for _, s := range p.Sample {
   146  		for _, l := range s.Location {
   147  			if seen[l] {
   148  				continue
   149  			}
   150  			l.ID = uint64(len(locs) + 1)
   151  			locs = append(locs, l)
   152  			seen[l] = true
   153  		}
   154  	}
   155  	p.Location = locs
   156  }
   157  
   158  func (p *Profile) remapFunctionIDs() {
   159  	seen := make(map[*Function]bool, len(p.Function))
   160  	var fns []*Function
   161  
   162  	for _, l := range p.Location {
   163  		for _, ln := range l.Line {
   164  			fn := ln.Function
   165  			if fn == nil || seen[fn] {
   166  				continue
   167  			}
   168  			fn.ID = uint64(len(fns) + 1)
   169  			fns = append(fns, fn)
   170  			seen[fn] = true
   171  		}
   172  	}
   173  	p.Function = fns
   174  }
   175  
   176  // remapMappingIDs matches location addresses with existing mappings
   177  // and updates them appropriately. This is O(N*M), if this ever shows
   178  // up as a bottleneck, evaluate sorting the mappings and doing a
   179  // binary search, which would make it O(N*log(M)).
   180  func (p *Profile) remapMappingIDs() {
   181  	if len(p.Mapping) == 0 {
   182  		return
   183  	}
   184  
   185  	// Some profile handlers will incorrectly set regions for the main
   186  	// executable if its section is remapped. Fix them through heuristics.
   187  
   188  	// Remove the initial mapping if named '/anon_hugepage' and has a
   189  	// consecutive adjacent mapping.
   190  	if m := p.Mapping[0]; strings.HasPrefix(m.File, "/anon_hugepage") {
   191  		if len(p.Mapping) > 1 && m.Limit == p.Mapping[1].Start {
   192  			p.Mapping = p.Mapping[1:]
   193  		}
   194  	}
   195  
   196  	for _, l := range p.Location {
   197  		if a := l.Address; a != 0 {
   198  			for _, m := range p.Mapping {
   199  				if m.Start <= a && a < m.Limit {
   200  					l.Mapping = m
   201  					break
   202  				}
   203  			}
   204  		}
   205  	}
   206  
   207  	// Reset all mapping IDs.
   208  	for i, m := range p.Mapping {
   209  		m.ID = uint64(i + 1)
   210  	}
   211  }
   212  
// cpuInts lists the word decoders that parseCPU tries, in order, when
// probing the header of a binary CPU profile: 32-bit little-endian,
// 32-bit big-endian, 64-bit little-endian and 64-bit big-endian.
var cpuInts = []func([]byte) (uint64, []byte){
	get32l,
	get32b,
	get64l,
	get64b,
}
   219  
   220  func get32l(b []byte) (uint64, []byte) {
   221  	if len(b) < 4 {
   222  		return 0, nil
   223  	}
   224  	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24, b[4:]
   225  }
   226  
   227  func get32b(b []byte) (uint64, []byte) {
   228  	if len(b) < 4 {
   229  		return 0, nil
   230  	}
   231  	return uint64(b[3]) | uint64(b[2])<<8 | uint64(b[1])<<16 | uint64(b[0])<<24, b[4:]
   232  }
   233  
   234  func get64l(b []byte) (uint64, []byte) {
   235  	if len(b) < 8 {
   236  		return 0, nil
   237  	}
   238  	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56, b[8:]
   239  }
   240  
   241  func get64b(b []byte) (uint64, []byte) {
   242  	if len(b) < 8 {
   243  		return 0, nil
   244  	}
   245  	return uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24 | uint64(b[3])<<32 | uint64(b[2])<<40 | uint64(b[1])<<48 | uint64(b[0])<<56, b[8:]
   246  }
   247  
   248  // ParseTracebacks parses a set of tracebacks and returns a newly
   249  // populated profile. It will accept any text file and generate a
   250  // Profile out of it with any hex addresses it can identify, including
   251  // a process map if it can recognize one. Each sample will include a
   252  // tag "source" with the addresses recognized in string format.
   253  func ParseTracebacks(b []byte) (*Profile, error) {
   254  	r := bytes.NewBuffer(b)
   255  
   256  	p := &Profile{
   257  		PeriodType: &ValueType{Type: "trace", Unit: "count"},
   258  		Period:     1,
   259  		SampleType: []*ValueType{
   260  			{Type: "trace", Unit: "count"},
   261  		},
   262  	}
   263  
   264  	var sources []string
   265  	var sloc []*Location
   266  
   267  	locs := make(map[uint64]*Location)
   268  	for {
   269  		l, err := r.ReadString('\n')
   270  		if err != nil {
   271  			if err != io.EOF {
   272  				return nil, err
   273  			}
   274  			if l == "" {
   275  				break
   276  			}
   277  		}
   278  		if sectionTrigger(l) == memoryMapSection {
   279  			break
   280  		}
   281  		if s, addrs := extractHexAddresses(l); len(s) > 0 {
   282  			for _, addr := range addrs {
   283  				// Addresses from stack traces point to the next instruction after
   284  				// each call. Adjust by -1 to land somewhere on the actual call.
   285  				addr--
   286  				loc := locs[addr]
   287  				if locs[addr] == nil {
   288  					loc = &Location{
   289  						Address: addr,
   290  					}
   291  					p.Location = append(p.Location, loc)
   292  					locs[addr] = loc
   293  				}
   294  				sloc = append(sloc, loc)
   295  			}
   296  
   297  			sources = append(sources, s...)
   298  		} else {
   299  			if len(sources) > 0 || len(sloc) > 0 {
   300  				addTracebackSample(sloc, sources, p)
   301  				sloc, sources = nil, nil
   302  			}
   303  		}
   304  	}
   305  
   306  	// Add final sample to save any leftover data.
   307  	if len(sources) > 0 || len(sloc) > 0 {
   308  		addTracebackSample(sloc, sources, p)
   309  	}
   310  
   311  	if err := p.ParseMemoryMap(r); err != nil {
   312  		return nil, err
   313  	}
   314  	return p, nil
   315  }
   316  
   317  func addTracebackSample(l []*Location, s []string, p *Profile) {
   318  	p.Sample = append(p.Sample,
   319  		&Sample{
   320  			Value:    []int64{1},
   321  			Location: l,
   322  			Label:    map[string][]string{"source": s},
   323  		})
   324  }
   325  
   326  // parseCPU parses a profilez legacy profile and returns a newly
   327  // populated Profile.
   328  //
   329  // The general format for profilez samples is a sequence of words in
   330  // binary format. The first words are a header with the following data:
   331  //
   332  //	1st word -- 0
   333  //	2nd word -- 3
   334  //	3rd word -- 0 if a c++ application, 1 if a java application.
   335  //	4th word -- Sampling period (in microseconds).
   336  //	5th word -- Padding.
   337  func parseCPU(b []byte) (*Profile, error) {
   338  	var parse func([]byte) (uint64, []byte)
   339  	var n1, n2, n3, n4, n5 uint64
   340  	for _, parse = range cpuInts {
   341  		var tmp []byte
   342  		n1, tmp = parse(b)
   343  		n2, tmp = parse(tmp)
   344  		n3, tmp = parse(tmp)
   345  		n4, tmp = parse(tmp)
   346  		n5, tmp = parse(tmp)
   347  
   348  		if tmp != nil && n1 == 0 && n2 == 3 && n3 == 0 && n4 > 0 && n5 == 0 {
   349  			b = tmp
   350  			return cpuProfile(b, int64(n4), parse)
   351  		}
   352  	}
   353  	return nil, errUnrecognized
   354  }
   355  
   356  // cpuProfile returns a new Profile from C++ profilez data.
   357  // b is the profile bytes after the header, period is the profiling
   358  // period, and parse is a function to parse 8-byte chunks from the
   359  // profile in its native endianness.
   360  func cpuProfile(b []byte, period int64, parse func(b []byte) (uint64, []byte)) (*Profile, error) {
   361  	p := &Profile{
   362  		Period:     period * 1000,
   363  		PeriodType: &ValueType{Type: "cpu", Unit: "nanoseconds"},
   364  		SampleType: []*ValueType{
   365  			{Type: "samples", Unit: "count"},
   366  			{Type: "cpu", Unit: "nanoseconds"},
   367  		},
   368  	}
   369  	var err error
   370  	if b, _, err = parseCPUSamples(b, parse, true, p); err != nil {
   371  		return nil, err
   372  	}
   373  
   374  	// If all samples have the same second-to-the-bottom frame, it
   375  	// strongly suggests that it is an uninteresting artifact of
   376  	// measurement -- a stack frame pushed by the signal handler. The
   377  	// bottom frame is always correct as it is picked up from the signal
   378  	// structure, not the stack. Check if this is the case and if so,
   379  	// remove.
   380  	if len(p.Sample) > 1 && len(p.Sample[0].Location) > 1 {
   381  		allSame := true
   382  		id1 := p.Sample[0].Location[1].Address
   383  		for _, s := range p.Sample {
   384  			if len(s.Location) < 2 || id1 != s.Location[1].Address {
   385  				allSame = false
   386  				break
   387  			}
   388  		}
   389  		if allSame {
   390  			for _, s := range p.Sample {
   391  				s.Location = append(s.Location[:1], s.Location[2:]...)
   392  			}
   393  		}
   394  	}
   395  
   396  	if err := p.ParseMemoryMap(bytes.NewBuffer(b)); err != nil {
   397  		return nil, err
   398  	}
   399  	return p, nil
   400  }
   401  
   402  // parseCPUSamples parses a collection of profilez samples from a
   403  // profile.
   404  //
   405  // profilez samples are a repeated sequence of stack frames of the
   406  // form:
   407  //
   408  //	1st word -- The number of times this stack was encountered.
   409  //	2nd word -- The size of the stack (StackSize).
   410  //	3rd word -- The first address on the stack.
   411  //	...
   412  //	StackSize + 2 -- The last address on the stack
   413  //
   414  // The last stack trace is of the form:
   415  //
   416  //	1st word -- 0
   417  //	2nd word -- 1
   418  //	3rd word -- 0
   419  //
   420  // Addresses from stack traces may point to the next instruction after
   421  // each call. Optionally adjust by -1 to land somewhere on the actual
   422  // call (except for the leaf, which is not a call).
   423  func parseCPUSamples(b []byte, parse func(b []byte) (uint64, []byte), adjust bool, p *Profile) ([]byte, map[uint64]*Location, error) {
   424  	locs := make(map[uint64]*Location)
   425  	for len(b) > 0 {
   426  		var count, nstk uint64
   427  		count, b = parse(b)
   428  		nstk, b = parse(b)
   429  		if b == nil || nstk > uint64(len(b)/4) {
   430  			return nil, nil, errUnrecognized
   431  		}
   432  		var sloc []*Location
   433  		addrs := make([]uint64, nstk)
   434  		for i := 0; i < int(nstk); i++ {
   435  			addrs[i], b = parse(b)
   436  		}
   437  
   438  		if count == 0 && nstk == 1 && addrs[0] == 0 {
   439  			// End of data marker
   440  			break
   441  		}
   442  		for i, addr := range addrs {
   443  			if adjust && i > 0 {
   444  				addr--
   445  			}
   446  			loc := locs[addr]
   447  			if loc == nil {
   448  				loc = &Location{
   449  					Address: addr,
   450  				}
   451  				locs[addr] = loc
   452  				p.Location = append(p.Location, loc)
   453  			}
   454  			sloc = append(sloc, loc)
   455  		}
   456  		p.Sample = append(p.Sample,
   457  			&Sample{
   458  				Value:    []int64{int64(count), int64(count) * p.Period},
   459  				Location: sloc,
   460  			})
   461  	}
   462  	// Reached the end without finding the EOD marker.
   463  	return b, locs, nil
   464  }
   465  
   466  // parseHeap parses a heapz legacy or a growthz profile and
   467  // returns a newly populated Profile.
   468  func parseHeap(b []byte) (p *Profile, err error) {
   469  	r := bytes.NewBuffer(b)
   470  	l, err := r.ReadString('\n')
   471  	if err != nil {
   472  		return nil, errUnrecognized
   473  	}
   474  
   475  	sampling := ""
   476  
   477  	if header := heapHeaderRE.FindStringSubmatch(l); header != nil {
   478  		p = &Profile{
   479  			SampleType: []*ValueType{
   480  				{Type: "objects", Unit: "count"},
   481  				{Type: "space", Unit: "bytes"},
   482  			},
   483  			PeriodType: &ValueType{Type: "objects", Unit: "bytes"},
   484  		}
   485  
   486  		var period int64
   487  		if len(header[6]) > 0 {
   488  			if period, err = strconv.ParseInt(header[6], 10, 64); err != nil {
   489  				return nil, errUnrecognized
   490  			}
   491  		}
   492  
   493  		switch header[5] {
   494  		case "heapz_v2", "heap_v2":
   495  			sampling, p.Period = "v2", period
   496  		case "heapprofile":
   497  			sampling, p.Period = "", 1
   498  		case "heap":
   499  			sampling, p.Period = "v2", period/2
   500  		default:
   501  			return nil, errUnrecognized
   502  		}
   503  	} else if header = growthHeaderRE.FindStringSubmatch(l); header != nil {
   504  		p = &Profile{
   505  			SampleType: []*ValueType{
   506  				{Type: "objects", Unit: "count"},
   507  				{Type: "space", Unit: "bytes"},
   508  			},
   509  			PeriodType: &ValueType{Type: "heapgrowth", Unit: "count"},
   510  			Period:     1,
   511  		}
   512  	} else if header = fragmentationHeaderRE.FindStringSubmatch(l); header != nil {
   513  		p = &Profile{
   514  			SampleType: []*ValueType{
   515  				{Type: "objects", Unit: "count"},
   516  				{Type: "space", Unit: "bytes"},
   517  			},
   518  			PeriodType: &ValueType{Type: "allocations", Unit: "count"},
   519  			Period:     1,
   520  		}
   521  	} else {
   522  		return nil, errUnrecognized
   523  	}
   524  
   525  	if LegacyHeapAllocated {
   526  		for _, st := range p.SampleType {
   527  			st.Type = "alloc_" + st.Type
   528  		}
   529  	} else {
   530  		for _, st := range p.SampleType {
   531  			st.Type = "inuse_" + st.Type
   532  		}
   533  	}
   534  
   535  	locs := make(map[uint64]*Location)
   536  	for {
   537  		l, err = r.ReadString('\n')
   538  		if err != nil {
   539  			if err != io.EOF {
   540  				return nil, err
   541  			}
   542  
   543  			if l == "" {
   544  				break
   545  			}
   546  		}
   547  
   548  		if isSpaceOrComment(l) {
   549  			continue
   550  		}
   551  		l = strings.TrimSpace(l)
   552  
   553  		if sectionTrigger(l) != unrecognizedSection {
   554  			break
   555  		}
   556  
   557  		value, blocksize, addrs, err := parseHeapSample(l, p.Period, sampling)
   558  		if err != nil {
   559  			return nil, err
   560  		}
   561  		var sloc []*Location
   562  		for _, addr := range addrs {
   563  			// Addresses from stack traces point to the next instruction after
   564  			// each call. Adjust by -1 to land somewhere on the actual call.
   565  			addr--
   566  			loc := locs[addr]
   567  			if locs[addr] == nil {
   568  				loc = &Location{
   569  					Address: addr,
   570  				}
   571  				p.Location = append(p.Location, loc)
   572  				locs[addr] = loc
   573  			}
   574  			sloc = append(sloc, loc)
   575  		}
   576  
   577  		p.Sample = append(p.Sample, &Sample{
   578  			Value:    value,
   579  			Location: sloc,
   580  			NumLabel: map[string][]int64{"bytes": {blocksize}},
   581  		})
   582  	}
   583  
   584  	if err = parseAdditionalSections(l, r, p); err != nil {
   585  		return nil, err
   586  	}
   587  	return p, nil
   588  }
   589  
   590  // parseHeapSample parses a single row from a heap profile into a new Sample.
   591  func parseHeapSample(line string, rate int64, sampling string) (value []int64, blocksize int64, addrs []uint64, err error) {
   592  	sampleData := heapSampleRE.FindStringSubmatch(line)
   593  	if len(sampleData) != 6 {
   594  		return value, blocksize, addrs, fmt.Errorf("unexpected number of sample values: got %d, want 6", len(sampleData))
   595  	}
   596  
   597  	// Use first two values by default; tcmalloc sampling generates the
   598  	// same value for both, only the older heap-profile collect separate
   599  	// stats for in-use and allocated objects.
   600  	valueIndex := 1
   601  	if LegacyHeapAllocated {
   602  		valueIndex = 3
   603  	}
   604  
   605  	var v1, v2 int64
   606  	if v1, err = strconv.ParseInt(sampleData[valueIndex], 10, 64); err != nil {
   607  		return value, blocksize, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
   608  	}
   609  	if v2, err = strconv.ParseInt(sampleData[valueIndex+1], 10, 64); err != nil {
   610  		return value, blocksize, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
   611  	}
   612  
   613  	if v1 == 0 {
   614  		if v2 != 0 {
   615  			return value, blocksize, addrs, fmt.Errorf("allocation count was 0 but allocation bytes was %d", v2)
   616  		}
   617  	} else {
   618  		blocksize = v2 / v1
   619  		if sampling == "v2" {
   620  			v1, v2 = scaleHeapSample(v1, v2, rate)
   621  		}
   622  	}
   623  
   624  	value = []int64{v1, v2}
   625  	addrs = parseHexAddresses(sampleData[5])
   626  
   627  	return value, blocksize, addrs, nil
   628  }
   629  
   630  // extractHexAddresses extracts hex numbers from a string and returns
   631  // them, together with their numeric value, in a slice.
   632  func extractHexAddresses(s string) ([]string, []uint64) {
   633  	hexStrings := hexNumberRE.FindAllString(s, -1)
   634  	var ids []uint64
   635  	for _, s := range hexStrings {
   636  		if id, err := strconv.ParseUint(s, 0, 64); err == nil {
   637  			ids = append(ids, id)
   638  		} else {
   639  			// Do not expect any parsing failures due to the regexp matching.
   640  			panic("failed to parse hex value:" + s)
   641  		}
   642  	}
   643  	return hexStrings, ids
   644  }
   645  
   646  // parseHexAddresses parses hex numbers from a string and returns them
   647  // in a slice.
   648  func parseHexAddresses(s string) []uint64 {
   649  	_, ids := extractHexAddresses(s)
   650  	return ids
   651  }
   652  
   653  // scaleHeapSample adjusts the data from a heapz Sample to
   654  // account for its probability of appearing in the collected
   655  // data. heapz profiles are a sampling of the memory allocations
   656  // requests in a program. We estimate the unsampled value by dividing
   657  // each collected sample by its probability of appearing in the
   658  // profile. heapz v2 profiles rely on a poisson process to determine
   659  // which samples to collect, based on the desired average collection
   660  // rate R. The probability of a sample of size S to appear in that
   661  // profile is 1-exp(-S/R).
   662  func scaleHeapSample(count, size, rate int64) (int64, int64) {
   663  	if count == 0 || size == 0 {
   664  		return 0, 0
   665  	}
   666  
   667  	if rate <= 1 {
   668  		// if rate==1 all samples were collected so no adjustment is needed.
   669  		// if rate<1 treat as unknown and skip scaling.
   670  		return count, size
   671  	}
   672  
   673  	avgSize := float64(size) / float64(count)
   674  	scale := 1 / (1 - math.Exp(-avgSize/float64(rate)))
   675  
   676  	return int64(float64(count) * scale), int64(float64(size) * scale)
   677  }
   678  
   679  // parseContention parses a mutex or contention profile. There are 2 cases:
   680  // "--- contentionz " for legacy C++ profiles (and backwards compatibility)
   681  // "--- mutex:" or "--- contention:" for profiles generated by the Go runtime.
   682  // This code converts the text output from runtime into a *Profile. (In the future
   683  // the runtime might write a serialized Profile directly making this unnecessary.)
   684  func parseContention(b []byte) (*Profile, error) {
   685  	r := bytes.NewBuffer(b)
   686  	var l string
   687  	var err error
   688  	for {
   689  		// Skip past comments and empty lines seeking a real header.
   690  		l, err = r.ReadString('\n')
   691  		if err != nil {
   692  			return nil, err
   693  		}
   694  		if !isSpaceOrComment(l) {
   695  			break
   696  		}
   697  	}
   698  
   699  	if strings.HasPrefix(l, "--- contentionz ") {
   700  		return parseCppContention(r)
   701  	} else if strings.HasPrefix(l, "--- mutex:") {
   702  		return parseCppContention(r)
   703  	} else if strings.HasPrefix(l, "--- contention:") {
   704  		return parseCppContention(r)
   705  	}
   706  	return nil, errUnrecognized
   707  }
   708  
   709  // parseCppContention parses the output from synchronization_profiling.cc
   710  // for backward compatibility, and the compatible (non-debug) block profile
   711  // output from the Go runtime.
   712  func parseCppContention(r *bytes.Buffer) (*Profile, error) {
   713  	p := &Profile{
   714  		PeriodType: &ValueType{Type: "contentions", Unit: "count"},
   715  		Period:     1,
   716  		SampleType: []*ValueType{
   717  			{Type: "contentions", Unit: "count"},
   718  			{Type: "delay", Unit: "nanoseconds"},
   719  		},
   720  	}
   721  
   722  	var cpuHz int64
   723  	var l string
   724  	var err error
   725  	// Parse text of the form "attribute = value" before the samples.
   726  	const delimiter = '='
   727  	for {
   728  		l, err = r.ReadString('\n')
   729  		if err != nil {
   730  			if err != io.EOF {
   731  				return nil, err
   732  			}
   733  
   734  			if l == "" {
   735  				break
   736  			}
   737  		}
   738  		if isSpaceOrComment(l) {
   739  			continue
   740  		}
   741  
   742  		if l = strings.TrimSpace(l); l == "" {
   743  			continue
   744  		}
   745  
   746  		if strings.HasPrefix(l, "---") {
   747  			break
   748  		}
   749  
   750  		index := strings.IndexByte(l, delimiter)
   751  		if index < 0 {
   752  			break
   753  		}
   754  		key := l[:index]
   755  		val := l[index+1:]
   756  
   757  		key, val = strings.TrimSpace(key), strings.TrimSpace(val)
   758  		var err error
   759  		switch key {
   760  		case "cycles/second":
   761  			if cpuHz, err = strconv.ParseInt(val, 0, 64); err != nil {
   762  				return nil, errUnrecognized
   763  			}
   764  		case "sampling period":
   765  			if p.Period, err = strconv.ParseInt(val, 0, 64); err != nil {
   766  				return nil, errUnrecognized
   767  			}
   768  		case "ms since reset":
   769  			ms, err := strconv.ParseInt(val, 0, 64)
   770  			if err != nil {
   771  				return nil, errUnrecognized
   772  			}
   773  			p.DurationNanos = ms * 1000 * 1000
   774  		case "format":
   775  			// CPP contentionz profiles don't have format.
   776  			return nil, errUnrecognized
   777  		case "resolution":
   778  			// CPP contentionz profiles don't have resolution.
   779  			return nil, errUnrecognized
   780  		case "discarded samples":
   781  		default:
   782  			return nil, errUnrecognized
   783  		}
   784  	}
   785  
   786  	locs := make(map[uint64]*Location)
   787  	for {
   788  		if !isSpaceOrComment(l) {
   789  			if l = strings.TrimSpace(l); strings.HasPrefix(l, "---") {
   790  				break
   791  			}
   792  			value, addrs, err := parseContentionSample(l, p.Period, cpuHz)
   793  			if err != nil {
   794  				return nil, err
   795  			}
   796  			var sloc []*Location
   797  			for _, addr := range addrs {
   798  				// Addresses from stack traces point to the next instruction after
   799  				// each call. Adjust by -1 to land somewhere on the actual call.
   800  				addr--
   801  				loc := locs[addr]
   802  				if locs[addr] == nil {
   803  					loc = &Location{
   804  						Address: addr,
   805  					}
   806  					p.Location = append(p.Location, loc)
   807  					locs[addr] = loc
   808  				}
   809  				sloc = append(sloc, loc)
   810  			}
   811  			p.Sample = append(p.Sample, &Sample{
   812  				Value:    value,
   813  				Location: sloc,
   814  			})
   815  		}
   816  
   817  		if l, err = r.ReadString('\n'); err != nil {
   818  			if err != io.EOF {
   819  				return nil, err
   820  			}
   821  			if l == "" {
   822  				break
   823  			}
   824  		}
   825  	}
   826  
   827  	if err = parseAdditionalSections(l, r, p); err != nil {
   828  		return nil, err
   829  	}
   830  
   831  	return p, nil
   832  }
   833  
   834  // parseContentionSample parses a single row from a contention profile
   835  // into a new Sample.
   836  func parseContentionSample(line string, period, cpuHz int64) (value []int64, addrs []uint64, err error) {
   837  	sampleData := contentionSampleRE.FindStringSubmatch(line)
   838  	if sampleData == nil {
   839  		return value, addrs, errUnrecognized
   840  	}
   841  
   842  	v1, err := strconv.ParseInt(sampleData[1], 10, 64)
   843  	if err != nil {
   844  		return value, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
   845  	}
   846  	v2, err := strconv.ParseInt(sampleData[2], 10, 64)
   847  	if err != nil {
   848  		return value, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
   849  	}
   850  
   851  	// Unsample values if period and cpuHz are available.
   852  	// - Delays are scaled to cycles and then to nanoseconds.
   853  	// - Contentions are scaled to cycles.
   854  	if period > 0 {
   855  		if cpuHz > 0 {
   856  			cpuGHz := float64(cpuHz) / 1e9
   857  			v1 = int64(float64(v1) * float64(period) / cpuGHz)
   858  		}
   859  		v2 = v2 * period
   860  	}
   861  
   862  	value = []int64{v2, v1}
   863  	addrs = parseHexAddresses(sampleData[3])
   864  
   865  	return value, addrs, nil
   866  }
   867  
   868  // parseThread parses a Threadz profile and returns a new Profile.
   869  func parseThread(b []byte) (*Profile, error) {
   870  	r := bytes.NewBuffer(b)
   871  
   872  	var line string
   873  	var err error
   874  	for {
   875  		// Skip past comments and empty lines seeking a real header.
   876  		line, err = r.ReadString('\n')
   877  		if err != nil {
   878  			return nil, err
   879  		}
   880  		if !isSpaceOrComment(line) {
   881  			break
   882  		}
   883  	}
   884  
   885  	if m := threadzStartRE.FindStringSubmatch(line); m != nil {
   886  		// Advance over initial comments until first stack trace.
   887  		for {
   888  			line, err = r.ReadString('\n')
   889  			if err != nil {
   890  				if err != io.EOF {
   891  					return nil, err
   892  				}
   893  
   894  				if line == "" {
   895  					break
   896  				}
   897  			}
   898  			if sectionTrigger(line) != unrecognizedSection || line[0] == '-' {
   899  				break
   900  			}
   901  		}
   902  	} else if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
   903  		return nil, errUnrecognized
   904  	}
   905  
   906  	p := &Profile{
   907  		SampleType: []*ValueType{{Type: "thread", Unit: "count"}},
   908  		PeriodType: &ValueType{Type: "thread", Unit: "count"},
   909  		Period:     1,
   910  	}
   911  
   912  	locs := make(map[uint64]*Location)
   913  	// Recognize each thread and populate profile samples.
   914  	for sectionTrigger(line) == unrecognizedSection {
   915  		if strings.HasPrefix(line, "---- no stack trace for") {
   916  			line = ""
   917  			break
   918  		}
   919  		if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
   920  			return nil, errUnrecognized
   921  		}
   922  
   923  		var addrs []uint64
   924  		line, addrs, err = parseThreadSample(r)
   925  		if err != nil {
   926  			return nil, errUnrecognized
   927  		}
   928  		if len(addrs) == 0 {
   929  			// We got a --same as previous threads--. Bump counters.
   930  			if len(p.Sample) > 0 {
   931  				s := p.Sample[len(p.Sample)-1]
   932  				s.Value[0]++
   933  			}
   934  			continue
   935  		}
   936  
   937  		var sloc []*Location
   938  		for _, addr := range addrs {
   939  			// Addresses from stack traces point to the next instruction after
   940  			// each call. Adjust by -1 to land somewhere on the actual call.
   941  			addr--
   942  			loc := locs[addr]
   943  			if locs[addr] == nil {
   944  				loc = &Location{
   945  					Address: addr,
   946  				}
   947  				p.Location = append(p.Location, loc)
   948  				locs[addr] = loc
   949  			}
   950  			sloc = append(sloc, loc)
   951  		}
   952  
   953  		p.Sample = append(p.Sample, &Sample{
   954  			Value:    []int64{1},
   955  			Location: sloc,
   956  		})
   957  	}
   958  
   959  	if err = parseAdditionalSections(line, r, p); err != nil {
   960  		return nil, err
   961  	}
   962  
   963  	return p, nil
   964  }
   965  
   966  // parseThreadSample parses a symbolized or unsymbolized stack trace.
   967  // Returns the first line after the traceback, the sample (or nil if
   968  // it hits a 'same-as-previous' marker) and an error.
   969  func parseThreadSample(b *bytes.Buffer) (nextl string, addrs []uint64, err error) {
   970  	var l string
   971  	sameAsPrevious := false
   972  	for {
   973  		if l, err = b.ReadString('\n'); err != nil {
   974  			if err != io.EOF {
   975  				return "", nil, err
   976  			}
   977  			if l == "" {
   978  				break
   979  			}
   980  		}
   981  		if l = strings.TrimSpace(l); l == "" {
   982  			continue
   983  		}
   984  
   985  		if strings.HasPrefix(l, "---") {
   986  			break
   987  		}
   988  		if strings.Contains(l, "same as previous thread") {
   989  			sameAsPrevious = true
   990  			continue
   991  		}
   992  
   993  		addrs = append(addrs, parseHexAddresses(l)...)
   994  	}
   995  
   996  	if sameAsPrevious {
   997  		return l, nil, nil
   998  	}
   999  	return l, addrs, nil
  1000  }
  1001  
  1002  // parseAdditionalSections parses any additional sections in the
  1003  // profile, ignoring any unrecognized sections.
  1004  func parseAdditionalSections(l string, b *bytes.Buffer, p *Profile) (err error) {
  1005  	for {
  1006  		if sectionTrigger(l) == memoryMapSection {
  1007  			break
  1008  		}
  1009  		// Ignore any unrecognized sections.
  1010  		if l, err := b.ReadString('\n'); err != nil {
  1011  			if err != io.EOF {
  1012  				return err
  1013  			}
  1014  			if l == "" {
  1015  				break
  1016  			}
  1017  		}
  1018  	}
  1019  	return p.ParseMemoryMap(b)
  1020  }
  1021  
  1022  // ParseMemoryMap parses a memory map in the format of
  1023  // /proc/self/maps, and overrides the mappings in the current profile.
  1024  // It renumbers the samples and locations in the profile correspondingly.
  1025  func (p *Profile) ParseMemoryMap(rd io.Reader) error {
  1026  	b := bufio.NewReader(rd)
  1027  
  1028  	var attrs []string
  1029  	var r *strings.Replacer
  1030  	const delimiter = '='
  1031  	for {
  1032  		l, err := b.ReadString('\n')
  1033  		if err != nil {
  1034  			if err != io.EOF {
  1035  				return err
  1036  			}
  1037  			if l == "" {
  1038  				break
  1039  			}
  1040  		}
  1041  		if l = strings.TrimSpace(l); l == "" {
  1042  			continue
  1043  		}
  1044  
  1045  		if r != nil {
  1046  			l = r.Replace(l)
  1047  		}
  1048  		m, err := parseMappingEntry(l)
  1049  		if err != nil {
  1050  			if err == errUnrecognized {
  1051  				// Recognize assignments of the form: attr=value, and replace
  1052  				// $attr with value on subsequent mappings.
  1053  				idx := strings.IndexByte(l, delimiter)
  1054  				if idx >= 0 {
  1055  					attr := l[:idx]
  1056  					value := l[idx+1:]
  1057  					attrs = append(attrs, "$"+strings.TrimSpace(attr), strings.TrimSpace(value))
  1058  					r = strings.NewReplacer(attrs...)
  1059  				}
  1060  				// Ignore any unrecognized entries
  1061  				continue
  1062  			}
  1063  			return err
  1064  		}
  1065  		if m == nil || (m.File == "" && len(p.Mapping) != 0) {
  1066  			// In some cases the first entry may include the address range
  1067  			// but not the name of the file. It should be followed by
  1068  			// another entry with the name.
  1069  			continue
  1070  		}
  1071  		if len(p.Mapping) == 1 && p.Mapping[0].File == "" {
  1072  			// Update the name if this is the entry following that empty one.
  1073  			p.Mapping[0].File = m.File
  1074  			continue
  1075  		}
  1076  		p.Mapping = append(p.Mapping, m)
  1077  	}
  1078  	p.remapLocationIDs()
  1079  	p.remapFunctionIDs()
  1080  	p.remapMappingIDs()
  1081  	return nil
  1082  }
  1083  
  1084  func parseMappingEntry(l string) (*Mapping, error) {
  1085  	mapping := &Mapping{}
  1086  	var err error
  1087  	if me := procMapsRE.FindStringSubmatch(l); len(me) == 9 {
  1088  		if !strings.Contains(me[3], "x") {
  1089  			// Skip non-executable entries.
  1090  			return nil, nil
  1091  		}
  1092  		if mapping.Start, err = strconv.ParseUint(me[1], 16, 64); err != nil {
  1093  			return nil, errUnrecognized
  1094  		}
  1095  		if mapping.Limit, err = strconv.ParseUint(me[2], 16, 64); err != nil {
  1096  			return nil, errUnrecognized
  1097  		}
  1098  		if me[4] != "" {
  1099  			if mapping.Offset, err = strconv.ParseUint(me[4], 16, 64); err != nil {
  1100  				return nil, errUnrecognized
  1101  			}
  1102  		}
  1103  		mapping.File = me[8]
  1104  		return mapping, nil
  1105  	}
  1106  
  1107  	if me := briefMapsRE.FindStringSubmatch(l); len(me) == 6 {
  1108  		if mapping.Start, err = strconv.ParseUint(me[1], 16, 64); err != nil {
  1109  			return nil, errUnrecognized
  1110  		}
  1111  		if mapping.Limit, err = strconv.ParseUint(me[2], 16, 64); err != nil {
  1112  			return nil, errUnrecognized
  1113  		}
  1114  		mapping.File = me[3]
  1115  		if me[5] != "" {
  1116  			if mapping.Offset, err = strconv.ParseUint(me[5], 16, 64); err != nil {
  1117  				return nil, errUnrecognized
  1118  			}
  1119  		}
  1120  		return mapping, nil
  1121  	}
  1122  
  1123  	return nil, errUnrecognized
  1124  }
  1125  
  1126  type sectionType int
  1127  
  1128  const (
  1129  	unrecognizedSection sectionType = iota
  1130  	memoryMapSection
  1131  )
  1132  
  1133  var memoryMapTriggers = []string{
  1134  	"--- Memory map: ---",
  1135  	"MAPPED_LIBRARIES:",
  1136  }
  1137  
  1138  func sectionTrigger(line string) sectionType {
  1139  	for _, trigger := range memoryMapTriggers {
  1140  		if strings.Contains(line, trigger) {
  1141  			return memoryMapSection
  1142  		}
  1143  	}
  1144  	return unrecognizedSection
  1145  }
  1146  
  1147  func (p *Profile) addLegacyFrameInfo() {
  1148  	switch {
  1149  	case isProfileType(p, heapzSampleTypes) ||
  1150  		isProfileType(p, heapzInUseSampleTypes) ||
  1151  		isProfileType(p, heapzAllocSampleTypes):
  1152  		p.DropFrames, p.KeepFrames = allocRxStr, allocSkipRxStr
  1153  	case isProfileType(p, contentionzSampleTypes):
  1154  		p.DropFrames, p.KeepFrames = lockRxStr, ""
  1155  	default:
  1156  		p.DropFrames, p.KeepFrames = cpuProfilerRxStr, ""
  1157  	}
  1158  }
  1159  
  1160  var heapzSampleTypes = []string{"allocations", "size"} // early Go pprof profiles
  1161  var heapzInUseSampleTypes = []string{"inuse_objects", "inuse_space"}
  1162  var heapzAllocSampleTypes = []string{"alloc_objects", "alloc_space"}
  1163  var contentionzSampleTypes = []string{"contentions", "delay"}
  1164  
  1165  func isProfileType(p *Profile, t []string) bool {
  1166  	st := p.SampleType
  1167  	if len(st) != len(t) {
  1168  		return false
  1169  	}
  1170  
  1171  	for i := range st {
  1172  		if st[i].Type != t[i] {
  1173  			return false
  1174  		}
  1175  	}
  1176  	return true
  1177  }
  1178  
// allocRxStr is a regular expression source string built by joining the
// fragments below with "|". Each fragment names a memory-allocation
// routine or family of routines. addLegacyFrameInfo assigns it to
// Profile.DropFrames for heap-style profiles.
var allocRxStr = strings.Join([]string{
	// POSIX entry points.
	`calloc`,
	`cfree`,
	`malloc`,
	`free`,
	`memalign`,
	`do_memalign`,
	`(__)?posix_memalign`,
	`pvalloc`,
	`valloc`,
	`realloc`,

	// TC malloc.
	`tcmalloc::.*`,
	`tc_calloc`,
	`tc_cfree`,
	`tc_malloc`,
	`tc_free`,
	`tc_memalign`,
	`tc_posix_memalign`,
	`tc_pvalloc`,
	`tc_valloc`,
	`tc_realloc`,
	`tc_new`,
	`tc_delete`,
	`tc_newarray`,
	`tc_deletearray`,
	`tc_new_nothrow`,
	`tc_newarray_nothrow`,

	// Memory-allocation routines on OS X.
	`malloc_zone_malloc`,
	`malloc_zone_calloc`,
	`malloc_zone_valloc`,
	`malloc_zone_realloc`,
	`malloc_zone_memalign`,
	`malloc_zone_free`,

	// Go runtime
	`runtime\..*`,

	// Other misc. memory allocation routines
	`BaseArena::.*`,
	`(::)?do_malloc_no_errno`,
	`(::)?do_malloc_pages`,
	`(::)?do_malloc`,
	`DoSampledAllocation`,
	`MallocedMemBlock::MallocedMemBlock`,
	`_M_allocate`,
	`__builtin_(vec_)?delete`,
	`__builtin_(vec_)?new`,
	`__gnu_cxx::new_allocator::allocate`,
	`__libc_malloc`,
	`__malloc_alloc_template::allocate`,
	`allocate`,
	`cpp_alloc`,
	`operator new(\[\])?`,
	`simple_alloc::allocate`,
}, `|`)
  1239  
// allocSkipRxStr is a regular expression source string built by joining
// the fragments below with "|". addLegacyFrameInfo assigns it to
// Profile.KeepFrames for heap-style profiles, alongside allocRxStr as
// DropFrames.
var allocSkipRxStr = strings.Join([]string{
	// Preserve Go runtime frames that appear in the middle/bottom of
	// the stack.
	`runtime\.panic`,
	`runtime\.reflectcall`,
	`runtime\.call[0-9]*`,
}, `|`)
  1247  
// cpuProfilerRxStr is a regular expression source string built by
// joining the fragments below with "|". addLegacyFrameInfo assigns it to
// Profile.DropFrames for profiles that are neither heap-style nor
// contention profiles.
var cpuProfilerRxStr = strings.Join([]string{
	`ProfileData::Add`,
	`ProfileData::prof_handler`,
	`CpuProfiler::prof_handler`,
	`__pthread_sighandler`,
	`__restore`,
}, `|`)
  1255  
// lockRxStr is a regular expression source string built by joining the
// fragments below with "|". addLegacyFrameInfo assigns it to
// Profile.DropFrames for contention profiles.
var lockRxStr = strings.Join([]string{
	`RecordLockProfileData`,
	`(base::)?RecordLockProfileData.*`,
	`(base::)?SubmitMutexProfileData.*`,
	`(base::)?SubmitSpinLockProfileData.*`,
	`(Mutex::)?AwaitCommon.*`,
	`(Mutex::)?Unlock.*`,
	`(Mutex::)?UnlockSlow.*`,
	`(Mutex::)?ReaderUnlock.*`,
	`(MutexLock::)?~MutexLock.*`,
	`(SpinLock::)?Unlock.*`,
	`(SpinLock::)?SlowUnlock.*`,
	`(SpinLockHolder::)?~SpinLockHolder.*`,
}, `|`)