github.com/pyroscope-io/pyroscope@v0.37.3-0.20230725203016-5f6947968bd0/pkg/agent/pprof/proto.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package pprof
     6  
     7  import (
     8  	"bytes"
     9  	"compress/gzip"
    10  	"fmt"
    11  	"io"
    12  	"os"
    13  	"runtime"
    14  	"sort"
    15  	"strconv"
    16  	"strings"
    17  	"time"
    18  	"unsafe"
    19  )
    20  
    21  // lostProfileEvent is the function to which lost profiling
    22  // events are attributed.
    23  // (The name shows up in the pprof graphs.)
    24  func lostProfileEvent() { lostProfileEvent() }
    25  
    26  // funcPC returns the PC for the func value f.
    27  func funcPC(f interface{}) uintptr {
    28  	return *(*[2]*uintptr)(unsafe.Pointer(&f))[1]
    29  }
    30  
// A profileBuilder writes a profile incrementally from a
// stream of profile samples delivered by the runtime.
//
// Sample bookkeeping:
//   - start is set when the builder is created and end when build runs;
//     build emits start as the profile time and, for CPU profiles,
//     end-start as the duration.
//   - havePeriod becomes true once addCPUData has consumed the leading
//     period record; period is then the sampling period in nanoseconds.
//   - m accumulates sample counts per (stack, tag) until build walks it.
//
// Encoding state:
//   - w is the writer handed to newProfileBuilder and zw is the gzip
//     writer layered on top of it; encoded bytes are written to zw.
//   - pb buffers encoded protobuf data until flush or build writes it out.
//   - strings is the string table (index 0 is "") and stringMap maps a
//     string back to its index.
//   - mem holds the memory mappings; a mapping's ID in the profile is its
//     index in mem plus one.
//   - deck collects the PCs that will be emitted together as one Location.
type profileBuilder struct {
	start      time.Time
	end        time.Time
	havePeriod bool
	period     int64
	m          profMap

	// encoding state
	w         io.Writer
	zw        *gzip.Writer
	pb        protobuf
	strings   []string
	stringMap map[string]int
	locs      map[uintptr]locInfo // list of locInfo starting with the given PC.
	funcs     map[string]int      // Package path-qualified function name to Function.ID
	mem       []memMap
	deck      pcDeck
}
    51  
// memMap describes one memory mapping of the process as recorded by
// addMappingEntry: the address range [start, end), the file offset, the
// backing file name and its build ID.
//
// funcs accumulates the symbolization results of every location that
// emitLocation attributes to this mapping; build reports the mapping as
// having functions only when lookups were tried and none failed.
type memMap struct {
	// initialized as reading mapping
	start         uintptr
	end           uintptr
	offset        uint64
	file, buildID string

	funcs symbolizeFlag
	fake  bool // map entry was faked; /proc/self/maps wasn't available
}
    62  
    63  // symbolizeFlag keeps track of symbolization result.
    64  //   0                  : no symbol lookup was performed
    65  //   1<<0 (lookupTried) : symbol lookup was performed
    66  //   1<<1 (lookupFailed): symbol lookup was performed but failed
    67  type symbolizeFlag uint8
    68  
    69  const (
    70  	lookupTried  symbolizeFlag = 1 << iota
    71  	lookupFailed symbolizeFlag = 1 << iota
    72  )
    73  
    74  const (
    75  	// message Profile
    76  	tagProfile_SampleType        = 1  // repeated ValueType
    77  	tagProfile_Sample            = 2  // repeated Sample
    78  	tagProfile_Mapping           = 3  // repeated Mapping
    79  	tagProfile_Location          = 4  // repeated Location
    80  	tagProfile_Function          = 5  // repeated Function
    81  	tagProfile_StringTable       = 6  // repeated string
    82  	tagProfile_DropFrames        = 7  // int64 (string table index)
    83  	tagProfile_KeepFrames        = 8  // int64 (string table index)
    84  	tagProfile_TimeNanos         = 9  // int64
    85  	tagProfile_DurationNanos     = 10 // int64
    86  	tagProfile_PeriodType        = 11 // ValueType (really optional string???)
    87  	tagProfile_Period            = 12 // int64
    88  	tagProfile_Comment           = 13 // repeated int64
    89  	tagProfile_DefaultSampleType = 14 // int64
    90  
    91  	// message ValueType
    92  	tagValueType_Type = 1 // int64 (string table index)
    93  	tagValueType_Unit = 2 // int64 (string table index)
    94  
    95  	// message Sample
    96  	tagSample_Location = 1 // repeated uint64
    97  	tagSample_Value    = 2 // repeated int64
    98  	tagSample_Label    = 3 // repeated Label
    99  
   100  	// message Label
   101  	tagLabel_Key = 1 // int64 (string table index)
   102  	tagLabel_Str = 2 // int64 (string table index)
   103  	tagLabel_Num = 3 // int64
   104  
   105  	// message Mapping
   106  	tagMapping_ID              = 1  // uint64
   107  	tagMapping_Start           = 2  // uint64
   108  	tagMapping_Limit           = 3  // uint64
   109  	tagMapping_Offset          = 4  // uint64
   110  	tagMapping_Filename        = 5  // int64 (string table index)
   111  	tagMapping_BuildID         = 6  // int64 (string table index)
   112  	tagMapping_HasFunctions    = 7  // bool
   113  	tagMapping_HasFilenames    = 8  // bool
   114  	tagMapping_HasLineNumbers  = 9  // bool
   115  	tagMapping_HasInlineFrames = 10 // bool
   116  
   117  	// message Location
   118  	tagLocation_ID        = 1 // uint64
   119  	tagLocation_MappingID = 2 // uint64
   120  	tagLocation_Address   = 3 // uint64
   121  	tagLocation_Line      = 4 // repeated Line
   122  
   123  	// message Line
   124  	tagLine_FunctionID = 1 // uint64
   125  	tagLine_Line       = 2 // int64
   126  
   127  	// message Function
   128  	tagFunction_ID         = 1 // uint64
   129  	tagFunction_Name       = 2 // int64 (string table index)
   130  	tagFunction_SystemName = 3 // int64 (string table index)
   131  	tagFunction_Filename   = 4 // int64 (string table index)
   132  	tagFunction_StartLine  = 5 // int64
   133  )
   134  
   135  // stringIndex adds s to the string table if not already present
   136  // and returns the index of s in the string table.
   137  func (b *profileBuilder) stringIndex(s string) int64 {
   138  	id, ok := b.stringMap[s]
   139  	if !ok {
   140  		id = len(b.strings)
   141  		b.strings = append(b.strings, s)
   142  		b.stringMap[s] = id
   143  	}
   144  	return int64(id)
   145  }
   146  
   147  func (b *profileBuilder) flush() {
   148  	const dataFlush = 4096
   149  	if b.pb.nest == 0 && len(b.pb.data) > dataFlush {
   150  		b.zw.Write(b.pb.data)
   151  		b.pb.data = b.pb.data[:0]
   152  	}
   153  }
   154  
   155  // pbValueType encodes a ValueType message to b.pb.
   156  func (b *profileBuilder) pbValueType(tag int, typ, unit string) {
   157  	start := b.pb.startMessage()
   158  	b.pb.int64(tagValueType_Type, b.stringIndex(typ))
   159  	b.pb.int64(tagValueType_Unit, b.stringIndex(unit))
   160  	b.pb.endMessage(tag, start)
   161  }
   162  
   163  // pbSample encodes a Sample message to b.pb.
   164  func (b *profileBuilder) pbSample(values []int64, locs []uint64, labels func()) {
   165  	start := b.pb.startMessage()
   166  	b.pb.int64s(tagSample_Value, values)
   167  	b.pb.uint64s(tagSample_Location, locs)
   168  	if labels != nil {
   169  		labels()
   170  	}
   171  	b.pb.endMessage(tagProfile_Sample, start)
   172  	b.flush()
   173  }
   174  
   175  // pbLabel encodes a Label message to b.pb.
   176  func (b *profileBuilder) pbLabel(tag int, key, str string, num int64) {
   177  	start := b.pb.startMessage()
   178  	b.pb.int64Opt(tagLabel_Key, b.stringIndex(key))
   179  	b.pb.int64Opt(tagLabel_Str, b.stringIndex(str))
   180  	b.pb.int64Opt(tagLabel_Num, num)
   181  	b.pb.endMessage(tag, start)
   182  }
   183  
   184  // pbLine encodes a Line message to b.pb.
   185  func (b *profileBuilder) pbLine(tag int, funcID uint64, line int64) {
   186  	start := b.pb.startMessage()
   187  	b.pb.uint64Opt(tagLine_FunctionID, funcID)
   188  	b.pb.int64Opt(tagLine_Line, line)
   189  	b.pb.endMessage(tag, start)
   190  }
   191  
   192  // pbMapping encodes a Mapping message to b.pb.
   193  func (b *profileBuilder) pbMapping(tag int, id, base, limit, offset uint64, file, buildID string, hasFuncs bool) {
   194  	start := b.pb.startMessage()
   195  	b.pb.uint64Opt(tagMapping_ID, id)
   196  	b.pb.uint64Opt(tagMapping_Start, base)
   197  	b.pb.uint64Opt(tagMapping_Limit, limit)
   198  	b.pb.uint64Opt(tagMapping_Offset, offset)
   199  	b.pb.int64Opt(tagMapping_Filename, b.stringIndex(file))
   200  	b.pb.int64Opt(tagMapping_BuildID, b.stringIndex(buildID))
   201  	// TODO: we set HasFunctions if all symbols from samples were symbolized (hasFuncs).
   202  	// Decide what to do about HasInlineFrames and HasLineNumbers.
   203  	// Also, another approach to handle the mapping entry with
   204  	// incomplete symbolization results is to duplicate the mapping
   205  	// entry (but with different Has* fields values) and use
   206  	// different entries for symbolized locations and unsymbolized locations.
   207  	if hasFuncs {
   208  		b.pb.bool(tagMapping_HasFunctions, true)
   209  	}
   210  	b.pb.endMessage(tag, start)
   211  }
   212  
   213  // labelMap is the representation of the label set held in the context type.
   214  // This is an initial implementation, but it will be replaced with something
   215  // that admits incremental immutable modification more efficiently.
   216  type labelMap map[string]string
   217  
   218  // String satisfies Stringer and returns key, value pairs in a consistent
   219  // order.
   220  func (l *labelMap) String() string {
   221  	if l == nil {
   222  		return ""
   223  	}
   224  	keyVals := make([]string, 0, len(*l))
   225  
   226  	for k, v := range *l {
   227  		keyVals = append(keyVals, fmt.Sprintf("%q:%q", k, v))
   228  	}
   229  
   230  	sort.Strings(keyVals)
   231  
   232  	return "{" + strings.Join(keyVals, ", ") + "}"
   233  }
   234  
   235  func allFrames(addr uintptr) ([]runtime.Frame, symbolizeFlag) {
   236  	// Expand this one address using CallersFrames so we can cache
   237  	// each expansion. In general, CallersFrames takes a whole
   238  	// stack, but in this case we know there will be no skips in
   239  	// the stack and we have return PCs anyway.
   240  	frames := runtime.CallersFrames([]uintptr{addr})
   241  	frame, more := frames.Next()
   242  	if frame.Function == "runtime.goexit" {
   243  		// Short-circuit if we see runtime.goexit so the loop
   244  		// below doesn't allocate a useless empty location.
   245  		return nil, 0
   246  	}
   247  
   248  	symbolizeResult := lookupTried
   249  	if frame.PC == 0 || frame.Function == "" || frame.File == "" || frame.Line == 0 {
   250  		symbolizeResult |= lookupFailed
   251  	}
   252  
   253  	if frame.PC == 0 {
   254  		// If we failed to resolve the frame, at least make up
   255  		// a reasonable call PC. This mostly happens in tests.
   256  		frame.PC = addr - 1
   257  	}
   258  	ret := []runtime.Frame{frame}
   259  	for frame.Function != "runtime.goexit" && more == true {
   260  		frame, more = frames.Next()
   261  		ret = append(ret, frame)
   262  	}
   263  	return ret, symbolizeResult
   264  }
   265  
// locInfo is the cache entry stored in profileBuilder.locs for a
// Location that has already been emitted. It is keyed by the first PC
// of the location and lets appendLocsForStack reuse the ID and skip
// over the PCs the location covers.
type locInfo struct {
	// location id assigned by the profileBuilder
	id uint64

	// sequence of PCs, including the fake PCs returned by the traceback
	// to represent inlined functions
	// https://github.com/golang/go/blob/d6f2f833c93a41ec1c68e49804b8387a06b131c5/src/runtime/traceback.go#L347-L368
	pcs []uintptr
}
   275  
   276  // newProfileBuilder returns a new profileBuilder.
   277  // CPU profiling data obtained from the runtime can be added
   278  // by calling b.addCPUData, and then the eventual profile
   279  // can be obtained by calling b.finish.
   280  func newProfileBuilder(w io.Writer) *profileBuilder {
   281  	zw, _ := gzip.NewWriterLevel(w, gzip.BestSpeed)
   282  	b := &profileBuilder{
   283  		w:         w,
   284  		zw:        zw,
   285  		start:     time.Now(),
   286  		strings:   []string{""},
   287  		stringMap: map[string]int{"": 0},
   288  		locs:      map[uintptr]locInfo{},
   289  		funcs:     map[string]int{},
   290  	}
   291  	b.readMapping()
   292  	return b
   293  }
   294  
   295  // addCPUData adds the CPU profiling data to the profile.
   296  // The data must be a whole number of records,
   297  // as delivered by the runtime.
   298  func (b *profileBuilder) addCPUData(data []uint64, tags []unsafe.Pointer) error {
   299  	if !b.havePeriod {
   300  		// first record is period
   301  		if len(data) < 3 {
   302  			return fmt.Errorf("truncated profile")
   303  		}
   304  		if data[0] != 3 || data[2] == 0 {
   305  			return fmt.Errorf("malformed profile")
   306  		}
   307  		// data[2] is sampling rate in Hz. Convert to sampling
   308  		// period in nanoseconds.
   309  		b.period = 1e9 / int64(data[2])
   310  		b.havePeriod = true
   311  		data = data[3:]
   312  	}
   313  
   314  	// Parse CPU samples from the profile.
   315  	// Each sample is 3+n uint64s:
   316  	//	data[0] = 3+n
   317  	//	data[1] = time stamp (ignored)
   318  	//	data[2] = count
   319  	//	data[3:3+n] = stack
   320  	// If the count is 0 and the stack has length 1,
   321  	// that's an overflow record inserted by the runtime
   322  	// to indicate that stack[0] samples were lost.
   323  	// Otherwise the count is usually 1,
   324  	// but in a few special cases like lost non-Go samples
   325  	// there can be larger counts.
   326  	// Because many samples with the same stack arrive,
   327  	// we want to deduplicate immediately, which we do
   328  	// using the b.m profMap.
   329  	for len(data) > 0 {
   330  		if len(data) < 3 || data[0] > uint64(len(data)) {
   331  			return fmt.Errorf("truncated profile")
   332  		}
   333  		if data[0] < 3 || tags != nil && len(tags) < 1 {
   334  			return fmt.Errorf("malformed profile")
   335  		}
   336  		count := data[2]
   337  		stk := data[3:data[0]]
   338  		data = data[data[0]:]
   339  		var tag unsafe.Pointer
   340  		if tags != nil {
   341  			tag = tags[0]
   342  			tags = tags[1:]
   343  		}
   344  
   345  		if count == 0 && len(stk) == 1 {
   346  			// overflow record
   347  			count = uint64(stk[0])
   348  			stk = []uint64{
   349  				uint64(funcPC(lostProfileEvent)),
   350  			}
   351  		}
   352  		b.m.lookup(stk, tag).count += int64(count)
   353  	}
   354  	return nil
   355  }
   356  
   357  // build completes and returns the constructed profile.
   358  func (b *profileBuilder) build() {
   359  	b.end = time.Now()
   360  
   361  	b.pb.int64Opt(tagProfile_TimeNanos, b.start.UnixNano())
   362  	if b.havePeriod { // must be CPU profile
   363  		b.pbValueType(tagProfile_SampleType, "samples", "count")
   364  		b.pbValueType(tagProfile_SampleType, "cpu", "nanoseconds")
   365  		b.pb.int64Opt(tagProfile_DurationNanos, b.end.Sub(b.start).Nanoseconds())
   366  		b.pbValueType(tagProfile_PeriodType, "cpu", "nanoseconds")
   367  		b.pb.int64Opt(tagProfile_Period, b.period)
   368  	}
   369  
   370  	values := []int64{0, 0}
   371  	var locs []uint64
   372  
   373  	for e := b.m.all; e != nil; e = e.nextAll {
   374  		values[0] = e.count
   375  		values[1] = e.count * b.period
   376  
   377  		var labels func()
   378  		if e.tag != nil {
   379  			labels = func() {
   380  				for k, v := range *(*labelMap)(e.tag) {
   381  					b.pbLabel(tagSample_Label, k, v, 0)
   382  				}
   383  			}
   384  		}
   385  
   386  		locs = b.appendLocsForStack(locs[:0], e.stk)
   387  
   388  		b.pbSample(values, locs, labels)
   389  	}
   390  
   391  	for i, m := range b.mem {
   392  		hasFunctions := m.funcs == lookupTried // lookupTried but not lookupFailed
   393  		b.pbMapping(tagProfile_Mapping, uint64(i+1), uint64(m.start), uint64(m.end), m.offset, m.file, m.buildID, hasFunctions)
   394  	}
   395  
   396  	// TODO: Anything for tagProfile_DropFrames?
   397  	// TODO: Anything for tagProfile_KeepFrames?
   398  
   399  	b.pb.strings(tagProfile_StringTable, b.strings)
   400  	b.zw.Write(b.pb.data)
   401  	b.zw.Close()
   402  }
   403  
   404  // appendLocsForStack appends the location IDs for the given stack trace to the given
   405  // location ID slice, locs. The addresses in the stack are return PCs or 1 + the PC of
   406  // an inline marker as the runtime traceback function returns.
   407  //
   408  // It may emit to b.pb, so there must be no message encoding in progress.
   409  func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLocs []uint64) {
   410  	b.deck.reset()
   411  	for len(stk) > 0 {
   412  		addr := stk[0]
   413  		if l, ok := b.locs[addr]; ok {
   414  			// first record the location if there is any pending accumulated info.
   415  			if id := b.emitLocation(); id > 0 {
   416  				locs = append(locs, id)
   417  			}
   418  
   419  			// then, record the cached location.
   420  			locs = append(locs, l.id)
   421  
   422  			// The stk may be truncated due to the stack depth limit
   423  			// (e.g. See maxStack and maxCPUProfStack in runtime) or
   424  			// bugs in runtime. Avoid the crash in either case.
   425  			// TODO(hyangah): The correct fix may require using the exact
   426  			// pcs as the key for b.locs cache management instead of just
   427  			// relying on the very first pc. We are late in the go1.14 dev
   428  			// cycle, so this is a workaround with little code change.
   429  			if len(l.pcs) > len(stk) {
   430  				stk = nil
   431  				// TODO(hyangah): would be nice if we can enable
   432  				// debug print out on demand and report the problematic
   433  				// cached location entry and stack traces. Do we already
   434  				// have such facility to utilize (e.g. GODEBUG)?
   435  			} else {
   436  				stk = stk[len(l.pcs):] // skip the matching pcs.
   437  			}
   438  			continue
   439  		}
   440  
   441  		frames, symbolizeResult := allFrames(addr)
   442  		if len(frames) == 0 { // runtime.goexit.
   443  			if id := b.emitLocation(); id > 0 {
   444  				locs = append(locs, id)
   445  			}
   446  			stk = stk[1:]
   447  			continue
   448  		}
   449  
   450  		if added := b.deck.tryAdd(addr, frames, symbolizeResult); added {
   451  			stk = stk[1:]
   452  			continue
   453  		}
   454  		// add failed because this addr is not inlined with
   455  		// the existing PCs in the deck. Flush the deck and retry to
   456  		// handle this pc.
   457  		if id := b.emitLocation(); id > 0 {
   458  			locs = append(locs, id)
   459  		}
   460  
   461  		// check cache again - previous emitLocation added a new entry
   462  		if l, ok := b.locs[addr]; ok {
   463  			locs = append(locs, l.id)
   464  			stk = stk[len(l.pcs):] // skip the matching pcs.
   465  		} else {
   466  			b.deck.tryAdd(addr, frames, symbolizeResult) // must succeed.
   467  			stk = stk[1:]
   468  		}
   469  	}
   470  	if id := b.emitLocation(); id > 0 { // emit remaining location.
   471  		locs = append(locs, id)
   472  	}
   473  	return locs
   474  }
   475  
// pcDeck is a helper to detect a sequence of inlined functions from
// a stack trace returned by the runtime.
//
// The stack traces returned by runtime's traceback functions are fully
// expanded (at least for Go functions) and include the fake pcs representing
// inlined functions. The profile proto expects the inlined functions to be
// encoded in one Location message.
// https://github.com/google/pprof/blob/5e965273ee43930341d897407202dd5e10e952cb/proto/profile.proto#L177-L184
//
// Runtime does not directly expose whether a frame is for an inlined function
// and looking up debug info is not ideal, so we use a heuristic to filter
// the fake pcs and restore the inlined and entry functions. Inlined functions
// have the following properties:
//   - Frame's Func is nil (note: also true for non-Go functions), and
//   - Frame's Entry matches its entry function frame's Entry (note: could
//     also be true for recursive calls and non-Go functions), and
//   - Frame's Name does not match its entry function frame's name.
//
// While reading and processing the pcs in a stack trace one by one (from
// the leaf to the root), we use pcDeck to temporarily hold the observed pcs
// and their expanded frames until we observe the entry function frame.
//
// pcs holds the PCs added so far, frames the frames expanded from them,
// and symbolizeResult the OR of the symbolization results passed to tryAdd.
type pcDeck struct {
	pcs             []uintptr
	frames          []runtime.Frame
	symbolizeResult symbolizeFlag
}
   501  
   502  func (d *pcDeck) reset() {
   503  	d.pcs = d.pcs[:0]
   504  	d.frames = d.frames[:0]
   505  	d.symbolizeResult = 0
   506  }
   507  
   508  // tryAdd tries to add the pc and Frames expanded from it (most likely one,
   509  // since the stack trace is already fully expanded) and the symbolizeResult
   510  // to the deck. If it fails the caller needs to flush the deck and retry.
   511  func (d *pcDeck) tryAdd(pc uintptr, frames []runtime.Frame, symbolizeResult symbolizeFlag) (success bool) {
   512  	if existing := len(d.pcs); existing > 0 {
   513  		// 'frames' are all expanded from one 'pc' and represent all inlined functions
   514  		// so we check only the last one.
   515  		newFrame := frames[0]
   516  		last := d.frames[existing-1]
   517  		if last.Func != nil { // the last frame can't be inlined. Flush.
   518  			return false
   519  		}
   520  		if last.Entry == 0 || newFrame.Entry == 0 { // Possibly not a Go function. Don't try to merge.
   521  			return false
   522  		}
   523  
   524  		if last.Entry != newFrame.Entry { // newFrame is for a different function.
   525  			return false
   526  		}
   527  		if last.Function == newFrame.Function { // maybe recursion.
   528  			return false
   529  		}
   530  	}
   531  	d.pcs = append(d.pcs, pc)
   532  	d.frames = append(d.frames, frames...)
   533  	d.symbolizeResult |= symbolizeResult
   534  	return true
   535  }
   536  
   537  // emitLocation emits the new location and function information recorded in the deck
   538  // and returns the location ID encoded in the profile protobuf.
   539  // It emits to b.pb, so there must be no message encoding in progress.
   540  // It resets the deck.
   541  func (b *profileBuilder) emitLocation() uint64 {
   542  	if len(b.deck.pcs) == 0 {
   543  		return 0
   544  	}
   545  	defer b.deck.reset()
   546  
   547  	addr := b.deck.pcs[0]
   548  	firstFrame := b.deck.frames[0]
   549  
   550  	// We can't write out functions while in the middle of the
   551  	// Location message, so record new functions we encounter and
   552  	// write them out after the Location.
   553  	type newFunc struct {
   554  		id         uint64
   555  		name, file string
   556  	}
   557  	newFuncs := make([]newFunc, 0, 8)
   558  
   559  	id := uint64(len(b.locs)) + 1
   560  	b.locs[addr] = locInfo{id: id, pcs: append([]uintptr{}, b.deck.pcs...)}
   561  
   562  	start := b.pb.startMessage()
   563  	b.pb.uint64Opt(tagLocation_ID, id)
   564  	b.pb.uint64Opt(tagLocation_Address, uint64(firstFrame.PC))
   565  	for _, frame := range b.deck.frames {
   566  		// Write out each line in frame expansion.
   567  		funcID := uint64(b.funcs[frame.Function])
   568  		if funcID == 0 {
   569  			funcID = uint64(len(b.funcs)) + 1
   570  			b.funcs[frame.Function] = int(funcID)
   571  			newFuncs = append(newFuncs, newFunc{funcID, frame.Function, frame.File})
   572  		}
   573  		b.pbLine(tagLocation_Line, funcID, int64(frame.Line))
   574  	}
   575  	for i := range b.mem {
   576  		if b.mem[i].start <= addr && addr < b.mem[i].end || b.mem[i].fake {
   577  			b.pb.uint64Opt(tagLocation_MappingID, uint64(i+1))
   578  
   579  			m := b.mem[i]
   580  			m.funcs |= b.deck.symbolizeResult
   581  			b.mem[i] = m
   582  			break
   583  		}
   584  	}
   585  	b.pb.endMessage(tagProfile_Location, start)
   586  
   587  	// Write out functions we found during frame expansion.
   588  	for _, fn := range newFuncs {
   589  		start := b.pb.startMessage()
   590  		b.pb.uint64Opt(tagFunction_ID, fn.id)
   591  		b.pb.int64Opt(tagFunction_Name, b.stringIndex(fn.name))
   592  		b.pb.int64Opt(tagFunction_SystemName, b.stringIndex(fn.name))
   593  		b.pb.int64Opt(tagFunction_Filename, b.stringIndex(fn.file))
   594  		b.pb.endMessage(tagProfile_Function, start)
   595  	}
   596  
   597  	b.flush()
   598  	return id
   599  }
   600  
   601  // readMapping reads /proc/self/maps and writes mappings to b.pb.
   602  // It saves the address ranges of the mappings in b.mem for use
   603  // when emitting locations.
   604  func (b *profileBuilder) readMapping() {
   605  	data, _ := os.ReadFile("/proc/self/maps")
   606  	parseProcSelfMaps(data, b.addMapping)
   607  	if len(b.mem) == 0 { // pprof expects a map entry, so fake one.
   608  		b.addMappingEntry(0, 0, 0, "", "", true)
   609  		// TODO(hyangah): make addMapping return *memMap or
   610  		// take a memMap struct, and get rid of addMappingEntry
   611  		// that takes a bunch of positional arguments.
   612  	}
   613  }
   614  
   615  func parseProcSelfMaps(data []byte, addMapping func(lo, hi, offset uint64, file, buildID string)) {
   616  	// $ cat /proc/self/maps
   617  	// 00400000-0040b000 r-xp 00000000 fc:01 787766                             /bin/cat
   618  	// 0060a000-0060b000 r--p 0000a000 fc:01 787766                             /bin/cat
   619  	// 0060b000-0060c000 rw-p 0000b000 fc:01 787766                             /bin/cat
   620  	// 014ab000-014cc000 rw-p 00000000 00:00 0                                  [heap]
   621  	// 7f7d76af8000-7f7d7797c000 r--p 00000000 fc:01 1318064                    /usr/lib/locale/locale-archive
   622  	// 7f7d7797c000-7f7d77b36000 r-xp 00000000 fc:01 1180226                    /lib/x86_64-linux-gnu/libc-2.19.so
   623  	// 7f7d77b36000-7f7d77d36000 ---p 001ba000 fc:01 1180226                    /lib/x86_64-linux-gnu/libc-2.19.so
   624  	// 7f7d77d36000-7f7d77d3a000 r--p 001ba000 fc:01 1180226                    /lib/x86_64-linux-gnu/libc-2.19.so
   625  	// 7f7d77d3a000-7f7d77d3c000 rw-p 001be000 fc:01 1180226                    /lib/x86_64-linux-gnu/libc-2.19.so
   626  	// 7f7d77d3c000-7f7d77d41000 rw-p 00000000 00:00 0
   627  	// 7f7d77d41000-7f7d77d64000 r-xp 00000000 fc:01 1180217                    /lib/x86_64-linux-gnu/ld-2.19.so
   628  	// 7f7d77f3f000-7f7d77f42000 rw-p 00000000 00:00 0
   629  	// 7f7d77f61000-7f7d77f63000 rw-p 00000000 00:00 0
   630  	// 7f7d77f63000-7f7d77f64000 r--p 00022000 fc:01 1180217                    /lib/x86_64-linux-gnu/ld-2.19.so
   631  	// 7f7d77f64000-7f7d77f65000 rw-p 00023000 fc:01 1180217                    /lib/x86_64-linux-gnu/ld-2.19.so
   632  	// 7f7d77f65000-7f7d77f66000 rw-p 00000000 00:00 0
   633  	// 7ffc342a2000-7ffc342c3000 rw-p 00000000 00:00 0                          [stack]
   634  	// 7ffc34343000-7ffc34345000 r-xp 00000000 00:00 0                          [vdso]
   635  	// ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]
   636  
   637  	var line []byte
   638  	// next removes and returns the next field in the line.
   639  	// It also removes from line any spaces following the field.
   640  	next := func() []byte {
   641  		j := bytes.IndexByte(line, ' ')
   642  		if j < 0 {
   643  			f := line
   644  			line = nil
   645  			return f
   646  		}
   647  		f := line[:j]
   648  		line = line[j+1:]
   649  		for len(line) > 0 && line[0] == ' ' {
   650  			line = line[1:]
   651  		}
   652  		return f
   653  	}
   654  
   655  	for len(data) > 0 {
   656  		i := bytes.IndexByte(data, '\n')
   657  		if i < 0 {
   658  			line, data = data, nil
   659  		} else {
   660  			line, data = data[:i], data[i+1:]
   661  		}
   662  		addr := next()
   663  		i = bytes.IndexByte(addr, '-')
   664  		if i < 0 {
   665  			continue
   666  		}
   667  		lo, err := strconv.ParseUint(string(addr[:i]), 16, 64)
   668  		if err != nil {
   669  			continue
   670  		}
   671  		hi, err := strconv.ParseUint(string(addr[i+1:]), 16, 64)
   672  		if err != nil {
   673  			continue
   674  		}
   675  		perm := next()
   676  		if len(perm) < 4 || perm[2] != 'x' {
   677  			// Only interested in executable mappings.
   678  			continue
   679  		}
   680  		offset, err := strconv.ParseUint(string(next()), 16, 64)
   681  		if err != nil {
   682  			continue
   683  		}
   684  		next()          // dev
   685  		inode := next() // inode
   686  		if line == nil {
   687  			continue
   688  		}
   689  		file := string(line)
   690  
   691  		// Trim deleted file marker.
   692  		deletedStr := " (deleted)"
   693  		deletedLen := len(deletedStr)
   694  		if len(file) >= deletedLen && file[len(file)-deletedLen:] == deletedStr {
   695  			file = file[:len(file)-deletedLen]
   696  		}
   697  
   698  		if len(inode) == 1 && inode[0] == '0' && file == "" {
   699  			// Huge-page text mappings list the initial fragment of
   700  			// mapped but unpopulated memory as being inode 0.
   701  			// Don't report that part.
   702  			// But [vdso] and [vsyscall] are inode 0, so let non-empty file names through.
   703  			continue
   704  		}
   705  
   706  		// TODO: pprof's remapMappingIDs makes two adjustments:
   707  		// 1. If there is an /anon_hugepage mapping first and it is
   708  		// consecutive to a next mapping, drop the /anon_hugepage.
   709  		// 2. If start-offset = 0x400000, change start to 0x400000 and offset to 0.
   710  		// There's no indication why either of these is needed.
   711  		// Let's try not doing these and see what breaks.
   712  		// If we do need them, they would go here, before we
   713  		// enter the mappings into b.mem in the first place.
   714  
   715  		buildID, _ := elfBuildID(file)
   716  		addMapping(lo, hi, offset, file, buildID)
   717  	}
   718  }
   719  
   720  func (b *profileBuilder) addMapping(lo, hi, offset uint64, file, buildID string) {
   721  	b.addMappingEntry(lo, hi, offset, file, buildID, false)
   722  }
   723  
   724  func (b *profileBuilder) addMappingEntry(lo, hi, offset uint64, file, buildID string, fake bool) {
   725  	b.mem = append(b.mem, memMap{
   726  		start:   uintptr(lo),
   727  		end:     uintptr(hi),
   728  		offset:  offset,
   729  		file:    file,
   730  		buildID: buildID,
   731  		fake:    fake,
   732  	})
   733  }