github.com/pyroscope-io/godeltaprof@v0.1.3-0.20230906152420-0d7eeca7b8c1/internal/pprof/proto.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package pprof
     6  
     7  import (
     8  	"bytes"
     9  	"compress/gzip"
    10  	"io"
    11  	"os"
    12  	"runtime"
    13  	"strconv"
    14  	"strings"
    15  	"time"
    16  )
    17  
    18  // lostProfileEvent is the function to which lost profiling
    19  // events are attributed.
    20  // (The name shows up in the pprof graphs.)
    21  func lostProfileEvent() { lostProfileEvent() }
    22  
// A profileBuilder writes a profile incrementally from a
// stream of profile samples delivered by the runtime.
//
// Encoded protobuf bytes accumulate in pb and are written through the
// gzip writer zw by flush and build. Strings, locations and functions are
// de-duplicated through stringMap, locs and funcs respectively.
type profileBuilder struct {
	// start is set by newProfileBuilder and end by build.
	// havePeriod and period are read by build, which emits the CPU
	// sample/period types only when havePeriod is true.
	start      time.Time
	end        time.Time
	havePeriod bool
	period     int64
	//m          profMap

	// encoding state
	//
	// w is the writer handed to newProfileBuilder and zw the gzip writer
	// layered on top of it. strings is the string table and stringMap its
	// reverse index (see stringIndex). mem holds the mappings recorded by
	// addMappingEntry, and deck accumulates the PCs of the location that
	// is currently being assembled.
	w         io.Writer
	zw        *gzip.Writer
	pb        protobuf
	strings   []string
	stringMap map[string]int
	locs      map[uintptr]locInfo // list of locInfo starting with the given PC.
	funcs     map[string]int      // Package path-qualified function name to Function.ID
	mem       []memMap
	deck      pcDeck
}
    43  
    44  type memMap struct {
    45  	// initialized as reading mapping
    46  	start         uintptr
    47  	end           uintptr
    48  	offset        uint64
    49  	file, buildID string
    50  
    51  	funcs symbolizeFlag
    52  	fake  bool // map entry was faked; /proc/self/maps wasn't available
    53  }
    54  
    55  // symbolizeFlag keeps track of symbolization result.
    56  //
    57  //	0                  : no symbol lookup was performed
    58  //	1<<0 (lookupTried) : symbol lookup was performed
    59  //	1<<1 (lookupFailed): symbol lookup was performed but failed
    60  type symbolizeFlag uint8
    61  
    62  const (
    63  	lookupTried  symbolizeFlag = 1 << iota
    64  	lookupFailed symbolizeFlag = 1 << iota
    65  )
    66  
    67  const (
    68  	// message Profile
    69  	tagProfile_SampleType        = 1  // repeated ValueType
    70  	tagProfile_Sample            = 2  // repeated Sample
    71  	tagProfile_Mapping           = 3  // repeated Mapping
    72  	tagProfile_Location          = 4  // repeated Location
    73  	tagProfile_Function          = 5  // repeated Function
    74  	tagProfile_StringTable       = 6  // repeated string
    75  	tagProfile_DropFrames        = 7  // int64 (string table index)
    76  	tagProfile_KeepFrames        = 8  // int64 (string table index)
    77  	tagProfile_TimeNanos         = 9  // int64
    78  	tagProfile_DurationNanos     = 10 // int64
    79  	tagProfile_PeriodType        = 11 // ValueType (really optional string???)
    80  	tagProfile_Period            = 12 // int64
    81  	tagProfile_Comment           = 13 // repeated int64
    82  	tagProfile_DefaultSampleType = 14 // int64
    83  
    84  	// message ValueType
    85  	tagValueType_Type = 1 // int64 (string table index)
    86  	tagValueType_Unit = 2 // int64 (string table index)
    87  
    88  	// message Sample
    89  	tagSample_Location = 1 // repeated uint64
    90  	tagSample_Value    = 2 // repeated int64
    91  	tagSample_Label    = 3 // repeated Label
    92  
    93  	// message Label
    94  	tagLabel_Key = 1 // int64 (string table index)
    95  	tagLabel_Str = 2 // int64 (string table index)
    96  	tagLabel_Num = 3 // int64
    97  
    98  	// message Mapping
    99  	tagMapping_ID              = 1  // uint64
   100  	tagMapping_Start           = 2  // uint64
   101  	tagMapping_Limit           = 3  // uint64
   102  	tagMapping_Offset          = 4  // uint64
   103  	tagMapping_Filename        = 5  // int64 (string table index)
   104  	tagMapping_BuildID         = 6  // int64 (string table index)
   105  	tagMapping_HasFunctions    = 7  // bool
   106  	tagMapping_HasFilenames    = 8  // bool
   107  	tagMapping_HasLineNumbers  = 9  // bool
   108  	tagMapping_HasInlineFrames = 10 // bool
   109  
   110  	// message Location
   111  	tagLocation_ID        = 1 // uint64
   112  	tagLocation_MappingID = 2 // uint64
   113  	tagLocation_Address   = 3 // uint64
   114  	tagLocation_Line      = 4 // repeated Line
   115  
   116  	// message Line
   117  	tagLine_FunctionID = 1 // uint64
   118  	tagLine_Line       = 2 // int64
   119  
   120  	// message Function
   121  	tagFunction_ID         = 1 // uint64
   122  	tagFunction_Name       = 2 // int64 (string table index)
   123  	tagFunction_SystemName = 3 // int64 (string table index)
   124  	tagFunction_Filename   = 4 // int64 (string table index)
   125  	tagFunction_StartLine  = 5 // int64
   126  )
   127  
   128  // stringIndex adds s to the string table if not already present
   129  // and returns the index of s in the string table.
   130  func (b *profileBuilder) stringIndex(s string) int64 {
   131  	id, ok := b.stringMap[s]
   132  	if !ok {
   133  		id = len(b.strings)
   134  		b.strings = append(b.strings, s)
   135  		b.stringMap[s] = id
   136  	}
   137  	return int64(id)
   138  }
   139  
   140  func (b *profileBuilder) flush() {
   141  	const dataFlush = 4096
   142  	if b.pb.nest == 0 && len(b.pb.data) > dataFlush {
   143  		b.zw.Write(b.pb.data)
   144  		b.pb.data = b.pb.data[:0]
   145  	}
   146  }
   147  
   148  // pbValueType encodes a ValueType message to b.pb.
   149  func (b *profileBuilder) pbValueType(tag int, typ, unit string) {
   150  	start := b.pb.startMessage()
   151  	b.pb.int64(tagValueType_Type, b.stringIndex(typ))
   152  	b.pb.int64(tagValueType_Unit, b.stringIndex(unit))
   153  	b.pb.endMessage(tag, start)
   154  }
   155  
   156  // pbSample encodes a Sample message to b.pb.
   157  func (b *profileBuilder) pbSample(values []int64, locs []uint64, labels func()) {
   158  	start := b.pb.startMessage()
   159  	b.pb.int64s(tagSample_Value, values)
   160  	b.pb.uint64s(tagSample_Location, locs)
   161  	if labels != nil {
   162  		labels()
   163  	}
   164  	b.pb.endMessage(tagProfile_Sample, start)
   165  	b.flush()
   166  }
   167  
   168  // pbLabel encodes a Label message to b.pb.
   169  func (b *profileBuilder) pbLabel(tag int, key, str string, num int64) {
   170  	start := b.pb.startMessage()
   171  	b.pb.int64Opt(tagLabel_Key, b.stringIndex(key))
   172  	b.pb.int64Opt(tagLabel_Str, b.stringIndex(str))
   173  	b.pb.int64Opt(tagLabel_Num, num)
   174  	b.pb.endMessage(tag, start)
   175  }
   176  
   177  // pbLine encodes a Line message to b.pb.
   178  func (b *profileBuilder) pbLine(tag int, funcID uint64, line int64) {
   179  	start := b.pb.startMessage()
   180  	b.pb.uint64Opt(tagLine_FunctionID, funcID)
   181  	b.pb.int64Opt(tagLine_Line, line)
   182  	b.pb.endMessage(tag, start)
   183  }
   184  
   185  // pbMapping encodes a Mapping message to b.pb.
   186  func (b *profileBuilder) pbMapping(tag int, id, base, limit, offset uint64, file, buildID string, hasFuncs bool) {
   187  	start := b.pb.startMessage()
   188  	b.pb.uint64Opt(tagMapping_ID, id)
   189  	b.pb.uint64Opt(tagMapping_Start, base)
   190  	b.pb.uint64Opt(tagMapping_Limit, limit)
   191  	b.pb.uint64Opt(tagMapping_Offset, offset)
   192  	b.pb.int64Opt(tagMapping_Filename, b.stringIndex(file))
   193  	b.pb.int64Opt(tagMapping_BuildID, b.stringIndex(buildID))
   194  	// TODO: we set HasFunctions if all symbols from samples were symbolized (hasFuncs).
   195  	// Decide what to do about HasInlineFrames and HasLineNumbers.
   196  	// Also, another approach to handle the mapping entry with
   197  	// incomplete symbolization results is to dupliace the mapping
   198  	// entry (but with different Has* fields values) and use
   199  	// different entries for symbolized locations and unsymbolized locations.
   200  	if hasFuncs {
   201  		b.pb.bool(tagMapping_HasFunctions, true)
   202  	}
   203  	b.pb.endMessage(tag, start)
   204  }
   205  
   206  func allFrames(addr uintptr) ([]runtime.Frame, symbolizeFlag) {
   207  	// Expand this one address using CallersFrames so we can cache
   208  	// each expansion. In general, CallersFrames takes a whole
   209  	// stack, but in this case we know there will be no skips in
   210  	// the stack and we have return PCs anyway.
   211  	frames := runtime.CallersFrames([]uintptr{addr})
   212  	frame, more := frames.Next()
   213  	if frame.Function == "runtime.goexit" {
   214  		// Short-circuit if we see runtime.goexit so the loop
   215  		// below doesn't allocate a useless empty location.
   216  		return nil, 0
   217  	}
   218  
   219  	symbolizeResult := lookupTried
   220  	if frame.PC == 0 || frame.Function == "" || frame.File == "" || frame.Line == 0 {
   221  		symbolizeResult |= lookupFailed
   222  	}
   223  
   224  	if frame.PC == 0 {
   225  		// If we failed to resolve the frame, at least make up
   226  		// a reasonable call PC. This mostly happens in tests.
   227  		frame.PC = addr - 1
   228  	}
   229  	ret := []runtime.Frame{frame}
   230  	for frame.Function != "runtime.goexit" && more == true {
   231  		frame, more = frames.Next()
   232  		ret = append(ret, frame)
   233  	}
   234  	return ret, symbolizeResult
   235  }
   236  
// locInfo is the cached description of a Location that emitLocation has
// already written. Entries live in profileBuilder.locs, keyed by the first
// (leaf-most) PC of the location.
type locInfo struct {
	// location id assigned by the profileBuilder
	id uint64

	// sequence of PCs, including the fake PCs returned by the traceback
	// to represent inlined functions
	// https://github.com/golang/go/blob/d6f2f833c93a41ec1c68e49804b8387a06b131c5/src/runtime/traceback.go#L347-L368
	pcs []uintptr

	// firstPCFrames and firstPCSymbolizeResult hold the results of the
	// allFrames call for the first (leaf-most) PC this locInfo represents
	firstPCFrames          []runtime.Frame
	firstPCSymbolizeResult symbolizeFlag
}
   251  
   252  // newProfileBuilder returns a new profileBuilder.
   253  // CPU profiling data obtained from the runtime can be added
   254  // by calling b.addCPUData, and then the eventual profile
   255  // can be obtained by calling b.finish.
   256  func newProfileBuilder(w io.Writer) *profileBuilder {
   257  	zw, _ := gzip.NewWriterLevel(w, gzip.BestSpeed)
   258  	b := &profileBuilder{
   259  		w:         w,
   260  		zw:        zw,
   261  		start:     time.Now(),
   262  		strings:   []string{""},
   263  		stringMap: map[string]int{"": 0},
   264  		locs:      map[uintptr]locInfo{},
   265  		funcs:     map[string]int{},
   266  	}
   267  	b.readMapping()
   268  	return b
   269  }
   270  
   271  // addCPUData adds the CPU profiling data to the profile.
   272  //
   273  // The data must be a whole number of records, as delivered by the runtime.
   274  // len(tags) must be equal to the number of records in data.
   275  //func (b *profileBuilder) addCPUData(data []uint64, tags []unsafe.Pointer) error {
   276  //	if !b.havePeriod {
   277  //		// first record is period
   278  //		if len(data) < 3 {
   279  //			return fmt.Errorf("truncated profile")
   280  //		}
   281  //		if data[0] != 3 || data[2] == 0 {
   282  //			return fmt.Errorf("malformed profile")
   283  //		}
   284  //		// data[2] is sampling rate in Hz. Convert to sampling
   285  //		// period in nanoseconds.
   286  //		b.period = 1e9 / int64(data[2])
   287  //		b.havePeriod = true
   288  //		data = data[3:]
   289  //		// Consume tag slot. Note that there isn't a meaningful tag
   290  //		// value for this record.
   291  //		tags = tags[1:]
   292  //	}
   293  //
   294  //	// Parse CPU samples from the profile.
   295  //	// Each sample is 3+n uint64s:
   296  //	//	data[0] = 3+n
   297  //	//	data[1] = time stamp (ignored)
   298  //	//	data[2] = count
   299  //	//	data[3:3+n] = stack
   300  //	// If the count is 0 and the stack has length 1,
   301  //	// that's an overflow record inserted by the runtime
   302  //	// to indicate that stack[0] samples were lost.
   303  //	// Otherwise the count is usually 1,
   304  //	// but in a few special cases like lost non-Go samples
   305  //	// there can be larger counts.
   306  //	// Because many samples with the same stack arrive,
   307  //	// we want to deduplicate immediately, which we do
   308  //	// using the b.m profMap.
   309  //	for len(data) > 0 {
   310  //		if len(data) < 3 || data[0] > uint64(len(data)) {
   311  //			return fmt.Errorf("truncated profile")
   312  //		}
   313  //		if data[0] < 3 || tags != nil && len(tags) < 1 {
   314  //			return fmt.Errorf("malformed profile")
   315  //		}
   316  //		if len(tags) < 1 {
   317  //			return fmt.Errorf("mismatched profile records and tags")
   318  //		}
   319  //		count := data[2]
   320  //		stk := data[3:data[0]]
   321  //		data = data[data[0]:]
   322  //		tag := tags[0]
   323  //		tags = tags[1:]
   324  //
   325  //		if count == 0 && len(stk) == 1 {
   326  //			// overflow record
   327  //			count = uint64(stk[0])
   328  //			stk = []uint64{
   329  //				// gentraceback guarantees that PCs in the
   330  //				// stack can be unconditionally decremented and
   331  //				// still be valid, so we must do the same.
   332  //				uint64(abi.FuncPCABIInternal(lostProfileEvent) + 1),
   333  //			}
   334  //		}
   335  //		b.m.lookup(stk, tag).count += int64(count)
   336  //	}
   337  //
   338  //	if len(tags) != 0 {
   339  //		return fmt.Errorf("mismatched profile records and tags")
   340  //	}
   341  //	return nil
   342  //}
   343  
   344  // build completes and returns the constructed profile.
   345  func (b *profileBuilder) build() {
   346  	b.end = time.Now()
   347  
   348  	b.pb.int64Opt(tagProfile_TimeNanos, b.start.UnixNano())
   349  	if b.havePeriod { // must be CPU profile
   350  		b.pbValueType(tagProfile_SampleType, "samples", "count")
   351  		b.pbValueType(tagProfile_SampleType, "cpu", "nanoseconds")
   352  		b.pb.int64Opt(tagProfile_DurationNanos, b.end.Sub(b.start).Nanoseconds())
   353  		b.pbValueType(tagProfile_PeriodType, "cpu", "nanoseconds")
   354  		b.pb.int64Opt(tagProfile_Period, b.period)
   355  	}
   356  
   357  	//values := []int64{0, 0}
   358  	//var locs []uint64
   359  
   360  	//for e := b.m.all; e != nil; e = e.nextAll {
   361  	//	values[0] = e.count
   362  	//	values[1] = e.count * b.period
   363  	//
   364  	//	var labels func()
   365  	//	if e.tag != nil {
   366  	//		labels = func() {
   367  	//			for k, v := range *(*labelMap)(e.tag) {
   368  	//				b.pbLabel(tagSample_Label, k, v, 0)
   369  	//			}
   370  	//		}
   371  	//	}
   372  	//
   373  	//	locs = b.appendLocsForStack(locs[:0], e.stk)
   374  	//
   375  	//	b.pbSample(values, locs, labels)
   376  	//}
   377  
   378  	for i, m := range b.mem {
   379  		hasFunctions := m.funcs == lookupTried // lookupTried but not lookupFailed
   380  		b.pbMapping(tagProfile_Mapping, uint64(i+1), uint64(m.start), uint64(m.end), m.offset, m.file, m.buildID, hasFunctions)
   381  	}
   382  
   383  	// TODO: Anything for tagProfile_DropFrames?
   384  	// TODO: Anything for tagProfile_KeepFrames?
   385  
   386  	b.pb.strings(tagProfile_StringTable, b.strings)
   387  	b.zw.Write(b.pb.data)
   388  	b.zw.Close()
   389  }
   390  
// appendLocsForStack appends the location IDs for the given stack trace to the given
// location ID slice, locs. The addresses in the stack are return PCs or 1 + the PC of
// an inline marker as the runtime traceback function returns.
//
// Consecutive PCs that tryAdd accepts as belonging to one inlined call chain
// are collected in b.deck and emitted as a single location; locations that
// were emitted earlier are reused from b.locs. The extended slice is returned.
//
// It may emit to b.pb, so there must be no message encoding in progress.
func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLocs []uint64) {
	// Start from an empty deck.
	b.deck.reset()

	// The last frame might be truncated. Recover lost inline frames.
	stk = runtime_expandFinalInlineFrame(stk)

	for len(stk) > 0 {
		addr := stk[0]
		if l, ok := b.locs[addr]; ok {
			// When generating code for an inlined function, the compiler adds
			// NOP instructions to the outermost function as a placeholder for
			// each layer of inlining. When the runtime generates tracebacks for
			// stacks that include inlined functions, it uses the addresses of
			// those NOPs as "fake" PCs on the stack as if they were regular
			// function call sites. But if a profiling signal arrives while the
			// CPU is executing one of those NOPs, its PC will show up as a leaf
			// in the profile with its own Location entry. So, always check
			// whether addr is a "fake" PC in the context of the current call
			// stack by trying to add it to the inlining deck before assuming
			// that the deck is complete.
			if len(b.deck.pcs) > 0 {
				if added := b.deck.tryAdd(addr, l.firstPCFrames, l.firstPCSymbolizeResult); added {
					stk = stk[1:]
					continue
				}
			}

			// first record the location if there is any pending accumulated info.
			// (emitLocation returns 0 when the deck is empty.)
			if id := b.emitLocation(); id > 0 {
				locs = append(locs, id)
			}

			// then, record the cached location.
			locs = append(locs, l.id)

			// Skip the matching pcs.
			//
			// Even if stk was truncated due to the stack depth
			// limit, expandFinalInlineFrame above has already
			// fixed the truncation, ensuring it is long enough.
			stk = stk[len(l.pcs):]
			continue
		}

		// addr has no cached location yet: symbolize it.
		frames, symbolizeResult := allFrames(addr)
		if len(frames) == 0 { // runtime.goexit.
			if id := b.emitLocation(); id > 0 {
				locs = append(locs, id)
			}
			stk = stk[1:]
			continue
		}

		if added := b.deck.tryAdd(addr, frames, symbolizeResult); added {
			stk = stk[1:]
			continue
		}
		// add failed because this addr is not inlined with the
		// existing PCs in the deck. Flush the deck and retry handling
		// this pc.
		if id := b.emitLocation(); id > 0 {
			locs = append(locs, id)
		}

		// check cache again - previous emitLocation added a new entry
		if l, ok := b.locs[addr]; ok {
			locs = append(locs, l.id)
			stk = stk[len(l.pcs):] // skip the matching pcs.
		} else {
			b.deck.tryAdd(addr, frames, symbolizeResult) // must succeed.
			stk = stk[1:]
		}
	}
	if id := b.emitLocation(); id > 0 { // emit remaining location.
		locs = append(locs, id)
	}
	return locs
}
   474  
   475  // Here's an example of how Go 1.17 writes out inlined functions, compiled for
   476  // linux/amd64. The disassembly of main.main shows two levels of inlining: main
   477  // calls b, b calls a, a does some work.
   478  //
   479  //   inline.go:9   0x4553ec  90              NOPL                 // func main()    { b(v) }
   480  //   inline.go:6   0x4553ed  90              NOPL                 // func b(v *int) { a(v) }
   481  //   inline.go:5   0x4553ee  48c7002a000000  MOVQ $0x2a, 0(AX)    // func a(v *int) { *v = 42 }
   482  //
   483  // If a profiling signal arrives while executing the MOVQ at 0x4553ee (for line
   484  // 5), the runtime will report the stack as the MOVQ frame being called by the
   485  // NOPL at 0x4553ed (for line 6) being called by the NOPL at 0x4553ec (for line
   486  // 9).
   487  //
   488  // The role of pcDeck is to collapse those three frames back into a single
   489  // location at 0x4553ee, with file/line/function symbolization info representing
   490  // the three layers of calls. It does that via sequential calls to pcDeck.tryAdd
   491  // starting with the leaf-most address. The fourth call to pcDeck.tryAdd will be
   492  // for the caller of main.main. Because main.main was not inlined in its caller,
   493  // the deck will reject the addition, and the fourth PC on the stack will get
   494  // its own location.
   495  
// pcDeck is a helper to detect a sequence of inlined functions from
// a stack trace returned by the runtime.
//
// The stack traces returned by runtime's traceback functions are fully
// expanded (at least for Go functions) and include the fake pcs representing
// inlined functions. The profile proto expects the inlined functions to be
// encoded in one Location message.
// https://github.com/google/pprof/blob/5e965273ee43930341d897407202dd5e10e952cb/proto/profile.proto#L177-L184
//
// Runtime does not directly expose whether a frame is for an inlined function
// and looking up debug info is not ideal, so we use a heuristic to filter
// the fake pcs and restore the inlined and entry functions. Inlined functions
// have the following properties:
//
//	Frame's Func is nil (note: also true for non-Go functions), and
//	Frame's Entry matches its entry function frame's Entry (note: could also be true for recursive calls and non-Go functions), and
//	Frame's Name does not match its entry function frame's name (note: inlined functions cannot be directly recursive).
//
// While reading and processing the pcs in a stack trace one by one (from the
// leaf to the root), we use pcDeck to temporarily hold the observed pcs and
// their expanded frames until we observe the entry function frame.
type pcDeck struct {
	// pcs and frames are the PCs accepted so far and the frames expanded
	// from them; symbolizeResult is the OR of their symbolization results.
	pcs             []uintptr
	frames          []runtime.Frame
	symbolizeResult symbolizeFlag

	// firstPCFrames indicates the number of frames associated with the first
	// (leaf-most) PC in the deck
	firstPCFrames int
	// firstPCSymbolizeResult holds the results of the allFrames call for the
	// first (leaf-most) PC in the deck
	firstPCSymbolizeResult symbolizeFlag
}
   529  
   530  func (d *pcDeck) reset() {
   531  	d.pcs = d.pcs[:0]
   532  	d.frames = d.frames[:0]
   533  	d.symbolizeResult = 0
   534  	d.firstPCFrames = 0
   535  	d.firstPCSymbolizeResult = 0
   536  }
   537  
   538  // tryAdd tries to add the pc and Frames expanded from it (most likely one,
   539  // since the stack trace is already fully expanded) and the symbolizeResult
   540  // to the deck. If it fails the caller needs to flush the deck and retry.
   541  func (d *pcDeck) tryAdd(pc uintptr, frames []runtime.Frame, symbolizeResult symbolizeFlag) (success bool) {
   542  	if existing := len(d.frames); existing > 0 {
   543  		// 'd.frames' are all expanded from one 'pc' and represent all
   544  		// inlined functions so we check only the last one.
   545  		newFrame := frames[0]
   546  		last := d.frames[existing-1]
   547  		if last.Func != nil { // the last frame can't be inlined. Flush.
   548  			return false
   549  		}
   550  		if last.Entry == 0 || newFrame.Entry == 0 { // Possibly not a Go function. Don't try to merge.
   551  			return false
   552  		}
   553  
   554  		if last.Entry != newFrame.Entry { // newFrame is for a different function.
   555  			return false
   556  		}
   557  		if last.Function == newFrame.Function { // maybe recursion.
   558  			return false
   559  		}
   560  	}
   561  	d.pcs = append(d.pcs, pc)
   562  	d.frames = append(d.frames, frames...)
   563  	d.symbolizeResult |= symbolizeResult
   564  	if len(d.pcs) == 1 {
   565  		d.firstPCFrames = len(d.frames)
   566  		d.firstPCSymbolizeResult = symbolizeResult
   567  	}
   568  	return true
   569  }
   570  
   571  // emitLocation emits the new location and function information recorded in the deck
   572  // and returns the location ID encoded in the profile protobuf.
   573  // It emits to b.pb, so there must be no message encoding in progress.
   574  // It resets the deck.
   575  func (b *profileBuilder) emitLocation() uint64 {
   576  	if len(b.deck.pcs) == 0 {
   577  		return 0
   578  	}
   579  	defer b.deck.reset()
   580  
   581  	addr := b.deck.pcs[0]
   582  	firstFrame := b.deck.frames[0]
   583  
   584  	// We can't write out functions while in the middle of the
   585  	// Location message, so record new functions we encounter and
   586  	// write them out after the Location.
   587  	type newFunc struct {
   588  		id         uint64
   589  		name, file string
   590  	}
   591  	newFuncs := make([]newFunc, 0, 8)
   592  
   593  	id := uint64(len(b.locs)) + 1
   594  	b.locs[addr] = locInfo{
   595  		id:                     id,
   596  		pcs:                    append([]uintptr{}, b.deck.pcs...),
   597  		firstPCSymbolizeResult: b.deck.firstPCSymbolizeResult,
   598  		firstPCFrames:          append([]runtime.Frame{}, b.deck.frames[:b.deck.firstPCFrames]...),
   599  	}
   600  
   601  	start := b.pb.startMessage()
   602  	b.pb.uint64Opt(tagLocation_ID, id)
   603  	b.pb.uint64Opt(tagLocation_Address, uint64(firstFrame.PC))
   604  	for _, frame := range b.deck.frames {
   605  		// Write out each line in frame expansion.
   606  		funcID := uint64(b.funcs[frame.Function])
   607  		if funcID == 0 {
   608  			funcID = uint64(len(b.funcs)) + 1
   609  			b.funcs[frame.Function] = int(funcID)
   610  			newFuncs = append(newFuncs, newFunc{funcID, frame.Function, frame.File})
   611  		}
   612  		b.pbLine(tagLocation_Line, funcID, int64(frame.Line))
   613  	}
   614  	for i := range b.mem {
   615  		if b.mem[i].start <= addr && addr < b.mem[i].end || b.mem[i].fake {
   616  			b.pb.uint64Opt(tagLocation_MappingID, uint64(i+1))
   617  
   618  			m := b.mem[i]
   619  			m.funcs |= b.deck.symbolizeResult
   620  			b.mem[i] = m
   621  			break
   622  		}
   623  	}
   624  	b.pb.endMessage(tagProfile_Location, start)
   625  
   626  	// Write out functions we found during frame expansion.
   627  	for _, fn := range newFuncs {
   628  		start := b.pb.startMessage()
   629  		b.pb.uint64Opt(tagFunction_ID, fn.id)
   630  		b.pb.int64Opt(tagFunction_Name, b.stringIndex(fn.name))
   631  		b.pb.int64Opt(tagFunction_SystemName, b.stringIndex(fn.name))
   632  		b.pb.int64Opt(tagFunction_Filename, b.stringIndex(fn.file))
   633  		b.pb.endMessage(tagProfile_Function, start)
   634  	}
   635  
   636  	b.flush()
   637  	return id
   638  }
   639  
   640  // readMapping reads /proc/self/maps and writes mappings to b.pb.
   641  // It saves the address ranges of the mappings in b.mem for use
   642  // when emitting locations.
   643  func (b *profileBuilder) readMapping() {
   644  	data, _ := os.ReadFile("/proc/self/maps")
   645  	parseProcSelfMaps(data, b.addMapping)
   646  	if len(b.mem) == 0 { // pprof expects a map entry, so fake one.
   647  		b.addMappingEntry(0, 0, 0, "", "", true)
   648  		// TODO(hyangah): make addMapping return *memMap or
   649  		// take a memMap struct, and get rid of addMappingEntry
   650  		// that takes a bunch of positional arguments.
   651  	}
   652  }
   653  
   654  var space = []byte(" ")
   655  var newline = []byte("\n")
   656  
   657  func parseProcSelfMaps(data []byte, addMapping func(lo, hi, offset uint64, file, buildID string)) {
   658  	// $ cat /proc/self/maps
   659  	// 00400000-0040b000 r-xp 00000000 fc:01 787766                             /bin/cat
   660  	// 0060a000-0060b000 r--p 0000a000 fc:01 787766                             /bin/cat
   661  	// 0060b000-0060c000 rw-p 0000b000 fc:01 787766                             /bin/cat
   662  	// 014ab000-014cc000 rw-p 00000000 00:00 0                                  [heap]
   663  	// 7f7d76af8000-7f7d7797c000 r--p 00000000 fc:01 1318064                    /usr/lib/locale/locale-archive
   664  	// 7f7d7797c000-7f7d77b36000 r-xp 00000000 fc:01 1180226                    /lib/x86_64-linux-gnu/libc-2.19.so
   665  	// 7f7d77b36000-7f7d77d36000 ---p 001ba000 fc:01 1180226                    /lib/x86_64-linux-gnu/libc-2.19.so
   666  	// 7f7d77d36000-7f7d77d3a000 r--p 001ba000 fc:01 1180226                    /lib/x86_64-linux-gnu/libc-2.19.so
   667  	// 7f7d77d3a000-7f7d77d3c000 rw-p 001be000 fc:01 1180226                    /lib/x86_64-linux-gnu/libc-2.19.so
   668  	// 7f7d77d3c000-7f7d77d41000 rw-p 00000000 00:00 0
   669  	// 7f7d77d41000-7f7d77d64000 r-xp 00000000 fc:01 1180217                    /lib/x86_64-linux-gnu/ld-2.19.so
   670  	// 7f7d77f3f000-7f7d77f42000 rw-p 00000000 00:00 0
   671  	// 7f7d77f61000-7f7d77f63000 rw-p 00000000 00:00 0
   672  	// 7f7d77f63000-7f7d77f64000 r--p 00022000 fc:01 1180217                    /lib/x86_64-linux-gnu/ld-2.19.so
   673  	// 7f7d77f64000-7f7d77f65000 rw-p 00023000 fc:01 1180217                    /lib/x86_64-linux-gnu/ld-2.19.so
   674  	// 7f7d77f65000-7f7d77f66000 rw-p 00000000 00:00 0
   675  	// 7ffc342a2000-7ffc342c3000 rw-p 00000000 00:00 0                          [stack]
   676  	// 7ffc34343000-7ffc34345000 r-xp 00000000 00:00 0                          [vdso]
   677  	// ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]
   678  
   679  	var line []byte
   680  	// next removes and returns the next field in the line.
   681  	// It also removes from line any spaces following the field.
   682  	next := func() []byte {
   683  		var f []byte
   684  		f, line, _ = bytesCut(line, space)
   685  		line = bytes.TrimLeft(line, " ")
   686  		return f
   687  	}
   688  
   689  	for len(data) > 0 {
   690  		line, data, _ = bytesCut(data, newline)
   691  		addr := next()
   692  		loStr, hiStr, ok := stringsCut(string(addr), "-")
   693  		if !ok {
   694  			continue
   695  		}
   696  		lo, err := strconv.ParseUint(loStr, 16, 64)
   697  		if err != nil {
   698  			continue
   699  		}
   700  		hi, err := strconv.ParseUint(hiStr, 16, 64)
   701  		if err != nil {
   702  			continue
   703  		}
   704  		perm := next()
   705  		if len(perm) < 4 || perm[2] != 'x' {
   706  			// Only interested in executable mappings.
   707  			continue
   708  		}
   709  		offset, err := strconv.ParseUint(string(next()), 16, 64)
   710  		if err != nil {
   711  			continue
   712  		}
   713  		next()          // dev
   714  		inode := next() // inode
   715  		if line == nil {
   716  			continue
   717  		}
   718  		file := string(line)
   719  
   720  		// Trim deleted file marker.
   721  		deletedStr := " (deleted)"
   722  		deletedLen := len(deletedStr)
   723  		if len(file) >= deletedLen && file[len(file)-deletedLen:] == deletedStr {
   724  			file = file[:len(file)-deletedLen]
   725  		}
   726  
   727  		if len(inode) == 1 && inode[0] == '0' && file == "" {
   728  			// Huge-page text mappings list the initial fragment of
   729  			// mapped but unpopulated memory as being inode 0.
   730  			// Don't report that part.
   731  			// But [vdso] and [vsyscall] are inode 0, so let non-empty file names through.
   732  			continue
   733  		}
   734  
   735  		// TODO: pprof's remapMappingIDs makes two adjustments:
   736  		// 1. If there is an /anon_hugepage mapping first and it is
   737  		// consecutive to a next mapping, drop the /anon_hugepage.
   738  		// 2. If start-offset = 0x400000, change start to 0x400000 and offset to 0.
   739  		// There's no indication why either of these is needed.
   740  		// Let's try not doing these and see what breaks.
   741  		// If we do need them, they would go here, before we
   742  		// enter the mappings into b.mem in the first place.
   743  
   744  		buildID, _ := elfBuildID(file)
   745  		addMapping(lo, hi, offset, file, buildID)
   746  	}
   747  }
   748  
   749  func (b *profileBuilder) addMapping(lo, hi, offset uint64, file, buildID string) {
   750  	b.addMappingEntry(lo, hi, offset, file, buildID, false)
   751  }
   752  
   753  func (b *profileBuilder) addMappingEntry(lo, hi, offset uint64, file, buildID string, fake bool) {
   754  	b.mem = append(b.mem, memMap{
   755  		start:   uintptr(lo),
   756  		end:     uintptr(hi),
   757  		offset:  offset,
   758  		file:    file,
   759  		buildID: buildID,
   760  		fake:    fake,
   761  	})
   762  }
   763  
   764  // Cut slices s around the first instance of sep,
   765  // returning the text before and after sep.
   766  // The found result reports whether sep appears in s.
   767  // If sep does not appear in s, cut returns s, nil, false.
   768  //
   769  // Cut returns slices of the original slice s, not copies.
   770  func bytesCut(s, sep []byte) (before, after []byte, found bool) {
   771  	if i := bytes.Index(s, sep); i >= 0 {
   772  		return s[:i], s[i+len(sep):], true
   773  	}
   774  	return s, nil, false
   775  }
   776  
   777  // Cut slices s around the first instance of sep,
   778  // returning the text before and after sep.
   779  // The found result reports whether sep appears in s.
   780  // If sep does not appear in s, cut returns s, "", false.
   781  func stringsCut(s, sep string) (before, after string, found bool) {
   782  	if i := strings.Index(s, sep); i >= 0 {
   783  		return s[:i], s[i+len(sep):], true
   784  	}
   785  	return s, "", false
   786  }