github.com/mtsmfm/go/src@v0.0.0-20221020090648-44bdcb9f8fde/runtime/mprof.go

github.com/mtsmfm/go/src@v0.0.0-20221020090648-44bdcb9f8fde/runtime/mprof.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Malloc profiling.
     6  // Patterned after tcmalloc's algorithms; shorter code.
     7  
     8  package runtime
     9  
    10  import (
    11  	"internal/abi"
    12  	"runtime/internal/atomic"
    13  	"runtime/internal/sys"
    14  	"unsafe"
    15  )
    16  
    17  // NOTE(rsc): Everything here could use cas if contention became an issue.
var (
	// profInsertLock protects changes to the start of all *bucket linked lists
	// (the buckhash chains and the per-profile allnext lists). Readers walk
	// those lists without holding it; see stkbucket.
	profInsertLock mutex
	// profBlockLock protects the contents of every blockRecord struct
	profBlockLock mutex
	// profMemActiveLock protects the active field of every memRecord struct
	profMemActiveLock mutex
	// profMemFutureLock is a set of locks that protect the respective elements
	// of the future array of every memRecord struct. Where both kinds of lock
	// are held, profMemActiveLock is acquired first (see mProf_Flush and
	// MemProfile).
	profMemFutureLock [len(memRecord{}.future)]mutex
)
    29  
    30  // All memory allocations are local and do not escape outside of the profiler.
    31  // The profiler is forbidden from referring to garbage-collected memory.
    32  
const (
	// profile types
	//
	// The zero bucketType is not a valid profile type; newBucket throws on
	// anything other than the three values below. memProfile buckets carry
	// a memRecord, blockProfile and mutexProfile buckets carry a blockRecord.
	memProfile bucketType = 1 + iota
	blockProfile
	mutexProfile

	// size of bucket hash table
	buckHashSize = 179999

	// max depth of stack to record in bucket
	maxStack = 32
)

// bucketType identifies which profile a bucket belongs to.
type bucketType int
    47  
    48  // A bucket holds per-call-stack profiling information.
    49  // The representation is a bit sleazy, inherited from C.
    50  // This struct defines the bucket header. It is followed in
    51  // memory by the stack words and then the actual record
    52  // data, either a memRecord or a blockRecord.
    53  //
    54  // Per-call-stack profiling information.
    55  // Lookup by hashing call stack into a linked-list hash table.
    56  //
    57  // None of the fields in this bucket header are modified after
    58  // creation, including its next and allnext links.
    59  //
    60  // No heap pointers.
type bucket struct {
	_       sys.NotInHeap
	next    *bucket    // next bucket in the same buckhash chain
	allnext *bucket    // next bucket in the per-profile list (mbuckets, bbuckets or xbuckets)
	typ     bucketType // memProfile, blockProfile or mutexProfile
	hash    uintptr    // hash of the stack and size, as computed by stkbucket
	size    uintptr    // size passed to stkbucket (allocation size for memProfile, 0 from saveblockevent)
	nstk    uintptr    // number of stack words stored directly after this header
}
    70  
    71  // A memRecord is the bucket data for a bucket of type memProfile,
    72  // part of the memory profile.
type memRecord struct {
	// The following complex 3-stage scheme of stats accumulation
	// is required to obtain a consistent picture of mallocs and frees
	// for some point in time.
	// The problem is that mallocs come in real time, while frees
	// come only after a GC during concurrent sweeping. So if we would
	// naively count them, we would get a skew toward mallocs.
	//
	// Hence, we delay information to get consistent snapshots as
	// of mark termination. Allocations count toward the next mark
	// termination's snapshot, while sweep frees count toward the
	// previous mark termination's snapshot:
	//
	//              MT          MT          MT          MT
	//             .·|         .·|         .·|         .·|
	//          .·˙  |      .·˙  |      .·˙  |      .·˙  |
	//       .·˙     |   .·˙     |   .·˙     |   .·˙     |
	//    .·˙        |.·˙        |.·˙        |.·˙        |
	//
	//       alloc → ▲ ← free
	//               ┠┅┅┅┅┅┅┅┅┅┅┅P
	//       C+2     →    C+1    →  C
	//
	//                   alloc → ▲ ← free
	//                           ┠┅┅┅┅┅┅┅┅┅┅┅P
	//                   C+2     →    C+1    →  C
	//
	// Since we can't publish a consistent snapshot until all of
	// the sweep frees are accounted for, we wait until the next
	// mark termination ("MT" above) to publish the previous mark
	// termination's snapshot ("P" above). To do this, allocation
	// and free events are accounted to *future* heap profile
	// cycles ("C+n" above) and we only publish a cycle once all
	// of the events from that cycle must be done. Specifically:
	//
	// Mallocs are accounted to cycle C+2.
	// Explicit frees are accounted to cycle C+2.
	// GC frees (done during sweeping) are accounted to cycle C+1.
	//
	// After mark termination, we increment the global heap
	// profile cycle counter and accumulate the stats from cycle C
	// into the active profile.

	// active is the currently published profile. A profiling
	// cycle can be accumulated into active once it's complete.
	// Protected by profMemActiveLock.
	active memRecordCycle

	// future records the profile events we're counting for cycles
	// that have not yet been published. This is a ring buffer
	// indexed by the global heap profile cycle C and stores
	// cycles C, C+1, and C+2. Unlike active, these counts are
	// only for a single cycle; they are not cumulative across
	// cycles.
	//
	// We store cycle C here because there's a window between when
	// C becomes the active cycle and when we've flushed it to
	// active.
	//
	// Element i is protected by profMemFutureLock[i].
	future [3]memRecordCycle
}
   132  
   133  // memRecordCycle
   134  type memRecordCycle struct {
   135  	allocs, frees           uintptr
   136  	alloc_bytes, free_bytes uintptr
   137  }
   138  
   139  // add accumulates b into a. It does not zero b.
   140  func (a *memRecordCycle) add(b *memRecordCycle) {
   141  	a.allocs += b.allocs
   142  	a.frees += b.frees
   143  	a.alloc_bytes += b.alloc_bytes
   144  	a.free_bytes += b.free_bytes
   145  }
   146  
   147  // A blockRecord is the bucket data for a bucket of type blockProfile,
   148  // which is used in blocking and mutex profiles.
type blockRecord struct {
	count  float64 // number of events; fractional because saveblockevent adds rate/cycles for sampled short events
	cycles int64   // accumulated cycles attributed to this bucket
}
   153  
// Heads of the per-profile bucket lists (linked through bucket.allnext) and
// the bucket hash table. stkbucket publishes new heads while holding
// profInsertLock; the profile readers load them without it.
var (
	mbuckets atomic.UnsafePointer // *bucket, memory profile buckets
	bbuckets atomic.UnsafePointer // *bucket, blocking profile buckets
	xbuckets atomic.UnsafePointer // *bucket, mutex profile buckets
	buckhash atomic.UnsafePointer // *buckhashArray, allocated on first use by stkbucket

	// mProfCycle is the global heap profile cycle counter plus its
	// "flushed" flag; see mProfCycleHolder.
	mProfCycle mProfCycleHolder
)

// buckhashArray is the bucket hash table: buckHashSize chain heads, each
// linked through bucket.next.
type buckhashArray [buckHashSize]atomic.UnsafePointer // *bucket

// mProfCycleWrap is the value at which the heap profile cycle counter wraps.
// It is a multiple of len(memRecord{}.future), so cycle % len(future) keeps
// advancing one slot at a time across the wrap.
const mProfCycleWrap = uint32(len(memRecord{}.future)) * (2 << 24)
   166  
   167  // mProfCycleHolder holds the global heap profile cycle number (wrapped at
   168  // mProfCycleWrap, stored starting at bit 1), and a flag (stored at bit 0) to
   169  // indicate whether future[cycle] in all buckets has been queued to flush into
   170  // the active profile.
   171  type mProfCycleHolder struct {
   172  	value atomic.Uint32
   173  }
   174  
   175  // read returns the current cycle count.
   176  func (c *mProfCycleHolder) read() (cycle uint32) {
   177  	v := c.value.Load()
   178  	cycle = v >> 1
   179  	return cycle
   180  }
   181  
   182  // setFlushed sets the flushed flag. It returns the current cycle count and the
   183  // previous value of the flushed flag.
   184  func (c *mProfCycleHolder) setFlushed() (cycle uint32, alreadyFlushed bool) {
   185  	for {
   186  		prev := c.value.Load()
   187  		cycle = prev >> 1
   188  		alreadyFlushed = (prev & 0x1) != 0
   189  		next := prev | 0x1
   190  		if c.value.CompareAndSwap(prev, next) {
   191  			return cycle, alreadyFlushed
   192  		}
   193  	}
   194  }
   195  
   196  // increment increases the cycle count by one, wrapping the value at
   197  // mProfCycleWrap. It clears the flushed flag.
   198  func (c *mProfCycleHolder) increment() {
   199  	// We explicitly wrap mProfCycle rather than depending on
   200  	// uint wraparound because the memRecord.future ring does not
   201  	// itself wrap at a power of two.
   202  	for {
   203  		prev := c.value.Load()
   204  		cycle := prev >> 1
   205  		cycle = (cycle + 1) % mProfCycleWrap
   206  		next := cycle << 1
   207  		if c.value.CompareAndSwap(prev, next) {
   208  			break
   209  		}
   210  	}
   211  }
   212  
   213  // newBucket allocates a bucket with the given type and number of stack entries.
   214  func newBucket(typ bucketType, nstk int) *bucket {
   215  	size := unsafe.Sizeof(bucket{}) + uintptr(nstk)*unsafe.Sizeof(uintptr(0))
   216  	switch typ {
   217  	default:
   218  		throw("invalid profile bucket type")
   219  	case memProfile:
   220  		size += unsafe.Sizeof(memRecord{})
   221  	case blockProfile, mutexProfile:
   222  		size += unsafe.Sizeof(blockRecord{})
   223  	}
   224  
   225  	b := (*bucket)(persistentalloc(size, 0, &memstats.buckhash_sys))
   226  	b.typ = typ
   227  	b.nstk = uintptr(nstk)
   228  	return b
   229  }
   230  
   231  // stk returns the slice in b holding the stack.
   232  func (b *bucket) stk() []uintptr {
   233  	stk := (*[maxStack]uintptr)(add(unsafe.Pointer(b), unsafe.Sizeof(*b)))
   234  	return stk[:b.nstk:b.nstk]
   235  }
   236  
   237  // mp returns the memRecord associated with the memProfile bucket b.
   238  func (b *bucket) mp() *memRecord {
   239  	if b.typ != memProfile {
   240  		throw("bad use of bucket.mp")
   241  	}
   242  	data := add(unsafe.Pointer(b), unsafe.Sizeof(*b)+b.nstk*unsafe.Sizeof(uintptr(0)))
   243  	return (*memRecord)(data)
   244  }
   245  
   246  // bp returns the blockRecord associated with the blockProfile bucket b.
   247  func (b *bucket) bp() *blockRecord {
   248  	if b.typ != blockProfile && b.typ != mutexProfile {
   249  		throw("bad use of bucket.bp")
   250  	}
   251  	data := add(unsafe.Pointer(b), unsafe.Sizeof(*b)+b.nstk*unsafe.Sizeof(uintptr(0)))
   252  	return (*blockRecord)(data)
   253  }
   254  
// stkbucket returns the bucket of profile type typ for the call stack stk
// and the given size. If no such bucket exists it returns nil when alloc is
// false; when alloc is true it creates the bucket, links it into both its
// hash chain and the per-profile allnext list, and returns it.
//
// The bucket hash table itself is allocated on first use.
func stkbucket(typ bucketType, size uintptr, stk []uintptr, alloc bool) *bucket {
	bh := (*buckhashArray)(buckhash.Load())
	if bh == nil {
		lock(&profInsertLock)
		// check again under the lock
		bh = (*buckhashArray)(buckhash.Load())
		if bh == nil {
			bh = (*buckhashArray)(sysAlloc(unsafe.Sizeof(buckhashArray{}), &memstats.buckhash_sys))
			if bh == nil {
				throw("runtime: cannot allocate memory")
			}
			buckhash.StoreNoWB(unsafe.Pointer(bh))
		}
		unlock(&profInsertLock)
	}

	// Hash stack.
	var h uintptr
	for _, pc := range stk {
		h += pc
		h += h << 10
		h ^= h >> 6
	}
	// hash in size
	h += size
	h += h << 10
	h ^= h >> 6
	// finalize
	h += h << 3
	h ^= h >> 11

	i := int(h % buckHashSize)
	// first check optimistically, without the lock
	for b := (*bucket)(bh[i].Load()); b != nil; b = b.next {
		if b.typ == typ && b.hash == h && b.size == size && eqslice(b.stk(), stk) {
			return b
		}
	}

	if !alloc {
		return nil
	}

	lock(&profInsertLock)
	// check again under the insertion lock
	// (another thread may have inserted the same bucket since the
	// optimistic walk above).
	for b := (*bucket)(bh[i].Load()); b != nil; b = b.next {
		if b.typ == typ && b.hash == h && b.size == size && eqslice(b.stk(), stk) {
			unlock(&profInsertLock)
			return b
		}
	}

	// Create new bucket.
	b := newBucket(typ, len(stk))
	copy(b.stk(), stk)
	b.hash = h
	b.size = size

	// Pick the per-profile list this bucket also belongs to.
	var allnext *atomic.UnsafePointer
	if typ == memProfile {
		allnext = &mbuckets
	} else if typ == mutexProfile {
		allnext = &xbuckets
	} else {
		allnext = &bbuckets
	}

	// Fill in both links before publishing the bucket as the new head of
	// either list, so the lock-free walks never follow an unset link.
	b.next = (*bucket)(bh[i].Load())
	b.allnext = (*bucket)(allnext.Load())

	bh[i].StoreNoWB(unsafe.Pointer(b))
	allnext.StoreNoWB(unsafe.Pointer(b))

	unlock(&profInsertLock)
	return b
}
   332  
   333  func eqslice(x, y []uintptr) bool {
   334  	if len(x) != len(y) {
   335  		return false
   336  	}
   337  	for i, xi := range x {
   338  		if xi != y[i] {
   339  			return false
   340  		}
   341  	}
   342  	return true
   343  }
   344  
// mProf_NextCycle publishes the next heap profile cycle and creates a
// fresh heap profile cycle. This operation is fast and can be done
// during STW. The caller must call mProf_Flush before calling
// mProf_NextCycle again.
//
// This is called by mark termination during STW so allocations and
// frees after the world is started again count towards a new heap
// profiling cycle.
//
// Advancing the cycle also clears the "flushed" flag held in mProfCycle
// (see mProfCycleHolder.increment), so the next mProf_Flush does real work.
func mProf_NextCycle() {
	mProfCycle.increment()
}
   356  
   357  // mProf_Flush flushes the events from the current heap profiling
   358  // cycle into the active profile. After this it is safe to start a new
   359  // heap profiling cycle with mProf_NextCycle.
   360  //
   361  // This is called by GC after mark termination starts the world. In
   362  // contrast with mProf_NextCycle, this is somewhat expensive, but safe
   363  // to do concurrently.
   364  func mProf_Flush() {
   365  	cycle, alreadyFlushed := mProfCycle.setFlushed()
   366  	if alreadyFlushed {
   367  		return
   368  	}
   369  
   370  	index := cycle % uint32(len(memRecord{}.future))
   371  	lock(&profMemActiveLock)
   372  	lock(&profMemFutureLock[index])
   373  	mProf_FlushLocked(index)
   374  	unlock(&profMemFutureLock[index])
   375  	unlock(&profMemActiveLock)
   376  }
   377  
   378  // mProf_FlushLocked flushes the events from the heap profiling cycle at index
   379  // into the active profile. The caller must hold the lock for the active profile
   380  // (profMemActiveLock) and for the profiling cycle at index
   381  // (profMemFutureLock[index]).
   382  func mProf_FlushLocked(index uint32) {
   383  	assertLockHeld(&profMemActiveLock)
   384  	assertLockHeld(&profMemFutureLock[index])
   385  	head := (*bucket)(mbuckets.Load())
   386  	for b := head; b != nil; b = b.allnext {
   387  		mp := b.mp()
   388  
   389  		// Flush cycle C into the published profile and clear
   390  		// it for reuse.
   391  		mpc := &mp.future[index]
   392  		mp.active.add(mpc)
   393  		*mpc = memRecordCycle{}
   394  	}
   395  }
   396  
   397  // mProf_PostSweep records that all sweep frees for this GC cycle have
   398  // completed. This has the effect of publishing the heap profile
   399  // snapshot as of the last mark termination without advancing the heap
   400  // profile cycle.
   401  func mProf_PostSweep() {
   402  	// Flush cycle C+1 to the active profile so everything as of
   403  	// the last mark termination becomes visible. *Don't* advance
   404  	// the cycle, since we're still accumulating allocs in cycle
   405  	// C+2, which have to become C+1 in the next mark termination
   406  	// and so on.
   407  	cycle := mProfCycle.read() + 1
   408  
   409  	index := cycle % uint32(len(memRecord{}.future))
   410  	lock(&profMemActiveLock)
   411  	lock(&profMemFutureLock[index])
   412  	mProf_FlushLocked(index)
   413  	unlock(&profMemFutureLock[index])
   414  	unlock(&profMemActiveLock)
   415  }
   416  
// Called by malloc to record a profiled block.
//
// p is the address of the new object and size its size in bytes. The
// allocation is counted in the future slot two cycles ahead of the current
// heap profile cycle (see the memRecord comment), and the bucket is then
// attached to p via setprofilebucket.
func mProf_Malloc(p unsafe.Pointer, size uintptr) {
	var stk [maxStack]uintptr
	// NOTE(review): the skip count of 4 presumably drops the allocator's
	// own frames from the recorded stack; it depends on the call path into
	// mProf_Malloc, which is not visible here — confirm before changing.
	nstk := callers(4, stk[:])

	index := (mProfCycle.read() + 2) % uint32(len(memRecord{}.future))

	b := stkbucket(memProfile, size, stk[:nstk], true)
	mp := b.mp()
	mpc := &mp.future[index]

	lock(&profMemFutureLock[index])
	mpc.allocs++
	mpc.alloc_bytes += size
	unlock(&profMemFutureLock[index])

	// Setprofilebucket locks a bunch of other mutexes, so we call it outside of
	// the profiler locks. This reduces potential contention and chances of
	// deadlocks. Since the object must be alive during the call to
	// mProf_Malloc, it's fine to do this non-atomically.
	systemstack(func() {
		setprofilebucket(p, b)
	})
}
   441  
   442  // Called when freeing a profiled block.
   443  func mProf_Free(b *bucket, size uintptr) {
   444  	index := (mProfCycle.read() + 1) % uint32(len(memRecord{}.future))
   445  
   446  	mp := b.mp()
   447  	mpc := &mp.future[index]
   448  
   449  	lock(&profMemFutureLock[index])
   450  	mpc.frees++
   451  	mpc.free_bytes += size
   452  	unlock(&profMemFutureLock[index])
   453  }
   454  
   455  var blockprofilerate uint64 // in CPU ticks
   456  
   457  // SetBlockProfileRate controls the fraction of goroutine blocking events
   458  // that are reported in the blocking profile. The profiler aims to sample
   459  // an average of one blocking event per rate nanoseconds spent blocked.
   460  //
   461  // To include every blocking event in the profile, pass rate = 1.
   462  // To turn off profiling entirely, pass rate <= 0.
   463  func SetBlockProfileRate(rate int) {
   464  	var r int64
   465  	if rate <= 0 {
   466  		r = 0 // disable profiling
   467  	} else if rate == 1 {
   468  		r = 1 // profile everything
   469  	} else {
   470  		// convert ns to cycles, use float64 to prevent overflow during multiplication
   471  		r = int64(float64(rate) * float64(tickspersecond()) / (1000 * 1000 * 1000))
   472  		if r == 0 {
   473  			r = 1
   474  		}
   475  	}
   476  
   477  	atomic.Store64(&blockprofilerate, uint64(r))
   478  }
   479  
   480  func blockevent(cycles int64, skip int) {
   481  	if cycles <= 0 {
   482  		cycles = 1
   483  	}
   484  
   485  	rate := int64(atomic.Load64(&blockprofilerate))
   486  	if blocksampled(cycles, rate) {
   487  		saveblockevent(cycles, rate, skip+1, blockProfile)
   488  	}
   489  }
   490  
   491  // blocksampled returns true for all events where cycles >= rate. Shorter
   492  // events have a cycles/rate random chance of returning true.
   493  func blocksampled(cycles, rate int64) bool {
   494  	if rate <= 0 || (rate > cycles && int64(fastrand())%rate > cycles) {
   495  		return false
   496  	}
   497  	return true
   498  }
   499  
   500  func saveblockevent(cycles, rate int64, skip int, which bucketType) {
   501  	gp := getg()
   502  	var nstk int
   503  	var stk [maxStack]uintptr
   504  	if gp.m.curg == nil || gp.m.curg == gp {
   505  		nstk = callers(skip, stk[:])
   506  	} else {
   507  		nstk = gcallers(gp.m.curg, skip, stk[:])
   508  	}
   509  	b := stkbucket(which, 0, stk[:nstk], true)
   510  	bp := b.bp()
   511  
   512  	lock(&profBlockLock)
   513  	if which == blockProfile && cycles < rate {
   514  		// Remove sampling bias, see discussion on http://golang.org/cl/299991.
   515  		bp.count += float64(rate) / float64(cycles)
   516  		bp.cycles += rate
   517  	} else {
   518  		bp.count++
   519  		bp.cycles += cycles
   520  	}
   521  	unlock(&profBlockLock)
   522  }
   523  
   524  var mutexprofilerate uint64 // fraction sampled
   525  
   526  // SetMutexProfileFraction controls the fraction of mutex contention events
   527  // that are reported in the mutex profile. On average 1/rate events are
   528  // reported. The previous rate is returned.
   529  //
   530  // To turn off profiling entirely, pass rate 0.
   531  // To just read the current rate, pass rate < 0.
   532  // (For n>1 the details of sampling may change.)
   533  func SetMutexProfileFraction(rate int) int {
   534  	if rate < 0 {
   535  		return int(mutexprofilerate)
   536  	}
   537  	old := mutexprofilerate
   538  	atomic.Store64(&mutexprofilerate, uint64(rate))
   539  	return int(old)
   540  }
   541  
   542  //go:linkname mutexevent sync.event
   543  func mutexevent(cycles int64, skip int) {
   544  	if cycles < 0 {
   545  		cycles = 0
   546  	}
   547  	rate := int64(atomic.Load64(&mutexprofilerate))
   548  	// TODO(pjw): measure impact of always calling fastrand vs using something
   549  	// like malloc.go:nextSample()
   550  	if rate > 0 && int64(fastrand())%rate == 0 {
   551  		saveblockevent(cycles, rate, skip+1, mutexProfile)
   552  	}
   553  }
   554  
   555  // Go interface to profile data.
   556  
   557  // A StackRecord describes a single execution stack.
   558  type StackRecord struct {
   559  	Stack0 [32]uintptr // stack trace for this record; ends at first 0 entry
   560  }
   561  
   562  // Stack returns the stack trace associated with the record,
   563  // a prefix of r.Stack0.
   564  func (r *StackRecord) Stack() []uintptr {
   565  	for i, v := range r.Stack0 {
   566  		if v == 0 {
   567  			return r.Stack0[0:i]
   568  		}
   569  	}
   570  	return r.Stack0[0:]
   571  }
   572  
// MemProfileRate controls the fraction of memory allocations
// that are recorded and reported in the memory profile.
// The profiler aims to sample an average of
// one allocation per MemProfileRate bytes allocated.
// The default is 512 KiB.
//
// To include every allocated block in the profile, set MemProfileRate to 1.
// To turn off profiling entirely, set MemProfileRate to 0.
//
// The tools that process the memory profiles assume that the
// profile rate is constant across the lifetime of the program
// and equal to the current value. Programs that change the
// memory profiling rate should do so just once, as early as
// possible in the execution of the program (for example,
// at the beginning of main).
var MemProfileRate int = 512 * 1024

// disableMemoryProfiling is set by the linker if runtime.MemProfile
// is not used and the link type guarantees nobody else could use it
// elsewhere.
var disableMemoryProfiling bool
   593  
   594  // A MemProfileRecord describes the live objects allocated
   595  // by a particular call sequence (stack trace).
   596  type MemProfileRecord struct {
   597  	AllocBytes, FreeBytes     int64       // number of bytes allocated, freed
   598  	AllocObjects, FreeObjects int64       // number of objects allocated, freed
   599  	Stack0                    [32]uintptr // stack trace for this record; ends at first 0 entry
   600  }
   601  
   602  // InUseBytes returns the number of bytes in use (AllocBytes - FreeBytes).
   603  func (r *MemProfileRecord) InUseBytes() int64 { return r.AllocBytes - r.FreeBytes }
   604  
   605  // InUseObjects returns the number of objects in use (AllocObjects - FreeObjects).
   606  func (r *MemProfileRecord) InUseObjects() int64 {
   607  	return r.AllocObjects - r.FreeObjects
   608  }
   609  
   610  // Stack returns the stack trace associated with the record,
   611  // a prefix of r.Stack0.
   612  func (r *MemProfileRecord) Stack() []uintptr {
   613  	for i, v := range r.Stack0 {
   614  		if v == 0 {
   615  			return r.Stack0[0:i]
   616  		}
   617  	}
   618  	return r.Stack0[0:]
   619  }
   620  
   621  // MemProfile returns a profile of memory allocated and freed per allocation
   622  // site.
   623  //
   624  // MemProfile returns n, the number of records in the current memory profile.
   625  // If len(p) >= n, MemProfile copies the profile into p and returns n, true.
   626  // If len(p) < n, MemProfile does not change p and returns n, false.
   627  //
   628  // If inuseZero is true, the profile includes allocation records
   629  // where r.AllocBytes > 0 but r.AllocBytes == r.FreeBytes.
   630  // These are sites where memory was allocated, but it has all
   631  // been released back to the runtime.
   632  //
   633  // The returned profile may be up to two garbage collection cycles old.
   634  // This is to avoid skewing the profile toward allocations; because
   635  // allocations happen in real time but frees are delayed until the garbage
   636  // collector performs sweeping, the profile only accounts for allocations
   637  // that have had a chance to be freed by the garbage collector.
   638  //
   639  // Most clients should use the runtime/pprof package or
   640  // the testing package's -test.memprofile flag instead
   641  // of calling MemProfile directly.
func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
	cycle := mProfCycle.read()
	// If we're between mProf_NextCycle and mProf_Flush, take care
	// of flushing to the active profile so we only have to look
	// at the active profile below.
	index := cycle % uint32(len(memRecord{}.future))
	lock(&profMemActiveLock)
	lock(&profMemFutureLock[index])
	mProf_FlushLocked(index)
	unlock(&profMemFutureLock[index])
	// profMemActiveLock stays held until the end of the function, so the
	// counting pass and the copying pass below read the same active data.
	clear := true
	// head is captured once: stkbucket links new buckets in front of the
	// current head, so buckets created from here on are not visited.
	head := (*bucket)(mbuckets.Load())
	// First pass: count the records to report and detect whether the
	// active profile holds any data at all.
	for b := head; b != nil; b = b.allnext {
		mp := b.mp()
		if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
			n++
		}
		if mp.active.allocs != 0 || mp.active.frees != 0 {
			clear = false
		}
	}
	if clear {
		// Absolutely no data, suggesting that a garbage collection
		// has not yet happened. In order to allow profiling when
		// garbage collection is disabled from the beginning of execution,
		// accumulate all of the cycles, and recount buckets.
		n = 0
		for b := head; b != nil; b = b.allnext {
			mp := b.mp()
			for c := range mp.future {
				lock(&profMemFutureLock[c])
				mp.active.add(&mp.future[c])
				mp.future[c] = memRecordCycle{}
				unlock(&profMemFutureLock[c])
			}
			if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
				n++
			}
		}
	}
	// Second pass: copy the records out, but only if p can hold them all;
	// otherwise p is left untouched and ok stays false.
	if n <= len(p) {
		ok = true
		idx := 0
		for b := head; b != nil; b = b.allnext {
			mp := b.mp()
			if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
				record(&p[idx], b)
				idx++
			}
		}
	}
	unlock(&profMemActiveLock)
	return
}
   696  
   697  // Write b's data to r.
   698  func record(r *MemProfileRecord, b *bucket) {
   699  	mp := b.mp()
   700  	r.AllocBytes = int64(mp.active.alloc_bytes)
   701  	r.FreeBytes = int64(mp.active.free_bytes)
   702  	r.AllocObjects = int64(mp.active.allocs)
   703  	r.FreeObjects = int64(mp.active.frees)
   704  	if raceenabled {
   705  		racewriterangepc(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0), getcallerpc(), abi.FuncPCABIInternal(MemProfile))
   706  	}
   707  	if msanenabled {
   708  		msanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0))
   709  	}
   710  	if asanenabled {
   711  		asanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0))
   712  	}
   713  	copy(r.Stack0[:], b.stk())
   714  	for i := int(b.nstk); i < len(r.Stack0); i++ {
   715  		r.Stack0[i] = 0
   716  	}
   717  }
   718  
   719  func iterate_memprof(fn func(*bucket, uintptr, *uintptr, uintptr, uintptr, uintptr)) {
   720  	lock(&profMemActiveLock)
   721  	head := (*bucket)(mbuckets.Load())
   722  	for b := head; b != nil; b = b.allnext {
   723  		mp := b.mp()
   724  		fn(b, b.nstk, &b.stk()[0], b.size, mp.active.allocs, mp.active.frees)
   725  	}
   726  	unlock(&profMemActiveLock)
   727  }
   728  
// BlockProfileRecord describes blocking events originated
// at a particular call sequence (stack trace).
// It is also the record type filled in by MutexProfile.
type BlockProfileRecord struct {
	Count  int64 // number of events recorded for this stack
	Cycles int64 // accumulated cycles recorded for this stack
	StackRecord
}
   736  
   737  // BlockProfile returns n, the number of records in the current blocking profile.
   738  // If len(p) >= n, BlockProfile copies the profile into p and returns n, true.
   739  // If len(p) < n, BlockProfile does not change p and returns n, false.
   740  //
   741  // Most clients should use the runtime/pprof package or
   742  // the testing package's -test.blockprofile flag instead
   743  // of calling BlockProfile directly.
   744  func BlockProfile(p []BlockProfileRecord) (n int, ok bool) {
   745  	lock(&profBlockLock)
   746  	head := (*bucket)(bbuckets.Load())
   747  	for b := head; b != nil; b = b.allnext {
   748  		n++
   749  	}
   750  	if n <= len(p) {
   751  		ok = true
   752  		for b := head; b != nil; b = b.allnext {
   753  			bp := b.bp()
   754  			r := &p[0]
   755  			r.Count = int64(bp.count)
   756  			// Prevent callers from having to worry about division by zero errors.
   757  			// See discussion on http://golang.org/cl/299991.
   758  			if r.Count == 0 {
   759  				r.Count = 1
   760  			}
   761  			r.Cycles = bp.cycles
   762  			if raceenabled {
   763  				racewriterangepc(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0), getcallerpc(), abi.FuncPCABIInternal(BlockProfile))
   764  			}
   765  			if msanenabled {
   766  				msanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0))
   767  			}
   768  			if asanenabled {
   769  				asanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0))
   770  			}
   771  			i := copy(r.Stack0[:], b.stk())
   772  			for ; i < len(r.Stack0); i++ {
   773  				r.Stack0[i] = 0
   774  			}
   775  			p = p[1:]
   776  		}
   777  	}
   778  	unlock(&profBlockLock)
   779  	return
   780  }
   781  
   782  // MutexProfile returns n, the number of records in the current mutex profile.
   783  // If len(p) >= n, MutexProfile copies the profile into p and returns n, true.
   784  // Otherwise, MutexProfile does not change p, and returns n, false.
   785  //
   786  // Most clients should use the runtime/pprof package
   787  // instead of calling MutexProfile directly.
   788  func MutexProfile(p []BlockProfileRecord) (n int, ok bool) {
   789  	lock(&profBlockLock)
   790  	head := (*bucket)(xbuckets.Load())
   791  	for b := head; b != nil; b = b.allnext {
   792  		n++
   793  	}
   794  	if n <= len(p) {
   795  		ok = true
   796  		for b := head; b != nil; b = b.allnext {
   797  			bp := b.bp()
   798  			r := &p[0]
   799  			r.Count = int64(bp.count)
   800  			r.Cycles = bp.cycles
   801  			i := copy(r.Stack0[:], b.stk())
   802  			for ; i < len(r.Stack0); i++ {
   803  				r.Stack0[i] = 0
   804  			}
   805  			p = p[1:]
   806  		}
   807  	}
   808  	unlock(&profBlockLock)
   809  	return
   810  }
   811  
   812  // ThreadCreateProfile returns n, the number of records in the thread creation profile.
   813  // If len(p) >= n, ThreadCreateProfile copies the profile into p and returns n, true.
   814  // If len(p) < n, ThreadCreateProfile does not change p and returns n, false.
   815  //
   816  // Most clients should use the runtime/pprof package instead
   817  // of calling ThreadCreateProfile directly.
   818  func ThreadCreateProfile(p []StackRecord) (n int, ok bool) {
   819  	first := (*m)(atomic.Loadp(unsafe.Pointer(&allm)))
   820  	for mp := first; mp != nil; mp = mp.alllink {
   821  		n++
   822  	}
   823  	if n <= len(p) {
   824  		ok = true
   825  		i := 0
   826  		for mp := first; mp != nil; mp = mp.alllink {
   827  			p[i].Stack0 = mp.createstack
   828  			i++
   829  		}
   830  	}
   831  	return
   832  }
   833  
// runtime_goroutineProfileWithLabels is exposed to runtime/pprof through the
// go:linkname directive below. It simply forwards to
// goroutineProfileWithLabels and returns its results unchanged.
//
//go:linkname runtime_goroutineProfileWithLabels runtime/pprof.runtime_goroutineProfileWithLabels
func runtime_goroutineProfileWithLabels(p []StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
	return goroutineProfileWithLabels(p, labels)
}
   838  
// go119ConcurrentGoroutineProfile selects the implementation used by
// goroutineProfileWithLabels: goroutineProfileWithLabelsConcurrent when true,
// goroutineProfileWithLabelsSync when false.
const go119ConcurrentGoroutineProfile = true
   840  
   841  // labels may be nil. If labels is non-nil, it must have the same length as p.
   842  func goroutineProfileWithLabels(p []StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
   843  	if labels != nil && len(labels) != len(p) {
   844  		labels = nil
   845  	}
   846  
   847  	if go119ConcurrentGoroutineProfile {
   848  		return goroutineProfileWithLabelsConcurrent(p, labels)
   849  	}
   850  	return goroutineProfileWithLabelsSync(p, labels)
   851  }
   852  
// goroutineProfile is the shared state of the concurrent goroutine profiler
// (goroutineProfileWithLabelsConcurrent and doRecordGoroutineProfile).
//
//   - sema is acquired at the start of goroutineProfileWithLabelsConcurrent
//     and released before it returns. It is initialized to 1.
//   - active is set while a profile is being collected and cleared during
//     cleanup; both writes happen while the world is stopped.
//   - offset is the index in records (and labels) of the next unused entry.
//     It is advanced atomically by doRecordGoroutineProfile and reset to 0
//     during cleanup.
//   - records and labels are the caller-supplied destination slices for the
//     profile in progress, and are nil otherwise. labels may be nil even
//     while a profile is active.
var goroutineProfile = struct {
	sema    uint32
	active  bool
	offset  atomic.Int64
	records []StackRecord
	labels  []unsafe.Pointer
}{
	sema: 1,
}
   862  
// goroutineProfileState indicates the status of a goroutine's stack for the
// current in-progress goroutine profile. Goroutines' stacks are initially
// "Absent" from the profile, and end up "Satisfied" by the time the profile is
// complete. While a goroutine's stack is being captured, its
// goroutineProfileState will be "InProgress" and it will not be able to run
// until the capture completes and the state moves to "Satisfied".
//
// Some goroutines move directly to the "Satisfied" state: the finalizer
// goroutine (which at various times can be either a "system" or a "user"
// goroutine), the goroutine that is coordinating the profile, and any
// goroutines created during the profile.
type goroutineProfileState uint32

const (
	// goroutineProfileAbsent: the goroutine has not yet been considered for
	// the current profile. This is the zero value.
	goroutineProfileAbsent goroutineProfileState = iota
	// goroutineProfileInProgress: the goroutine's stack is being captured
	// right now.
	goroutineProfileInProgress
	// goroutineProfileSatisfied: nothing further needs to be recorded for
	// this goroutine in the current profile.
	goroutineProfileSatisfied
)
   881  
   882  type goroutineProfileStateHolder atomic.Uint32
   883  
   884  func (p *goroutineProfileStateHolder) Load() goroutineProfileState {
   885  	return goroutineProfileState((*atomic.Uint32)(p).Load())
   886  }
   887  
   888  func (p *goroutineProfileStateHolder) Store(value goroutineProfileState) {
   889  	(*atomic.Uint32)(p).Store(uint32(value))
   890  }
   891  
   892  func (p *goroutineProfileStateHolder) CompareAndSwap(old, new goroutineProfileState) bool {
   893  	return (*atomic.Uint32)(p).CompareAndSwap(uint32(old), uint32(new))
   894  }
   895  
   896  func goroutineProfileWithLabelsConcurrent(p []StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
   897  	semacquire(&goroutineProfile.sema)
   898  
   899  	ourg := getg()
   900  
   901  	stopTheWorld("profile")
   902  	// Using gcount while the world is stopped should give us a consistent view
   903  	// of the number of live goroutines, minus the number of goroutines that are
   904  	// alive and permanently marked as "system". But to make this count agree
   905  	// with what we'd get from isSystemGoroutine, we need special handling for
   906  	// goroutines that can vary between user and system to ensure that the count
   907  	// doesn't change during the collection. So, check the finalizer goroutine
   908  	// in particular.
   909  	n = int(gcount())
   910  	if fingStatus.Load()&fingRunningFinalizer != 0 {
   911  		n++
   912  	}
   913  
   914  	if n > len(p) {
   915  		// There's not enough space in p to store the whole profile, so (per the
   916  		// contract of runtime.GoroutineProfile) we're not allowed to write to p
   917  		// at all and must return n, false.
   918  		startTheWorld()
   919  		semrelease(&goroutineProfile.sema)
   920  		return n, false
   921  	}
   922  
   923  	// Save current goroutine.
   924  	sp := getcallersp()
   925  	pc := getcallerpc()
   926  	systemstack(func() {
   927  		saveg(pc, sp, ourg, &p[0])
   928  	})
   929  	ourg.goroutineProfiled.Store(goroutineProfileSatisfied)
   930  	goroutineProfile.offset.Store(1)
   931  
   932  	// Prepare for all other goroutines to enter the profile. Aside from ourg,
   933  	// every goroutine struct in the allgs list has its goroutineProfiled field
   934  	// cleared. Any goroutine created from this point on (while
   935  	// goroutineProfile.active is set) will start with its goroutineProfiled
   936  	// field set to goroutineProfileSatisfied.
   937  	goroutineProfile.active = true
   938  	goroutineProfile.records = p
   939  	goroutineProfile.labels = labels
   940  	// The finalizer goroutine needs special handling because it can vary over
   941  	// time between being a user goroutine (eligible for this profile) and a
   942  	// system goroutine (to be excluded). Pick one before restarting the world.
   943  	if fing != nil {
   944  		fing.goroutineProfiled.Store(goroutineProfileSatisfied)
   945  		if readgstatus(fing) != _Gdead && !isSystemGoroutine(fing, false) {
   946  			doRecordGoroutineProfile(fing)
   947  		}
   948  	}
   949  	startTheWorld()
   950  
   951  	// Visit each goroutine that existed as of the startTheWorld call above.
   952  	//
   953  	// New goroutines may not be in this list, but we didn't want to know about
   954  	// them anyway. If they do appear in this list (via reusing a dead goroutine
   955  	// struct, or racing to launch between the world restarting and us getting
   956  	// the list), they will already have their goroutineProfiled field set to
   957  	// goroutineProfileSatisfied before their state transitions out of _Gdead.
   958  	//
   959  	// Any goroutine that the scheduler tries to execute concurrently with this
   960  	// call will start by adding itself to the profile (before the act of
   961  	// executing can cause any changes in its stack).
   962  	forEachGRace(func(gp1 *g) {
   963  		tryRecordGoroutineProfile(gp1, Gosched)
   964  	})
   965  
   966  	stopTheWorld("profile cleanup")
   967  	endOffset := goroutineProfile.offset.Swap(0)
   968  	goroutineProfile.active = false
   969  	goroutineProfile.records = nil
   970  	goroutineProfile.labels = nil
   971  	startTheWorld()
   972  
   973  	// Restore the invariant that every goroutine struct in allgs has its
   974  	// goroutineProfiled field cleared.
   975  	forEachGRace(func(gp1 *g) {
   976  		gp1.goroutineProfiled.Store(goroutineProfileAbsent)
   977  	})
   978  
   979  	if raceenabled {
   980  		raceacquire(unsafe.Pointer(&labelSync))
   981  	}
   982  
   983  	if n != int(endOffset) {
   984  		// It's a big surprise that the number of goroutines changed while we
   985  		// were collecting the profile. But probably better to return a
   986  		// truncated profile than to crash the whole process.
   987  		//
   988  		// For instance, needm moves a goroutine out of the _Gdead state and so
   989  		// might be able to change the goroutine count without interacting with
   990  		// the scheduler. For code like that, the race windows are small and the
   991  		// combination of features is uncommon, so it's hard to be (and remain)
   992  		// sure we've caught them all.
   993  	}
   994  
   995  	semrelease(&goroutineProfile.sema)
   996  	return n, true
   997  }
   998  
   999  // tryRecordGoroutineProfileWB asserts that write barriers are allowed and calls
  1000  // tryRecordGoroutineProfile.
  1001  //
  1002  //go:yeswritebarrierrec
  1003  func tryRecordGoroutineProfileWB(gp1 *g) {
  1004  	if getg().m.p.ptr() == nil {
  1005  		throw("no P available, write barriers are forbidden")
  1006  	}
  1007  	tryRecordGoroutineProfile(gp1, osyield)
  1008  }
  1009  
  1010  // tryRecordGoroutineProfile ensures that gp1 has the appropriate representation
  1011  // in the current goroutine profile: either that it should not be profiled, or
  1012  // that a snapshot of its call stack and labels are now in the profile.
  1013  func tryRecordGoroutineProfile(gp1 *g, yield func()) {
  1014  	if readgstatus(gp1) == _Gdead {
  1015  		// Dead goroutines should not appear in the profile. Goroutines that
  1016  		// start while profile collection is active will get goroutineProfiled
  1017  		// set to goroutineProfileSatisfied before transitioning out of _Gdead,
  1018  		// so here we check _Gdead first.
  1019  		return
  1020  	}
  1021  	if isSystemGoroutine(gp1, true) {
  1022  		// System goroutines should not appear in the profile. (The finalizer
  1023  		// goroutine is marked as "already profiled".)
  1024  		return
  1025  	}
  1026  
  1027  	for {
  1028  		prev := gp1.goroutineProfiled.Load()
  1029  		if prev == goroutineProfileSatisfied {
  1030  			// This goroutine is already in the profile (or is new since the
  1031  			// start of collection, so shouldn't appear in the profile).
  1032  			break
  1033  		}
  1034  		if prev == goroutineProfileInProgress {
  1035  			// Something else is adding gp1 to the goroutine profile right now.
  1036  			// Give that a moment to finish.
  1037  			yield()
  1038  			continue
  1039  		}
  1040  
  1041  		// While we have gp1.goroutineProfiled set to
  1042  		// goroutineProfileInProgress, gp1 may appear _Grunnable but will not
  1043  		// actually be able to run. Disable preemption for ourselves, to make
  1044  		// sure we finish profiling gp1 right away instead of leaving it stuck
  1045  		// in this limbo.
  1046  		mp := acquirem()
  1047  		if gp1.goroutineProfiled.CompareAndSwap(goroutineProfileAbsent, goroutineProfileInProgress) {
  1048  			doRecordGoroutineProfile(gp1)
  1049  			gp1.goroutineProfiled.Store(goroutineProfileSatisfied)
  1050  		}
  1051  		releasem(mp)
  1052  	}
  1053  }
  1054  
  1055  // doRecordGoroutineProfile writes gp1's call stack and labels to an in-progress
  1056  // goroutine profile. Preemption is disabled.
  1057  //
  1058  // This may be called via tryRecordGoroutineProfile in two ways: by the
  1059  // goroutine that is coordinating the goroutine profile (running on its own
  1060  // stack), or from the scheduler in preparation to execute gp1 (running on the
  1061  // system stack).
  1062  func doRecordGoroutineProfile(gp1 *g) {
  1063  	if readgstatus(gp1) == _Grunning {
  1064  		print("doRecordGoroutineProfile gp1=", gp1.goid, "\n")
  1065  		throw("cannot read stack of running goroutine")
  1066  	}
  1067  
  1068  	offset := int(goroutineProfile.offset.Add(1)) - 1
  1069  
  1070  	if offset >= len(goroutineProfile.records) {
  1071  		// Should be impossible, but better to return a truncated profile than
  1072  		// to crash the entire process at this point. Instead, deal with it in
  1073  		// goroutineProfileWithLabelsConcurrent where we have more context.
  1074  		return
  1075  	}
  1076  
  1077  	// saveg calls gentraceback, which may call cgo traceback functions. When
  1078  	// called from the scheduler, this is on the system stack already so
  1079  	// traceback.go:cgoContextPCs will avoid calling back into the scheduler.
  1080  	//
  1081  	// When called from the goroutine coordinating the profile, we still have
  1082  	// set gp1.goroutineProfiled to goroutineProfileInProgress and so are still
  1083  	// preventing it from being truly _Grunnable. So we'll use the system stack
  1084  	// to avoid schedule delays.
  1085  	systemstack(func() { saveg(^uintptr(0), ^uintptr(0), gp1, &goroutineProfile.records[offset]) })
  1086  
  1087  	if goroutineProfile.labels != nil {
  1088  		goroutineProfile.labels[offset] = gp1.labels
  1089  	}
  1090  }
  1091  
  1092  func goroutineProfileWithLabelsSync(p []StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
  1093  	gp := getg()
  1094  
  1095  	isOK := func(gp1 *g) bool {
  1096  		// Checking isSystemGoroutine here makes GoroutineProfile
  1097  		// consistent with both NumGoroutine and Stack.
  1098  		return gp1 != gp && readgstatus(gp1) != _Gdead && !isSystemGoroutine(gp1, false)
  1099  	}
  1100  
  1101  	stopTheWorld("profile")
  1102  
  1103  	// World is stopped, no locking required.
  1104  	n = 1
  1105  	forEachGRace(func(gp1 *g) {
  1106  		if isOK(gp1) {
  1107  			n++
  1108  		}
  1109  	})
  1110  
  1111  	if n <= len(p) {
  1112  		ok = true
  1113  		r, lbl := p, labels
  1114  
  1115  		// Save current goroutine.
  1116  		sp := getcallersp()
  1117  		pc := getcallerpc()
  1118  		systemstack(func() {
  1119  			saveg(pc, sp, gp, &r[0])
  1120  		})
  1121  		r = r[1:]
  1122  
  1123  		// If we have a place to put our goroutine labelmap, insert it there.
  1124  		if labels != nil {
  1125  			lbl[0] = gp.labels
  1126  			lbl = lbl[1:]
  1127  		}
  1128  
  1129  		// Save other goroutines.
  1130  		forEachGRace(func(gp1 *g) {
  1131  			if !isOK(gp1) {
  1132  				return
  1133  			}
  1134  
  1135  			if len(r) == 0 {
  1136  				// Should be impossible, but better to return a
  1137  				// truncated profile than to crash the entire process.
  1138  				return
  1139  			}
  1140  			// saveg calls gentraceback, which may call cgo traceback functions.
  1141  			// The world is stopped, so it cannot use cgocall (which will be
  1142  			// blocked at exitsyscall). Do it on the system stack so it won't
  1143  			// call into the schedular (see traceback.go:cgoContextPCs).
  1144  			systemstack(func() { saveg(^uintptr(0), ^uintptr(0), gp1, &r[0]) })
  1145  			if labels != nil {
  1146  				lbl[0] = gp1.labels
  1147  				lbl = lbl[1:]
  1148  			}
  1149  			r = r[1:]
  1150  		})
  1151  	}
  1152  
  1153  	if raceenabled {
  1154  		raceacquire(unsafe.Pointer(&labelSync))
  1155  	}
  1156  
  1157  	startTheWorld()
  1158  	return n, ok
  1159  }
  1160  
  1161  // GoroutineProfile returns n, the number of records in the active goroutine stack profile.
  1162  // If len(p) >= n, GoroutineProfile copies the profile into p and returns n, true.
  1163  // If len(p) < n, GoroutineProfile does not change p and returns n, false.
  1164  //
  1165  // Most clients should use the runtime/pprof package instead
  1166  // of calling GoroutineProfile directly.
  1167  func GoroutineProfile(p []StackRecord) (n int, ok bool) {
  1168  
  1169  	return goroutineProfileWithLabels(p, nil)
  1170  }
  1171  
  1172  func saveg(pc, sp uintptr, gp *g, r *StackRecord) {
  1173  	n := gentraceback(pc, sp, 0, gp, 0, &r.Stack0[0], len(r.Stack0), nil, nil, 0)
  1174  	if n < len(r.Stack0) {
  1175  		r.Stack0[n] = 0
  1176  	}
  1177  }
  1178  
// Stack formats a stack trace of the calling goroutine into buf
// and returns the number of bytes written to buf.
// If all is true, Stack formats stack traces of all other goroutines
// into buf after the trace for the current goroutine.
//
// When all is true the world is stopped for the duration of the call.
// If buf is empty nothing is formatted and Stack returns 0.
func Stack(buf []byte, all bool) int {
	if all {
		stopTheWorld("stack trace")
	}

	n := 0
	if len(buf) > 0 {
		gp := getg()
		// Read the caller's sp/pc here, on the goroutine's own stack,
		// before switching to the system stack below.
		sp := getcallersp()
		pc := getcallerpc()
		systemstack(func() {
			g0 := getg()
			// Force traceback=1 to override GOTRACEBACK setting,
			// so that Stack's results are consistent.
			// GOTRACEBACK is only about crash dumps.
			g0.m.traceback = 1
			// Install buf as g0's write buffer: empty, with capacity
			// len(buf). NOTE(review): presumably the print-based
			// traceback output below is redirected into writebuf and
			// cannot grow past its capacity; confirm in the print code.
			g0.writebuf = buf[0:0:len(buf)]
			goroutineheader(gp)
			traceback(pc, sp, 0, gp)
			if all {
				tracebackothers(gp)
			}
			// Undo the overrides and report how much was written.
			g0.m.traceback = 0
			n = len(g0.writebuf)
			g0.writebuf = nil
		})
	}

	if all {
		startTheWorld()
	}
	return n
}
  1216  
  1217  // Tracing of alloc/free/gc.
  1218  
// tracelock is held for the full duration of each tracealloc, tracefree and
// tracegc call.
var tracelock mutex
  1220  
// tracealloc prints a "tracealloc(p, size[, type])" line describing an
// allocation of size bytes at p, followed by a goroutine header and stack
// trace, all while holding tracelock. The type name is included only when
// typ is non-nil.
func tracealloc(p unsafe.Pointer, size uintptr, typ *_type) {
	lock(&tracelock)
	gp := getg()
	// NOTE(review): presumably raises the traceback detail level for the
	// output below; it is reset to 0 before returning.
	gp.m.traceback = 2
	if typ == nil {
		print("tracealloc(", p, ", ", hex(size), ")\n")
	} else {
		print("tracealloc(", p, ", ", hex(size), ", ", typ.string(), ")\n")
	}
	if gp.m.curg == nil || gp == gp.m.curg {
		// Either this M has no current user goroutine, or we are that
		// goroutine: trace our own stack, starting from our caller.
		goroutineheader(gp)
		pc := getcallerpc()
		sp := getcallersp()
		systemstack(func() {
			traceback(pc, sp, 0, gp)
		})
	} else {
		// We are running on some other g of this M, so trace the M's
		// current user goroutine instead.
		// NOTE(review): ^uintptr(0) for pc/sp presumably asks traceback
		// to use the goroutine's saved state; confirm in traceback.go.
		goroutineheader(gp.m.curg)
		traceback(^uintptr(0), ^uintptr(0), 0, gp.m.curg)
	}
	print("\n")
	gp.m.traceback = 0
	unlock(&tracelock)
}
  1245  
  1246  func tracefree(p unsafe.Pointer, size uintptr) {
  1247  	lock(&tracelock)
  1248  	gp := getg()
  1249  	gp.m.traceback = 2
  1250  	print("tracefree(", p, ", ", hex(size), ")\n")
  1251  	goroutineheader(gp)
  1252  	pc := getcallerpc()
  1253  	sp := getcallersp()
  1254  	systemstack(func() {
  1255  		traceback(pc, sp, 0, gp)
  1256  	})
  1257  	print("\n")
  1258  	gp.m.traceback = 0
  1259  	unlock(&tracelock)
  1260  }
  1261  
  1262  func tracegc() {
  1263  	lock(&tracelock)
  1264  	gp := getg()
  1265  	gp.m.traceback = 2
  1266  	print("tracegc()\n")
  1267  	// running on m->g0 stack; show all non-g0 goroutines
  1268  	tracebackothers(gp)
  1269  	print("end tracegc\n")
  1270  	print("\n")
  1271  	gp.m.traceback = 0
  1272  	unlock(&tracelock)
  1273  }