github.com/miolini/go@v0.0.0-20160405192216-fca68c8cb408/src/runtime/mstkbar.go

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Garbage collector: stack barriers
//
// Stack barriers enable the garbage collector to determine how much
// of a goroutine stack has changed between when a stack is scanned
// during the concurrent scan phase and when it is re-scanned during
// the stop-the-world mark termination phase. Mark termination only
// needs to re-scan the changed part, so for deep stacks this can
// significantly reduce GC pause time compared to the alternative of
// re-scanning whole stacks. The deeper the stacks, the more stack
// barriers help.
//
// When stacks are scanned during the concurrent scan phase, the stack
// scan installs stack barriers by selecting stack frames and
// overwriting the saved return PCs (or link registers) of these
// frames with the PC of a "stack barrier trampoline". Later, when a
// selected frame returns, it "returns" to this trampoline instead of
// returning to its actual caller. The trampoline records that the
// stack has unwound past this frame and jumps to the original return
// PC recorded when the stack barrier was installed. Mark termination
// re-scans only as far as the first frame that hasn't hit a stack
// barrier and then removes any un-hit stack barriers.
//
// This scheme is very lightweight. No special code is required in the
// mutator to record stack unwinding and the trampoline is only a few
// assembly instructions.
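//
// In Go-like pseudocode, the trampoline (the assembly function
// stackBarrier) does roughly the following. This is an illustrative
// sketch only; the real code must also avoid growing the stack or
// clobbering the returning frame's result registers:
//
//	// Pop the innermost un-hit barrier and resume at the
//	// original return PC it saved.
//	gp := getg()
//	bar := gp.stkbar[gp.stkbarPos]
//	gp.stkbarPos++
//	jumpTo(bar.savedLRVal) // hypothetical direct jump, not a call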
//
// Book-keeping
// ------------
//
// The primary cost of stack barriers is book-keeping: the runtime has
// to record the locations of all stack barriers and the original
// return PCs in order to return to the correct caller when a stack
// barrier is hit and so it can remove un-hit stack barriers. In order
// to minimize this cost, the Go runtime places stack barriers in
// exponentially-spaced frames, starting 1K past the current frame.
// The book-keeping structure hence grows logarithmically with the
// size of the stack and mark termination re-scans at most twice as
// much stack as necessary.
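//
// For example (illustrative numbers), barriers land roughly 1K, 2K,
// 4K, 8K, ... past the frame being scanned, so a 64K stack needs only
// about seven book-keeping entries rather than one per frame. And if
// the mutator later unwinds past the 4K barrier but not the 8K one,
// at least 4K of stack may have changed while mark termination
// re-scans at most 8K: the factor-of-two bound above.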
//
// The runtime reserves space for this book-keeping structure at the
// top of the stack allocation itself (just above the outermost
// frame). This is necessary because the regular memory allocator can
// itself grow the stack, and hence can't be used when allocating
// stack-related structures.
//
// For debugging, the runtime also supports installing stack barriers
// at every frame. However, this requires significantly more
// book-keeping space.
//
// Correctness
// -----------
//
// The runtime and the compiler cooperate to ensure that all objects
// reachable from the stack as of mark termination are marked.
// Anything unchanged since the concurrent scan phase will be marked
// because it is marked by the concurrent scan. After the concurrent
// scan, there are three possible classes of stack modifications that
// must be tracked:
//
// 1) Mutator writes below the lowest un-hit stack barrier. This
// includes all writes performed by an executing function to its own
// stack frame. This part of the stack will be re-scanned by mark
// termination, which will mark any objects made reachable from
// modifications to this part of the stack.
//
// 2) Mutator writes above the lowest un-hit stack barrier. It's
// possible for a mutator to modify the stack above the lowest un-hit
// stack barrier if a higher frame has passed down a pointer to a
// stack variable in its frame. This is called an "up-pointer" (see
// the sketch after this list). The compiler ensures that writes
// through up-pointers have an accompanying write barrier (it simply
// doesn't distinguish between writes through up-pointers and writes
// through heap pointers). This write barrier marks any object made
// reachable from modifications to this part of the stack.
//
// 3) Runtime writes to the stack. Various runtime operations such as
// sends to unbuffered channels can write to arbitrary parts of the
// stack, including above the lowest un-hit stack barrier. We solve
// this in two ways. In many cases, the runtime can perform an
// explicit write barrier operation like in case 2. However, in the
// case of bulk memory move (typedmemmove), the runtime doesn't
// necessarily have ready access to a pointer bitmap for the memory
// being copied, so it simply unwinds any stack barriers below the
// destination.
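//
// As a sketch of case 2 (hypothetical user code, not runtime code):
//
//	var sink *int
//
//	func parent() {
//		var x *int   // pointer-typed slot in parent's stack frame
//		child(&x)    // &x is an "up-pointer" inside child
//		sink = x
//	}
//
//	func child(p **int) {
//		// This store writes into parent's frame, which may lie
//		// above the lowest un-hit stack barrier, so the compiler
//		// emits a write barrier for it, exactly as it would for
//		// a store through a heap pointer.
//		*p = new(int)
//	}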
//
// Gotchas
// -------
//
// Anything that inspects or manipulates the stack potentially needs
// to understand stack barriers. The most obvious case is that
// gentraceback needs to use the original return PC when it encounters
// the stack barrier trampoline. Anything that unwinds the stack, such
// as panic/recover, must unwind stack barriers in tandem with
// unwinding the stack.
//
// Stack barriers require that any goroutine whose stack has been
// scanned must execute write barriers. Go solves this by simply
// enabling write barriers globally during the concurrent scan phase,
// even though write barriers traditionally are not enabled during
// this phase.
//
// Synchronization
// ---------------
//
// For the most part, accessing and modifying stack barriers is
// synchronized around GC safe points. Installing stack barriers
// forces the G to a safe point, while all other operations that
// modify stack barriers run on the G and prevent it from reaching a
// safe point.
//
// Subtlety arises when a G may be tracebacked when *not* at a safe
// point. This happens during sigprof. For this, each G has a "stack
// barrier lock" (see gcLockStackBarriers, gcUnlockStackBarriers).
// Operations that manipulate stack barriers acquire this lock, while
// sigprof tries to acquire it and simply skips the traceback if it
// can't acquire it. There is one exception for performance and
// complexity reasons: hitting a stack barrier manipulates the stack
// barrier list without acquiring the stack barrier lock. For this,
// gentraceback performs a special fixup if the traceback starts in
// the stack barrier function.
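//
// Schematically, the two sides of the lock pair up as in the sketch
// below (the real call sites are in scanstack, sigprof, and friends
// elsewhere in the runtime):
//
//	// Installing/removing barriers: must not race with a traceback.
//	gcLockStackBarriers(gp)
//	... install or remove barriers on gp's stack ...
//	gcUnlockStackBarriers(gp)
//
//	// sigprof: never blocks; drops the sample instead.
//	if gcTryLockStackBarriers(gp) {
//		... traceback gp's stack ...
//		gcUnlockStackBarriers(gp)
//	}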

package runtime

import (
	"runtime/internal/atomic"
	"runtime/internal/sys"
	"unsafe"
)

const debugStackBarrier = false

// firstStackBarrierOffset is the approximate byte offset at
// which to place the first stack barrier from the current SP.
// This is a lower bound on how much stack will have to be
// re-scanned during mark termination. Subsequent barriers are
// placed at firstStackBarrierOffset * 2^n offsets.
//
// For debugging, this can be set to 0, which will install a
// stack barrier at every frame. If you do this, you may also
// have to raise _StackMin, since the stack barrier
// bookkeeping will use a large amount of each stack.
var firstStackBarrierOffset = 1024

// gcMaxStackBarriers returns the maximum number of stack barriers
// that can be installed in a stack of stackSize bytes.
func gcMaxStackBarriers(stackSize int) (n int) {
	if firstStackBarrierOffset == 0 {
		// Special debugging case for inserting stack barriers
		// at every frame. Steal half of the stack for the
		// []stkbar. Technically, if the stack were to consist
		// solely of return PCs we would need two thirds of
		// the stack, but stealing that much breaks things and
		// this doesn't happen in practice.
		return stackSize / 2 / int(unsafe.Sizeof(stkbar{}))
	}

	offset := firstStackBarrierOffset
	for offset < stackSize {
		n++
		offset *= 2
	}
	return n + 1
}
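
// For example, with the default firstStackBarrierOffset of 1024,
// gcMaxStackBarriers(8192) returns 4: the loop counts the three
// offsets (1K, 2K, 4K) that lie below the stack size, and the final
// return adds one more slot.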

// gcInstallStackBarrier installs a stack barrier over the return PC of frame.
//go:nowritebarrier
func gcInstallStackBarrier(gp *g, frame *stkframe) bool {
	if frame.lr == 0 {
		if debugStackBarrier {
			print("not installing stack barrier with no LR, goid=", gp.goid, "\n")
		}
		return false
	}

	if frame.fn.entry == cgocallback_gofuncPC {
		// cgocallback_gofunc doesn't return to its LR;
		// instead, its return path puts LR in g.sched.pc and
		// switches back to the system stack on which
		// cgocallback_gofunc was originally called. We can't
		// have a stack barrier in g.sched.pc, so don't
		// install one in this frame.
		if debugStackBarrier {
			print("not installing stack barrier over LR of cgocallback_gofunc, goid=", gp.goid, "\n")
		}
		return false
	}

	// Save the return PC and overwrite it with stackBarrier.
	var lrUintptr uintptr
	if usesLR {
		lrUintptr = frame.sp
	} else {
		lrUintptr = frame.fp - sys.RegSize
	}
	lrPtr := (*sys.Uintreg)(unsafe.Pointer(lrUintptr))
	if debugStackBarrier {
		print("install stack barrier at ", hex(lrUintptr), " over ", hex(*lrPtr), ", goid=", gp.goid, "\n")
		if uintptr(*lrPtr) != frame.lr {
			print("frame.lr=", hex(frame.lr))
			throw("frame.lr differs from stack LR")
		}
	}

	gp.stkbar = gp.stkbar[:len(gp.stkbar)+1]
	stkbar := &gp.stkbar[len(gp.stkbar)-1]
	stkbar.savedLRPtr = lrUintptr
	stkbar.savedLRVal = uintptr(*lrPtr)
	*lrPtr = sys.Uintreg(stackBarrierPC)
	return true
}
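
// Schematically, with addr the stack word holding the saved return PC
// (frame.sp on LR machines, frame.fp-RegSize otherwise), installation
// performs the exchange
//
//	before: *addr == retPC            gp.stkbar: [...]
//	after:  *addr == stackBarrierPC   gp.stkbar: [..., {addr, retPC}]
//
// and gcRemoveStackBarrier below is its exact inverse.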

// gcRemoveStackBarriers removes all stack barriers installed in gp's stack.
//go:nowritebarrier
func gcRemoveStackBarriers(gp *g) {
	if debugStackBarrier && gp.stkbarPos != 0 {
		print("hit ", gp.stkbarPos, " stack barriers, goid=", gp.goid, "\n")
	}

	gcLockStackBarriers(gp)

	// Remove stack barriers that we didn't hit.
	for _, stkbar := range gp.stkbar[gp.stkbarPos:] {
		gcRemoveStackBarrier(gp, stkbar)
	}

	// Clear recorded stack barriers so copystack doesn't try to
	// adjust them.
	gp.stkbarPos = 0
	gp.stkbar = gp.stkbar[:0]

	gcUnlockStackBarriers(gp)
}

// gcRemoveStackBarrier removes a single stack barrier. It is the
// inverse operation of gcInstallStackBarrier.
//
// This is nosplit to ensure gp's stack does not move.
//
//go:nowritebarrier
//go:nosplit
func gcRemoveStackBarrier(gp *g, stkbar stkbar) {
	if debugStackBarrier {
		print("remove stack barrier at ", hex(stkbar.savedLRPtr), " with ", hex(stkbar.savedLRVal), ", goid=", gp.goid, "\n")
	}
	lrPtr := (*sys.Uintreg)(unsafe.Pointer(stkbar.savedLRPtr))
	if val := *lrPtr; val != sys.Uintreg(stackBarrierPC) {
		printlock()
		print("at *", hex(stkbar.savedLRPtr), " expected stack barrier PC ", hex(stackBarrierPC), ", found ", hex(val), ", goid=", gp.goid, "\n")
		print("gp.stkbar=")
		gcPrintStkbars(gp, -1)
		print(", gp.stack=[", hex(gp.stack.lo), ",", hex(gp.stack.hi), ")\n")
		throw("stack barrier lost")
	}
	*lrPtr = sys.Uintreg(stkbar.savedLRVal)
}

// gcPrintStkbars prints the stack barriers of gp for debugging. It
// places a "@@@" marker at gp.stkbarPos. If marker >= 0, it will also
// place a "==>" marker before the marker'th entry.
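//
// For example (illustrative addresses), a goroutine with two recorded
// barriers that has hit the first one prints as
//
//	[*0x1000=0x401234 @@@ *0x2000=0x405678]
//
// and with marker == 1 an "==> " is also inserted before the second
// entry.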
func gcPrintStkbars(gp *g, marker int) {
	print("[")
	for i, s := range gp.stkbar {
		if i > 0 {
			print(" ")
		}
		if i == int(gp.stkbarPos) {
			print("@@@ ")
		}
		if i == marker {
			print("==> ")
		}
		print("*", hex(s.savedLRPtr), "=", hex(s.savedLRVal))
	}
	if int(gp.stkbarPos) == len(gp.stkbar) {
		print(" @@@")
	}
	if marker == len(gp.stkbar) {
		print(" ==>")
	}
	print("]")
}

// gcUnwindBarriers marks all stack barriers up to the frame containing
// sp as hit and removes them. This is used during stack unwinding for
// panic/recover and by heapBitsBulkBarrier to force stack re-scanning
// when its destination is on the stack.
//
// This is nosplit to ensure gp's stack does not move.
//
//go:nosplit
func gcUnwindBarriers(gp *g, sp uintptr) {
	gcLockStackBarriers(gp)
	// On LR machines, if there is a stack barrier on the return
	// from the frame containing sp, this will mark it as hit even
	// though it isn't, but it's okay to be conservative.
	before := gp.stkbarPos
	for int(gp.stkbarPos) < len(gp.stkbar) && gp.stkbar[gp.stkbarPos].savedLRPtr < sp {
		gcRemoveStackBarrier(gp, gp.stkbar[gp.stkbarPos])
		gp.stkbarPos++
	}
	gcUnlockStackBarriers(gp)
	if debugStackBarrier && gp.stkbarPos != before {
		print("skip barriers below ", hex(sp), " in goid=", gp.goid, ": ")
		// We skipped barriers between the "==>" marker
		// (before) and the "@@@" marker (gp.stkbarPos).
		gcPrintStkbars(gp, int(before))
		print("\n")
	}
}
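
// For example (illustrative addresses), if gp.stkbar records barriers
// at the stack words 0x1000 and 0x2000 with gp.stkbarPos == 0, then
// gcUnwindBarriers(gp, 0x1800) restores the saved return PC at 0x1000
// and advances gp.stkbarPos to 1, while the barrier at 0x2000 remains
// installed: only barriers strictly below sp are treated as hit.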

// nextBarrierPC returns the original return PC of the next stack barrier.
// Used by getcallerpc, so it must be nosplit.
//go:nosplit
func nextBarrierPC() uintptr {
	gp := getg()
	return gp.stkbar[gp.stkbarPos].savedLRVal
}

// setNextBarrierPC sets the return PC of the next stack barrier.
// Used by setcallerpc, so it must be nosplit.
//go:nosplit
func setNextBarrierPC(pc uintptr) {
	gp := getg()
	gcLockStackBarriers(gp)
	gp.stkbar[gp.stkbarPos].savedLRVal = pc
	gcUnlockStackBarriers(gp)
}

// gcLockStackBarriers synchronizes with tracebacks of gp's stack
// during sigprof for installation or removal of stack barriers. It
// blocks until any current sigprof is done tracebacking gp's stack
// and then disallows profiling tracebacks of gp's stack.
//
// This is necessary because a sigprof during barrier installation or
// removal could observe inconsistencies between the stkbar array and
// the stack itself and crash.
//
//go:nosplit
func gcLockStackBarriers(gp *g) {
	// Disable preemption so scanstack cannot run while the caller
	// is manipulating the stack barriers.
	acquirem()
	for !atomic.Cas(&gp.stackLock, 0, 1) {
		osyield()
	}
}

//go:nosplit
func gcTryLockStackBarriers(gp *g) bool {
	mp := acquirem()
	result := atomic.Cas(&gp.stackLock, 0, 1)
	if !result {
		releasem(mp)
	}
	return result
}

func gcUnlockStackBarriers(gp *g) {
	atomic.Store(&gp.stackLock, 0)
	releasem(getg().m)
}