github.com/epfl-dcsl/gotee@v0.0.0-20200909122901-014b35f5e5e9/src/runtime/mgcwork.go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime

import (
	"runtime/internal/atomic"
	"runtime/internal/sys"
	"unsafe"
)

const (
	_WorkbufSize = 2048 // in bytes; larger values result in less contention

	// workbufAlloc is the number of bytes to allocate at a time
	// for new workbufs. This must be a multiple of pageSize and
	// should be a multiple of _WorkbufSize.
	//
	// Larger values reduce workbuf allocation overhead. Smaller
	// values reduce heap fragmentation.
	workbufAlloc = 32 << 10
)

func init() {
	if workbufAlloc%pageSize != 0 || workbufAlloc%_WorkbufSize != 0 {
		throw("bad workbufAlloc")
	}
}
    30  
// Garbage collector work pool abstraction.
//
// This implements a producer/consumer model for pointers to grey
// objects. A grey object is one that is marked and on a work
// queue. A black object is marked and not on a work queue.
//
// Write barriers, root discovery, stack scanning, and object scanning
// produce pointers to grey objects. Scanning consumes pointers to
// grey objects, thus blackening them, and then scans them,
// potentially producing new pointers to grey objects.

// A gcWork provides the interface to produce and consume work for the
// garbage collector.
//
// A gcWork can be used on the stack as follows:
//
//     (preemption must be disabled)
//     gcw := &getg().m.p.ptr().gcw
//     .. call gcw.put() to produce and gcw.get() to consume ..
//     if gcBlackenPromptly {
//         gcw.dispose()
//     }
//
// It's important that any use of gcWork during the mark phase prevent
// the garbage collector from transitioning to mark termination since
// gcWork may locally hold GC work buffers. This can be done by
// disabling preemption (systemstack or acquirem).
type gcWork struct {
	// wbuf1 and wbuf2 are the primary and secondary work buffers.
	//
	// This can be thought of as a stack of both work buffers'
	// pointers concatenated. When we pop the last pointer, we
	// shift the stack up by one work buffer by bringing in a new
	// full buffer and discarding an empty one. When we fill both
	// buffers, we shift the stack down by one work buffer by
	// bringing in a new empty buffer and discarding a full one.
	// This way we have one buffer's worth of hysteresis, which
	// amortizes the cost of getting or putting a work buffer over
	// at least one buffer of work and reduces contention on the
	// global work lists.
	//
	// wbuf1 is always the buffer we're currently pushing to and
	// popping from and wbuf2 is the buffer that will be discarded
	// next.
	//
	// Invariant: Both wbuf1 and wbuf2 are nil or neither are.
	wbuf1, wbuf2 *workbuf

	// Bytes marked (blackened) on this gcWork. This is aggregated
	// into work.bytesMarked by dispose.
	bytesMarked uint64

	// Scan work performed on this gcWork. This is aggregated into
	// gcController by dispose and may also be flushed by callers.
	scanWork int64
}
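
// For illustration only: the scanWork field notes that callers may
// flush the credit themselves instead of waiting for dispose. A
// minimal sketch of such a flush (the threshold of 1000 is an
// arbitrary example value, not a constant defined in this file):
//
//     if gcw.scanWork > 1000 {
//         atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
//         gcw.scanWork = 0
//     }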

// Most of the methods of gcWork are go:nowritebarrierrec because the
// write barrier itself can invoke gcWork methods but the methods are
// not generally re-entrant. Hence, if a gcWork method invoked the
// write barrier while the gcWork was in an inconsistent state, and
// the write barrier in turn invoked a gcWork method, it could
// permanently corrupt the gcWork.

func (w *gcWork) init() {
	w.wbuf1 = getempty()
	wbuf2 := trygetfull()
	if wbuf2 == nil {
		wbuf2 = getempty()
	}
	w.wbuf2 = wbuf2
}

// put enqueues a pointer for the garbage collector to trace.
// obj must point to the beginning of a heap object or an oblet.
//go:nowritebarrierrec
func (w *gcWork) put(obj uintptr) {
	flushed := false
	wbuf := w.wbuf1
	if wbuf == nil {
		w.init()
		wbuf = w.wbuf1
		// wbuf is empty at this point.
	} else if wbuf.nobj == len(wbuf.obj) {
		w.wbuf1, w.wbuf2 = w.wbuf2, w.wbuf1
		wbuf = w.wbuf1
		if wbuf.nobj == len(wbuf.obj) {
			putfull(wbuf)
			wbuf = getempty()
			w.wbuf1 = wbuf
			flushed = true
		}
	}

	wbuf.obj[wbuf.nobj] = obj
	wbuf.nobj++

	// If we put a buffer on full, let the GC controller know so
	// it can encourage more workers to run. We delay this until
	// the end of put so that w is in a consistent state, since
	// enlistWorker may itself manipulate w.
	if flushed && gcphase == _GCmark {
		gcController.enlistWorker()
	}
}
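
// For illustration only: a producer such as the object-scanning code
// greys an object by marking it and then enqueueing its address.
// A sketch (not the actual greyobject code in mgcmark.go):
//
//     // obj is the base address of a heap object that was just marked grey.
//     gcw := &getg().m.p.ptr().gcw
//     gcw.put(obj)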

// putFast does a put and returns true if it can be done quickly;
// otherwise it returns false and the caller needs to call put.
//go:nowritebarrierrec
func (w *gcWork) putFast(obj uintptr) bool {
	wbuf := w.wbuf1
	if wbuf == nil {
		return false
	} else if wbuf.nobj == len(wbuf.obj) {
		return false
	}

	wbuf.obj[wbuf.nobj] = obj
	wbuf.nobj++
	return true
}
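
// For illustration only: hot paths typically try putFast first and
// fall back to the full put, e.g.:
//
//     if !gcw.putFast(obj) {
//         gcw.put(obj)
//     }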

// putBatch performs a put on every pointer in obj. See put for
// constraints on these pointers.
//
//go:nowritebarrierrec
func (w *gcWork) putBatch(obj []uintptr) {
	if len(obj) == 0 {
		return
	}

	flushed := false
	wbuf := w.wbuf1
	if wbuf == nil {
		w.init()
		wbuf = w.wbuf1
	}

	for len(obj) > 0 {
		for wbuf.nobj == len(wbuf.obj) {
			putfull(wbuf)
			w.wbuf1, w.wbuf2 = w.wbuf2, getempty()
			wbuf = w.wbuf1
			flushed = true
		}
		n := copy(wbuf.obj[wbuf.nobj:], obj)
		wbuf.nobj += n
		obj = obj[n:]
	}

	if flushed && gcphase == _GCmark {
		gcController.enlistWorker()
	}
}
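
// For illustration only: a caller that has accumulated pointers in a
// local slice can hand them over in one call. A sketch, where buf is
// a hypothetical []uintptr of object addresses:
//
//     gcw := &getg().m.p.ptr().gcw
//     gcw.putBatch(buf)
//     buf = buf[:0]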

// tryGet dequeues a pointer for the garbage collector to trace.
//
// If there are no pointers remaining in this gcWork or in the global
// queue, tryGet returns 0.  Note that there may still be pointers in
// other gcWork instances or other caches.
//go:nowritebarrierrec
func (w *gcWork) tryGet() uintptr {
	wbuf := w.wbuf1
	if wbuf == nil {
		w.init()
		wbuf = w.wbuf1
		// wbuf is empty at this point.
	}
	if wbuf.nobj == 0 {
		w.wbuf1, w.wbuf2 = w.wbuf2, w.wbuf1
		wbuf = w.wbuf1
		if wbuf.nobj == 0 {
			owbuf := wbuf
			wbuf = trygetfull()
			if wbuf == nil {
				return 0
			}
			putempty(owbuf)
			w.wbuf1 = wbuf
		}
	}

	wbuf.nobj--
	return wbuf.obj[wbuf.nobj]
}

// tryGetFast dequeues a pointer for the garbage collector to trace
// if one is readily available. Otherwise it returns 0 and
// the caller is expected to call tryGet().
//go:nowritebarrierrec
func (w *gcWork) tryGetFast() uintptr {
	wbuf := w.wbuf1
	if wbuf == nil {
		return 0
	}
	if wbuf.nobj == 0 {
		return 0
	}

	wbuf.nobj--
	return wbuf.obj[wbuf.nobj]
}
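
// For illustration only: a drain loop tries the fast path first and
// stops once both the local buffers and the global queue are empty,
// roughly in the style of gcDrain. A sketch:
//
//     for {
//         obj := gcw.tryGetFast()
//         if obj == 0 {
//             if obj = gcw.tryGet(); obj == 0 {
//                 break // no local or global work left
//             }
//         }
//         scanobject(obj, gcw) // blacken obj; may produce new grey pointers
//     }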

// get dequeues a pointer for the garbage collector to trace, blocking
// if necessary to ensure all pointers from all queues and caches have
// been retrieved.  get returns 0 if there are no pointers remaining.
//go:nowritebarrierrec
func (w *gcWork) get() uintptr {
	wbuf := w.wbuf1
	if wbuf == nil {
		w.init()
		wbuf = w.wbuf1
		// wbuf is empty at this point.
	}
	if wbuf.nobj == 0 {
		w.wbuf1, w.wbuf2 = w.wbuf2, w.wbuf1
		wbuf = w.wbuf1
		if wbuf.nobj == 0 {
			owbuf := wbuf
			wbuf = getfull()
			if wbuf == nil {
				return 0
			}
			putempty(owbuf)
			w.wbuf1 = wbuf
		}
	}

	// TODO: This might be a good place to add prefetch code

	wbuf.nobj--
	return wbuf.obj[wbuf.nobj]
}

// dispose returns any cached pointers to the global queue.
// The buffers are being put on the full queue so that the
// write barriers will not simply reacquire them before the
// GC can inspect them. This helps reduce the mutator's
// ability to hide pointers during the concurrent mark phase.
//
//go:nowritebarrierrec
func (w *gcWork) dispose() {
	if wbuf := w.wbuf1; wbuf != nil {
		if wbuf.nobj == 0 {
			putempty(wbuf)
		} else {
			putfull(wbuf)
		}
		w.wbuf1 = nil

		wbuf = w.wbuf2
		if wbuf.nobj == 0 {
			putempty(wbuf)
		} else {
			putfull(wbuf)
		}
		w.wbuf2 = nil
	}
	if w.bytesMarked != 0 {
		// dispose happens relatively infrequently. If this
		// atomic becomes a problem, we should first try to
		// dispose less and if necessary aggregate in a per-P
		// counter.
		atomic.Xadd64(&work.bytesMarked, int64(w.bytesMarked))
		w.bytesMarked = 0
	}
	if w.scanWork != 0 {
		atomic.Xaddint64(&gcController.scanWork, w.scanWork)
		w.scanWork = 0
	}
}
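
// For illustration only: a worker that must stop caching work locally
// (for example before blocking, or when gcBlackenPromptly requires
// prompt flushing) disposes its gcWork. A sketch:
//
//     gcw := &getg().m.p.ptr().gcw
//     gcw.dispose()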

// balance moves some work that's cached in this gcWork back on the
// global queue.
//go:nowritebarrierrec
func (w *gcWork) balance() {
	if w.wbuf1 == nil {
		return
	}
	if wbuf := w.wbuf2; wbuf.nobj != 0 {
		putfull(wbuf)
		w.wbuf2 = getempty()
	} else if wbuf := w.wbuf1; wbuf.nobj > 4 {
		w.wbuf1 = handoff(wbuf)
	} else {
		return
	}
	// We flushed a buffer to the full list, so wake a worker.
	if gcphase == _GCmark {
		gcController.enlistWorker()
	}
}

// empty returns true if w has no mark work available.
//go:nowritebarrierrec
func (w *gcWork) empty() bool {
	return w.wbuf1 == nil || (w.wbuf1.nobj == 0 && w.wbuf2.nobj == 0)
}

// Internally, the GC work pool is kept in arrays in work buffers.
// The gcWork interface caches a work buffer until full (or empty) to
// avoid contending on the global work buffer lists.

type workbufhdr struct {
	node lfnode // must be first
	nobj int
}

//go:notinheap
type workbuf struct {
	workbufhdr
	// account for the above fields
	obj [(_WorkbufSize - unsafe.Sizeof(workbufhdr{})) / sys.PtrSize]uintptr
}
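
// For illustration only: on a 64-bit platform workbufhdr is 24 bytes
// (a 16-byte lfnode plus an 8-byte int), so each 2048-byte workbuf
// holds (2048 - 24) / 8 = 253 object pointers. The exact count
// depends on the platform's pointer size and lfnode layout.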

// workbuf factory routines. These funcs are used to manage the
// workbufs.
// If the GC asks for some work, these are the only routines that
// make wbufs available to the GC.

func (b *workbuf) checknonempty() {
	if b.nobj == 0 {
		throw("workbuf is empty")
	}
}

func (b *workbuf) checkempty() {
	if b.nobj != 0 {
		throw("workbuf is not empty")
	}
}

// getempty pops an empty work buffer off the work.empty list,
// allocating new buffers if none are available.
//go:nowritebarrier
func getempty() *workbuf {
	var b *workbuf
	if work.empty != 0 {
		b = (*workbuf)(work.empty.pop())
		if b != nil {
			b.checkempty()
		}
	}
	if b == nil {
		// Allocate more workbufs.
		var s *mspan
		if work.wbufSpans.free.first != nil {
			lock(&work.wbufSpans.lock)
			s = work.wbufSpans.free.first
			if s != nil {
				work.wbufSpans.free.remove(s)
				work.wbufSpans.busy.insert(s)
			}
			unlock(&work.wbufSpans.lock)
		}
		if s == nil {
			systemstack(func() {
				s = mheap_.allocManual(workbufAlloc/pageSize, &memstats.gc_sys)
			})
			if s == nil {
				throw("out of memory")
			}
			// Record the new span in the busy list.
			lock(&work.wbufSpans.lock)
			work.wbufSpans.busy.insert(s)
			unlock(&work.wbufSpans.lock)
		}
		// Slice up the span into new workbufs. Return one and
		// put the rest on the empty list.
		for i := uintptr(0); i+_WorkbufSize <= workbufAlloc; i += _WorkbufSize {
			newb := (*workbuf)(unsafe.Pointer(s.base() + i))
			newb.nobj = 0
			if i == 0 {
				b = newb
			} else {
				putempty(newb)
			}
		}
	}
	return b
}
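
// For illustration only: with workbufAlloc = 32 KiB and _WorkbufSize
// = 2048 bytes, each span allocated above is carved into
// 32768 / 2048 = 16 workbufs; getempty returns one and pushes the
// remaining 15 onto the empty list.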

// putempty puts a workbuf onto the work.empty list.
// Upon entry this goroutine owns b. The lfstack.push relinquishes ownership.
//go:nowritebarrier
func putempty(b *workbuf) {
	b.checkempty()
	work.empty.push(&b.node)
}

// putfull puts the workbuf on the work.full list for the GC.
// putfull accepts partially full buffers so the GC can avoid competing
// with the mutators for ownership of partially full buffers.
//go:nowritebarrier
func putfull(b *workbuf) {
	b.checknonempty()
	work.full.push(&b.node)
}

// trygetfull tries to get a full or partially empty workbuffer.
// If one is not immediately available, it returns nil.
//go:nowritebarrier
func trygetfull() *workbuf {
	b := (*workbuf)(work.full.pop())
	if b != nil {
		b.checknonempty()
		return b
	}
	return b
}

// Get a full work buffer off the work.full list.
// If nothing is available, wait until all the other gc helpers have
// finished and then return nil.
// getfull acts as a barrier for work.nproc helpers. As long as one
// gchelper is actively marking objects, it
// may create a workbuffer that the other helpers can work on.
// The for loop either exits when a work buffer is found
// or when _all_ of the work.nproc GC helpers are in the loop
// looking for work and thus not capable of creating new work.
// This is in fact the termination condition for the STW mark
// phase.
//go:nowritebarrier
func getfull() *workbuf {
	b := (*workbuf)(work.full.pop())
	if b != nil {
		b.checknonempty()
		return b
	}

	incnwait := atomic.Xadd(&work.nwait, +1)
	if incnwait > work.nproc {
		println("runtime: work.nwait=", incnwait, "work.nproc=", work.nproc)
		throw("work.nwait > work.nproc")
	}
	for i := 0; ; i++ {
		if work.full != 0 {
			decnwait := atomic.Xadd(&work.nwait, -1)
			if decnwait == work.nproc {
				println("runtime: work.nwait=", decnwait, "work.nproc=", work.nproc)
				throw("work.nwait > work.nproc")
			}
			b = (*workbuf)(work.full.pop())
			if b != nil {
				b.checknonempty()
				return b
			}
			incnwait := atomic.Xadd(&work.nwait, +1)
			if incnwait > work.nproc {
				println("runtime: work.nwait=", incnwait, "work.nproc=", work.nproc)
				throw("work.nwait > work.nproc")
			}
		}
		if work.nwait == work.nproc && work.markrootNext >= work.markrootJobs {
			return nil
		}
		if i < 10 {
			procyield(20)
		} else if i < 20 {
			osyield()
		} else {
			usleep(100)
		}
	}
}
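
// For illustration only: a mark helper's main loop can use getfull
// both as a source of work and as the termination barrier. A sketch:
//
//     for {
//         wbuf := getfull()
//         if wbuf == nil {
//             break // all work.nproc helpers are idle; marking is done
//         }
//         // ... drain wbuf, possibly producing more work ...
//     }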

//go:nowritebarrier
func handoff(b *workbuf) *workbuf {
	// Make new buffer with half of b's pointers.
	b1 := getempty()
	n := b.nobj / 2
	b.nobj -= n
	b1.nobj = n
	memmove(unsafe.Pointer(&b1.obj[0]), unsafe.Pointer(&b.obj[b.nobj]), uintptr(n)*unsafe.Sizeof(b1.obj[0]))

	// Put b on full list - let first half of b get stolen.
	putfull(b)
	return b1
}
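
// For illustration only: if b holds 7 pointers, handoff moves
// n = 7/2 = 3 of them (b.obj[4:7], the top half) into a fresh buffer
// that it returns to the caller, and publishes b, which still holds
// the first 4, on the full list for another worker to steal.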

// prepareFreeWorkbufs moves busy workbuf spans to free list so they
// can be freed to the heap. This must only be called when all
// workbufs are on the empty list.
func prepareFreeWorkbufs() {
	lock(&work.wbufSpans.lock)
	if work.full != 0 {
		throw("cannot free workbufs when work.full != 0")
	}
	// Since all workbufs are on the empty list, we don't care
	// which ones are in which spans. We can wipe the entire empty
	// list and move all workbuf spans to the free list.
	work.empty = 0
	work.wbufSpans.free.takeAll(&work.wbufSpans.busy)
	unlock(&work.wbufSpans.lock)
}

// freeSomeWbufs frees some workbufs back to the heap and returns
// true if it should be called again to free more.
func freeSomeWbufs(preemptible bool) bool {
	const batchSize = 64 // ~1–2 µs per span.
	lock(&work.wbufSpans.lock)
	if gcphase != _GCoff || work.wbufSpans.free.isEmpty() {
		unlock(&work.wbufSpans.lock)
		return false
	}
	systemstack(func() {
		gp := getg().m.curg
		for i := 0; i < batchSize && !(preemptible && gp.preempt); i++ {
			span := work.wbufSpans.free.first
			if span == nil {
				break
			}
			work.wbufSpans.free.remove(span)
			mheap_.freeManual(span, &memstats.gc_sys)
		}
	})
	more := !work.wbufSpans.free.isEmpty()
	unlock(&work.wbufSpans.lock)
	return more
}
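
// For illustration only: once prepareFreeWorkbufs has moved the spans
// to the free list, a caller (for example the sweeper) can release
// them in bounded batches so it stays preemptible. A sketch of the
// assumed calling pattern:
//
//     prepareFreeWorkbufs()
//     for freeSomeWbufs(true) {
//         // yield between batches if needed
//     }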