github.com/twelsh-aw/go/src@v0.0.0-20230516233729-a56fe86a7c81/runtime/sema.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Semaphore implementation exposed to Go.
     6  // Intended use is provide a sleep and wakeup
     7  // primitive that can be used in the contended case
     8  // of other synchronization primitives.
     9  // Thus it targets the same goal as Linux's futex,
    10  // but it has much simpler semantics.
    11  //
    12  // That is, don't think of these as semaphores.
    13  // Think of them as a way to implement sleep and wakeup
    14  // such that every sleep is paired with a single wakeup,
    15  // even if, due to races, the wakeup happens before the sleep.
    16  //
    17  // See Mullender and Cox, ``Semaphores in Plan 9,''
    18  // https://swtch.com/semaphore.pdf
    19  
    20  package runtime
    21  
    22  import (
    23  	"internal/cpu"
    24  	"runtime/internal/atomic"
    25  	"unsafe"
    26  )
    27  
    28  // Asynchronous semaphore for sync.Mutex.
    29  
    30  // A semaRoot holds a balanced tree of sudog with distinct addresses (s.elem).
    31  // Each of those sudog may in turn point (through s.waitlink) to a list
    32  // of other sudogs waiting on the same address.
    33  // The operations on the inner lists of sudogs with the same address
    34  // are all O(1). The scanning of the top-level semaRoot list is O(log n),
    35  // where n is the number of distinct addresses with goroutines blocked
    36  // on them that hash to the given semaRoot.
    37  // See golang.org/issue/17953 for a program that worked badly
    38  // before we introduced the second level of list, and
    39  // BenchmarkSemTable/OneAddrCollision/* for a benchmark that exercises this.
    40  type semaRoot struct {
    41  	lock  mutex
    42  	treap *sudog        // root of balanced tree of unique waiters.
    43  	nwait atomic.Uint32 // Number of waiters. Read w/o the lock.
    44  }
    45  
    46  var semtable semTable
    47  
    48  // Prime to not correlate with any user patterns.
    49  const semTabSize = 251
    50  
    51  type semTable [semTabSize]struct {
    52  	root semaRoot
    53  	pad  [cpu.CacheLinePadSize - unsafe.Sizeof(semaRoot{})]byte
    54  }
    55  
    56  func (t *semTable) rootFor(addr *uint32) *semaRoot {
    57  	return &t[(uintptr(unsafe.Pointer(addr))>>3)%semTabSize].root
    58  }
    59  
    60  //go:linkname sync_runtime_Semacquire sync.runtime_Semacquire
    61  func sync_runtime_Semacquire(addr *uint32) {
    62  	semacquire1(addr, false, semaBlockProfile, 0, waitReasonSemacquire)
    63  }
    64  
    65  //go:linkname poll_runtime_Semacquire internal/poll.runtime_Semacquire
    66  func poll_runtime_Semacquire(addr *uint32) {
    67  	semacquire1(addr, false, semaBlockProfile, 0, waitReasonSemacquire)
    68  }
    69  
    70  //go:linkname sync_runtime_Semrelease sync.runtime_Semrelease
    71  func sync_runtime_Semrelease(addr *uint32, handoff bool, skipframes int) {
    72  	semrelease1(addr, handoff, skipframes)
    73  }
    74  
    75  //go:linkname sync_runtime_SemacquireMutex sync.runtime_SemacquireMutex
    76  func sync_runtime_SemacquireMutex(addr *uint32, lifo bool, skipframes int) {
    77  	semacquire1(addr, lifo, semaBlockProfile|semaMutexProfile, skipframes, waitReasonSyncMutexLock)
    78  }
    79  
    80  //go:linkname sync_runtime_SemacquireRWMutexR sync.runtime_SemacquireRWMutexR
    81  func sync_runtime_SemacquireRWMutexR(addr *uint32, lifo bool, skipframes int) {
    82  	semacquire1(addr, lifo, semaBlockProfile|semaMutexProfile, skipframes, waitReasonSyncRWMutexRLock)
    83  }
    84  
    85  //go:linkname sync_runtime_SemacquireRWMutex sync.runtime_SemacquireRWMutex
    86  func sync_runtime_SemacquireRWMutex(addr *uint32, lifo bool, skipframes int) {
    87  	semacquire1(addr, lifo, semaBlockProfile|semaMutexProfile, skipframes, waitReasonSyncRWMutexLock)
    88  }
    89  
    90  //go:linkname poll_runtime_Semrelease internal/poll.runtime_Semrelease
    91  func poll_runtime_Semrelease(addr *uint32) {
    92  	semrelease(addr)
    93  }
    94  
    95  func readyWithTime(s *sudog, traceskip int) {
    96  	if s.releasetime != 0 {
    97  		s.releasetime = cputicks()
    98  	}
    99  	goready(s.g, traceskip)
   100  }
   101  
   102  type semaProfileFlags int
   103  
   104  const (
   105  	semaBlockProfile semaProfileFlags = 1 << iota
   106  	semaMutexProfile
   107  )
   108  
   109  // Called from runtime.
   110  func semacquire(addr *uint32) {
   111  	semacquire1(addr, false, 0, 0, waitReasonSemacquire)
   112  }
   113  
   114  func semacquire1(addr *uint32, lifo bool, profile semaProfileFlags, skipframes int, reason waitReason) {
   115  	gp := getg()
   116  	if gp != gp.m.curg {
   117  		throw("semacquire not on the G stack")
   118  	}
   119  
   120  	// Easy case.
   121  	if cansemacquire(addr) {
   122  		return
   123  	}
   124  
   125  	// Harder case:
   126  	//	increment waiter count
   127  	//	try cansemacquire one more time, return if succeeded
   128  	//	enqueue itself as a waiter
   129  	//	sleep
   130  	//	(waiter descriptor is dequeued by signaler)
   131  	s := acquireSudog()
   132  	root := semtable.rootFor(addr)
   133  	t0 := int64(0)
   134  	s.releasetime = 0
   135  	s.acquiretime = 0
   136  	s.ticket = 0
   137  	if profile&semaBlockProfile != 0 && blockprofilerate > 0 {
   138  		t0 = cputicks()
   139  		s.releasetime = -1
   140  	}
   141  	if profile&semaMutexProfile != 0 && mutexprofilerate > 0 {
   142  		if t0 == 0 {
   143  			t0 = cputicks()
   144  		}
   145  		s.acquiretime = t0
   146  	}
   147  	for {
   148  		lockWithRank(&root.lock, lockRankRoot)
   149  		// Add ourselves to nwait to disable "easy case" in semrelease.
   150  		root.nwait.Add(1)
   151  		// Check cansemacquire to avoid missed wakeup.
   152  		if cansemacquire(addr) {
   153  			root.nwait.Add(-1)
   154  			unlock(&root.lock)
   155  			break
   156  		}
   157  		// Any semrelease after the cansemacquire knows we're waiting
   158  		// (we set nwait above), so go to sleep.
   159  		root.queue(addr, s, lifo)
   160  		goparkunlock(&root.lock, reason, traceEvGoBlockSync, 4+skipframes)
   161  		if s.ticket != 0 || cansemacquire(addr) {
   162  			break
   163  		}
   164  	}
   165  	if s.releasetime > 0 {
   166  		blockevent(s.releasetime-t0, 3+skipframes)
   167  	}
   168  	releaseSudog(s)
   169  }
   170  
   171  func semrelease(addr *uint32) {
   172  	semrelease1(addr, false, 0)
   173  }
   174  
   175  func semrelease1(addr *uint32, handoff bool, skipframes int) {
   176  	root := semtable.rootFor(addr)
   177  	atomic.Xadd(addr, 1)
   178  
   179  	// Easy case: no waiters?
   180  	// This check must happen after the xadd, to avoid a missed wakeup
   181  	// (see loop in semacquire).
   182  	if root.nwait.Load() == 0 {
   183  		return
   184  	}
   185  
   186  	// Harder case: search for a waiter and wake it.
   187  	lockWithRank(&root.lock, lockRankRoot)
   188  	if root.nwait.Load() == 0 {
   189  		// The count is already consumed by another goroutine,
   190  		// so no need to wake up another goroutine.
   191  		unlock(&root.lock)
   192  		return
   193  	}
   194  	s, t0 := root.dequeue(addr)
   195  	if s != nil {
   196  		root.nwait.Add(-1)
   197  	}
   198  	unlock(&root.lock)
   199  	if s != nil { // May be slow or even yield, so unlock first
   200  		acquiretime := s.acquiretime
   201  		if acquiretime != 0 {
   202  			mutexevent(t0-acquiretime, 3+skipframes)
   203  		}
   204  		if s.ticket != 0 {
   205  			throw("corrupted semaphore ticket")
   206  		}
   207  		if handoff && cansemacquire(addr) {
   208  			s.ticket = 1
   209  		}
   210  		readyWithTime(s, 5+skipframes)
   211  		if s.ticket == 1 && getg().m.locks == 0 {
   212  			// Direct G handoff
   213  			// readyWithTime has added the waiter G as runnext in the
   214  			// current P; we now call the scheduler so that we start running
   215  			// the waiter G immediately.
   216  			// Note that waiter inherits our time slice: this is desirable
   217  			// to avoid having a highly contended semaphore hog the P
   218  			// indefinitely. goyield is like Gosched, but it emits a
   219  			// "preempted" trace event instead and, more importantly, puts
   220  			// the current G on the local runq instead of the global one.
   221  			// We only do this in the starving regime (handoff=true), as in
   222  			// the non-starving case it is possible for a different waiter
   223  			// to acquire the semaphore while we are yielding/scheduling,
   224  			// and this would be wasteful. We wait instead to enter starving
   225  			// regime, and then we start to do direct handoffs of ticket and
   226  			// P.
   227  			// See issue 33747 for discussion.
   228  			goyield()
   229  		}
   230  	}
   231  }
   232  
   233  func cansemacquire(addr *uint32) bool {
   234  	for {
   235  		v := atomic.Load(addr)
   236  		if v == 0 {
   237  			return false
   238  		}
   239  		if atomic.Cas(addr, v, v-1) {
   240  			return true
   241  		}
   242  	}
   243  }
   244  
   245  // queue adds s to the blocked goroutines in semaRoot.
   246  func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) {
   247  	s.g = getg()
   248  	s.elem = unsafe.Pointer(addr)
   249  	s.next = nil
   250  	s.prev = nil
   251  
   252  	var last *sudog
   253  	pt := &root.treap
   254  	for t := *pt; t != nil; t = *pt {
   255  		if t.elem == unsafe.Pointer(addr) {
   256  			// Already have addr in list.
   257  			if lifo {
   258  				// Substitute s in t's place in treap.
   259  				*pt = s
   260  				s.ticket = t.ticket
   261  				s.acquiretime = t.acquiretime
   262  				s.parent = t.parent
   263  				s.prev = t.prev
   264  				s.next = t.next
   265  				if s.prev != nil {
   266  					s.prev.parent = s
   267  				}
   268  				if s.next != nil {
   269  					s.next.parent = s
   270  				}
   271  				// Add t first in s's wait list.
   272  				s.waitlink = t
   273  				s.waittail = t.waittail
   274  				if s.waittail == nil {
   275  					s.waittail = t
   276  				}
   277  				t.parent = nil
   278  				t.prev = nil
   279  				t.next = nil
   280  				t.waittail = nil
   281  			} else {
   282  				// Add s to end of t's wait list.
   283  				if t.waittail == nil {
   284  					t.waitlink = s
   285  				} else {
   286  					t.waittail.waitlink = s
   287  				}
   288  				t.waittail = s
   289  				s.waitlink = nil
   290  			}
   291  			return
   292  		}
   293  		last = t
   294  		if uintptr(unsafe.Pointer(addr)) < uintptr(t.elem) {
   295  			pt = &t.prev
   296  		} else {
   297  			pt = &t.next
   298  		}
   299  	}
   300  
   301  	// Add s as new leaf in tree of unique addrs.
   302  	// The balanced tree is a treap using ticket as the random heap priority.
   303  	// That is, it is a binary tree ordered according to the elem addresses,
   304  	// but then among the space of possible binary trees respecting those
   305  	// addresses, it is kept balanced on average by maintaining a heap ordering
   306  	// on the ticket: s.ticket <= both s.prev.ticket and s.next.ticket.
   307  	// https://en.wikipedia.org/wiki/Treap
   308  	// https://faculty.washington.edu/aragon/pubs/rst89.pdf
   309  	//
   310  	// s.ticket compared with zero in couple of places, therefore set lowest bit.
   311  	// It will not affect treap's quality noticeably.
   312  	s.ticket = fastrand() | 1
   313  	s.parent = last
   314  	*pt = s
   315  
   316  	// Rotate up into tree according to ticket (priority).
   317  	for s.parent != nil && s.parent.ticket > s.ticket {
   318  		if s.parent.prev == s {
   319  			root.rotateRight(s.parent)
   320  		} else {
   321  			if s.parent.next != s {
   322  				panic("semaRoot queue")
   323  			}
   324  			root.rotateLeft(s.parent)
   325  		}
   326  	}
   327  }
   328  
   329  // dequeue searches for and finds the first goroutine
   330  // in semaRoot blocked on addr.
   331  // If the sudog was being profiled, dequeue returns the time
   332  // at which it was woken up as now. Otherwise now is 0.
   333  func (root *semaRoot) dequeue(addr *uint32) (found *sudog, now int64) {
   334  	ps := &root.treap
   335  	s := *ps
   336  	for ; s != nil; s = *ps {
   337  		if s.elem == unsafe.Pointer(addr) {
   338  			goto Found
   339  		}
   340  		if uintptr(unsafe.Pointer(addr)) < uintptr(s.elem) {
   341  			ps = &s.prev
   342  		} else {
   343  			ps = &s.next
   344  		}
   345  	}
   346  	return nil, 0
   347  
   348  Found:
   349  	now = int64(0)
   350  	if s.acquiretime != 0 {
   351  		now = cputicks()
   352  	}
   353  	if t := s.waitlink; t != nil {
   354  		// Substitute t, also waiting on addr, for s in root tree of unique addrs.
   355  		*ps = t
   356  		t.ticket = s.ticket
   357  		t.parent = s.parent
   358  		t.prev = s.prev
   359  		if t.prev != nil {
   360  			t.prev.parent = t
   361  		}
   362  		t.next = s.next
   363  		if t.next != nil {
   364  			t.next.parent = t
   365  		}
   366  		if t.waitlink != nil {
   367  			t.waittail = s.waittail
   368  		} else {
   369  			t.waittail = nil
   370  		}
   371  		t.acquiretime = now
   372  		s.waitlink = nil
   373  		s.waittail = nil
   374  	} else {
   375  		// Rotate s down to be leaf of tree for removal, respecting priorities.
   376  		for s.next != nil || s.prev != nil {
   377  			if s.next == nil || s.prev != nil && s.prev.ticket < s.next.ticket {
   378  				root.rotateRight(s)
   379  			} else {
   380  				root.rotateLeft(s)
   381  			}
   382  		}
   383  		// Remove s, now a leaf.
   384  		if s.parent != nil {
   385  			if s.parent.prev == s {
   386  				s.parent.prev = nil
   387  			} else {
   388  				s.parent.next = nil
   389  			}
   390  		} else {
   391  			root.treap = nil
   392  		}
   393  	}
   394  	s.parent = nil
   395  	s.elem = nil
   396  	s.next = nil
   397  	s.prev = nil
   398  	s.ticket = 0
   399  	return s, now
   400  }
   401  
   402  // rotateLeft rotates the tree rooted at node x.
   403  // turning (x a (y b c)) into (y (x a b) c).
   404  func (root *semaRoot) rotateLeft(x *sudog) {
   405  	// p -> (x a (y b c))
   406  	p := x.parent
   407  	y := x.next
   408  	b := y.prev
   409  
   410  	y.prev = x
   411  	x.parent = y
   412  	x.next = b
   413  	if b != nil {
   414  		b.parent = x
   415  	}
   416  
   417  	y.parent = p
   418  	if p == nil {
   419  		root.treap = y
   420  	} else if p.prev == x {
   421  		p.prev = y
   422  	} else {
   423  		if p.next != x {
   424  			throw("semaRoot rotateLeft")
   425  		}
   426  		p.next = y
   427  	}
   428  }
   429  
   430  // rotateRight rotates the tree rooted at node y.
   431  // turning (y (x a b) c) into (x a (y b c)).
   432  func (root *semaRoot) rotateRight(y *sudog) {
   433  	// p -> (y (x a b) c)
   434  	p := y.parent
   435  	x := y.prev
   436  	b := x.next
   437  
   438  	x.next = y
   439  	y.parent = x
   440  	y.prev = b
   441  	if b != nil {
   442  		b.parent = y
   443  	}
   444  
   445  	x.parent = p
   446  	if p == nil {
   447  		root.treap = x
   448  	} else if p.prev == y {
   449  		p.prev = x
   450  	} else {
   451  		if p.next != y {
   452  			throw("semaRoot rotateRight")
   453  		}
   454  		p.next = x
   455  	}
   456  }
   457  
   458  // notifyList is a ticket-based notification list used to implement sync.Cond.
   459  //
   460  // It must be kept in sync with the sync package.
   461  type notifyList struct {
   462  	// wait is the ticket number of the next waiter. It is atomically
   463  	// incremented outside the lock.
   464  	wait atomic.Uint32
   465  
   466  	// notify is the ticket number of the next waiter to be notified. It can
   467  	// be read outside the lock, but is only written to with lock held.
   468  	//
   469  	// Both wait & notify can wrap around, and such cases will be correctly
   470  	// handled as long as their "unwrapped" difference is bounded by 2^31.
   471  	// For this not to be the case, we'd need to have 2^31+ goroutines
   472  	// blocked on the same condvar, which is currently not possible.
   473  	notify uint32
   474  
   475  	// List of parked waiters.
   476  	lock mutex
   477  	head *sudog
   478  	tail *sudog
   479  }
   480  
   481  // less checks if a < b, considering a & b running counts that may overflow the
   482  // 32-bit range, and that their "unwrapped" difference is always less than 2^31.
   483  func less(a, b uint32) bool {
   484  	return int32(a-b) < 0
   485  }
   486  
   487  // notifyListAdd adds the caller to a notify list such that it can receive
   488  // notifications. The caller must eventually call notifyListWait to wait for
   489  // such a notification, passing the returned ticket number.
   490  //
   491  //go:linkname notifyListAdd sync.runtime_notifyListAdd
   492  func notifyListAdd(l *notifyList) uint32 {
   493  	// This may be called concurrently, for example, when called from
   494  	// sync.Cond.Wait while holding a RWMutex in read mode.
   495  	return l.wait.Add(1) - 1
   496  }
   497  
   498  // notifyListWait waits for a notification. If one has been sent since
   499  // notifyListAdd was called, it returns immediately. Otherwise, it blocks.
   500  //
   501  //go:linkname notifyListWait sync.runtime_notifyListWait
   502  func notifyListWait(l *notifyList, t uint32) {
   503  	lockWithRank(&l.lock, lockRankNotifyList)
   504  
   505  	// Return right away if this ticket has already been notified.
   506  	if less(t, l.notify) {
   507  		unlock(&l.lock)
   508  		return
   509  	}
   510  
   511  	// Enqueue itself.
   512  	s := acquireSudog()
   513  	s.g = getg()
   514  	s.ticket = t
   515  	s.releasetime = 0
   516  	t0 := int64(0)
   517  	if blockprofilerate > 0 {
   518  		t0 = cputicks()
   519  		s.releasetime = -1
   520  	}
   521  	if l.tail == nil {
   522  		l.head = s
   523  	} else {
   524  		l.tail.next = s
   525  	}
   526  	l.tail = s
   527  	goparkunlock(&l.lock, waitReasonSyncCondWait, traceEvGoBlockCond, 3)
   528  	if t0 != 0 {
   529  		blockevent(s.releasetime-t0, 2)
   530  	}
   531  	releaseSudog(s)
   532  }
   533  
   534  // notifyListNotifyAll notifies all entries in the list.
   535  //
   536  //go:linkname notifyListNotifyAll sync.runtime_notifyListNotifyAll
   537  func notifyListNotifyAll(l *notifyList) {
   538  	// Fast-path: if there are no new waiters since the last notification
   539  	// we don't need to acquire the lock.
   540  	if l.wait.Load() == atomic.Load(&l.notify) {
   541  		return
   542  	}
   543  
   544  	// Pull the list out into a local variable, waiters will be readied
   545  	// outside the lock.
   546  	lockWithRank(&l.lock, lockRankNotifyList)
   547  	s := l.head
   548  	l.head = nil
   549  	l.tail = nil
   550  
   551  	// Update the next ticket to be notified. We can set it to the current
   552  	// value of wait because any previous waiters are already in the list
   553  	// or will notice that they have already been notified when trying to
   554  	// add themselves to the list.
   555  	atomic.Store(&l.notify, l.wait.Load())
   556  	unlock(&l.lock)
   557  
   558  	// Go through the local list and ready all waiters.
   559  	for s != nil {
   560  		next := s.next
   561  		s.next = nil
   562  		readyWithTime(s, 4)
   563  		s = next
   564  	}
   565  }
   566  
   567  // notifyListNotifyOne notifies one entry in the list.
   568  //
   569  //go:linkname notifyListNotifyOne sync.runtime_notifyListNotifyOne
   570  func notifyListNotifyOne(l *notifyList) {
   571  	// Fast-path: if there are no new waiters since the last notification
   572  	// we don't need to acquire the lock at all.
   573  	if l.wait.Load() == atomic.Load(&l.notify) {
   574  		return
   575  	}
   576  
   577  	lockWithRank(&l.lock, lockRankNotifyList)
   578  
   579  	// Re-check under the lock if we need to do anything.
   580  	t := l.notify
   581  	if t == l.wait.Load() {
   582  		unlock(&l.lock)
   583  		return
   584  	}
   585  
   586  	// Update the next notify ticket number.
   587  	atomic.Store(&l.notify, t+1)
   588  
   589  	// Try to find the g that needs to be notified.
   590  	// If it hasn't made it to the list yet we won't find it,
   591  	// but it won't park itself once it sees the new notify number.
   592  	//
   593  	// This scan looks linear but essentially always stops quickly.
   594  	// Because g's queue separately from taking numbers,
   595  	// there may be minor reorderings in the list, but we
   596  	// expect the g we're looking for to be near the front.
   597  	// The g has others in front of it on the list only to the
   598  	// extent that it lost the race, so the iteration will not
   599  	// be too long. This applies even when the g is missing:
   600  	// it hasn't yet gotten to sleep and has lost the race to
   601  	// the (few) other g's that we find on the list.
   602  	for p, s := (*sudog)(nil), l.head; s != nil; p, s = s, s.next {
   603  		if s.ticket == t {
   604  			n := s.next
   605  			if p != nil {
   606  				p.next = n
   607  			} else {
   608  				l.head = n
   609  			}
   610  			if n == nil {
   611  				l.tail = p
   612  			}
   613  			unlock(&l.lock)
   614  			s.next = nil
   615  			readyWithTime(s, 4)
   616  			return
   617  		}
   618  	}
   619  	unlock(&l.lock)
   620  }
   621  
   622  //go:linkname notifyListCheck sync.runtime_notifyListCheck
   623  func notifyListCheck(sz uintptr) {
   624  	if sz != unsafe.Sizeof(notifyList{}) {
   625  		print("runtime: bad notifyList size - sync=", sz, " runtime=", unsafe.Sizeof(notifyList{}), "\n")
   626  		throw("bad notifyList size")
   627  	}
   628  }
   629  
   630  //go:linkname sync_nanotime sync.runtime_nanotime
   631  func sync_nanotime() int64 {
   632  	return nanotime()
   633  }