github.com/ice-blockchain/go/src@v0.0.0-20240403114104-1564d284e521/runtime/sema.go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Semaphore implementation exposed to Go.
// Intended use is to provide a sleep and wakeup
// primitive that can be used in the contended case
// of other synchronization primitives.
// Thus it targets the same goal as Linux's futex,
// but it has much simpler semantics.
//
// That is, don't think of these as semaphores.
// Think of them as a way to implement sleep and wakeup
// such that every sleep is paired with a single wakeup,
// even if, due to races, the wakeup happens before the sleep.
//
// See Mullender and Cox, ``Semaphores in Plan 9,''
// https://swtch.com/semaphore.pdf

package runtime

import (
	"internal/cpu"
	"runtime/internal/atomic"
	"unsafe"
)

// Asynchronous semaphore for sync.Mutex.

// A semaRoot holds a balanced tree of sudog with distinct addresses (s.elem).
// Each of those sudog may in turn point (through s.waitlink) to a list
// of other sudogs waiting on the same address.
// The operations on the inner lists of sudogs with the same address
// are all O(1). The scanning of the top-level semaRoot list is O(log n),
// where n is the number of distinct addresses with goroutines blocked
// on them that hash to the given semaRoot.
// See golang.org/issue/17953 for a program that worked badly
// before we introduced the second level of list, and
// BenchmarkSemTable/OneAddrCollision/* for a benchmark that exercises this.
type semaRoot struct {
	lock  mutex
	treap *sudog        // root of balanced tree of unique waiters.
	nwait atomic.Uint32 // Number of waiters. Read w/o the lock.
}

var semtable semTable

// Prime to not correlate with any user patterns.
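// The table is indexed by a hash of the semaphore address (see rootFor below),
// so unrelated semaphores rarely share a semaRoot and its lock.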
const semTabSize = 251

type semTable [semTabSize]struct {
	root semaRoot
	pad  [cpu.CacheLinePadSize - unsafe.Sizeof(semaRoot{})]byte
}

func (t *semTable) rootFor(addr *uint32) *semaRoot {
	return &t[(uintptr(unsafe.Pointer(addr))>>3)%semTabSize].root
}

//go:linkname sync_runtime_Semacquire sync.runtime_Semacquire
func sync_runtime_Semacquire(addr *uint32) {
	semacquire1(addr, false, semaBlockProfile, 0, waitReasonSemacquire)
}

//go:linkname poll_runtime_Semacquire internal/poll.runtime_Semacquire
func poll_runtime_Semacquire(addr *uint32) {
	semacquire1(addr, false, semaBlockProfile, 0, waitReasonSemacquire)
}

//go:linkname sync_runtime_Semrelease sync.runtime_Semrelease
func sync_runtime_Semrelease(addr *uint32, handoff bool, skipframes int) {
	semrelease1(addr, handoff, skipframes)
}

//go:linkname sync_runtime_SemacquireMutex sync.runtime_SemacquireMutex
func sync_runtime_SemacquireMutex(addr *uint32, lifo bool, skipframes int) {
	semacquire1(addr, lifo, semaBlockProfile|semaMutexProfile, skipframes, waitReasonSyncMutexLock)
}

//go:linkname sync_runtime_SemacquireRWMutexR sync.runtime_SemacquireRWMutexR
func sync_runtime_SemacquireRWMutexR(addr *uint32, lifo bool, skipframes int) {
	semacquire1(addr, lifo, semaBlockProfile|semaMutexProfile, skipframes, waitReasonSyncRWMutexRLock)
}

//go:linkname sync_runtime_SemacquireRWMutex sync.runtime_SemacquireRWMutex
func sync_runtime_SemacquireRWMutex(addr *uint32, lifo bool, skipframes int) {
	semacquire1(addr, lifo, semaBlockProfile|semaMutexProfile, skipframes, waitReasonSyncRWMutexLock)
}

//go:linkname poll_runtime_Semrelease internal/poll.runtime_Semrelease
func poll_runtime_Semrelease(addr *uint32) {
	semrelease(addr)
}

func readyWithTime(s *sudog, traceskip int) {
	if s.releasetime != 0 {
		s.releasetime = cputicks()
	}
	goready(s.g, traceskip)
}

type semaProfileFlags int

const (
	semaBlockProfile semaProfileFlags = 1 << iota
	semaMutexProfile
)

// Called from runtime.
func semacquire(addr *uint32) {
	semacquire1(addr, false, 0, 0, waitReasonSemacquire)
}

func semacquire1(addr *uint32, lifo bool, profile semaProfileFlags, skipframes int, reason waitReason) {
	gp := getg()
	if gp != gp.m.curg {
		throw("semacquire not on the G stack")
	}

	// Easy case.
	if cansemacquire(addr) {
		return
	}

	// Harder case:
	//	increment waiter count
	//	try cansemacquire one more time, return if succeeded
	//	enqueue itself as a waiter
	//	sleep
	//	(waiter descriptor is dequeued by signaler)
	s := acquireSudog()
	root := semtable.rootFor(addr)
	t0 := int64(0)
	s.releasetime = 0
	s.acquiretime = 0
	s.ticket = 0
	if profile&semaBlockProfile != 0 && blockprofilerate > 0 {
		t0 = cputicks()
		s.releasetime = -1
	}
	if profile&semaMutexProfile != 0 && mutexprofilerate > 0 {
		if t0 == 0 {
			t0 = cputicks()
		}
		s.acquiretime = t0
	}
	for {
		lockWithRank(&root.lock, lockRankRoot)
		// Add ourselves to nwait to disable "easy case" in semrelease.
		root.nwait.Add(1)
		// Check cansemacquire to avoid missed wakeup.
		if cansemacquire(addr) {
			root.nwait.Add(-1)
			unlock(&root.lock)
			break
		}
		// Any semrelease after the cansemacquire knows we're waiting
		// (we set nwait above), so go to sleep.
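		// queue parks s on this root's treap (or appends it to the per-address
		// wait list if another goroutine is already blocked on addr); we then
		// sleep in goparkunlock. On wakeup, s.ticket != 0 means the releaser
		// handed the semaphore to us directly; otherwise we must win
		// cansemacquire ourselves or go around the loop again.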
		root.queue(addr, s, lifo)
		goparkunlock(&root.lock, reason, traceBlockSync, 4+skipframes)
		if s.ticket != 0 || cansemacquire(addr) {
			break
		}
	}
	if s.releasetime > 0 {
		blockevent(s.releasetime-t0, 3+skipframes)
	}
	releaseSudog(s)
}

func semrelease(addr *uint32) {
	semrelease1(addr, false, 0)
}

func semrelease1(addr *uint32, handoff bool, skipframes int) {
	root := semtable.rootFor(addr)
	atomic.Xadd(addr, 1)

	// Easy case: no waiters?
	// This check must happen after the xadd, to avoid a missed wakeup
	// (see loop in semacquire).
	if root.nwait.Load() == 0 {
		return
	}

	// Harder case: search for a waiter and wake it.
	lockWithRank(&root.lock, lockRankRoot)
	if root.nwait.Load() == 0 {
		// The count is already consumed by another goroutine,
		// so no need to wake up another goroutine.
		unlock(&root.lock)
		return
	}
	s, t0, tailtime := root.dequeue(addr)
	if s != nil {
		root.nwait.Add(-1)
	}
	unlock(&root.lock)
	if s != nil { // May be slow or even yield, so unlock first
		acquiretime := s.acquiretime
		if acquiretime != 0 {
			// Charge contention that this (delayed) unlock caused.
			// If there are N more goroutines waiting beyond the
			// one that's waking up, charge their delay as well, so that
			// contention holding up many goroutines shows up as
			// more costly than contention holding up a single goroutine.
			// It would take O(N) time to calculate how long each goroutine
			// has been waiting, so instead we charge avg(head-wait, tail-wait)*N.
			// head-wait is the longest wait and tail-wait is the shortest.
			// (When we do a lifo insertion, we preserve this property by
			// copying the old head's acquiretime into the inserted new head.
			// In that case the overall average may be slightly high, but that's fine:
			// the average of the ends is only an approximation to the actual
			// average anyway.)
			// The root.dequeue above changed the head and tail acquiretime
			// to the current time, so the next unlock will not re-count this contention.
			dt0 := t0 - acquiretime
			dt := dt0
			if s.waiters != 0 {
				dtail := t0 - tailtime
				dt += (dtail + dt0) / 2 * int64(s.waiters)
			}
			mutexevent(dt, 3+skipframes)
		}
		if s.ticket != 0 {
			throw("corrupted semaphore ticket")
		}
		if handoff && cansemacquire(addr) {
			s.ticket = 1
		}
		readyWithTime(s, 5+skipframes)
		if s.ticket == 1 && getg().m.locks == 0 {
			// Direct G handoff
			// readyWithTime has added the waiter G as runnext in the
			// current P; we now call the scheduler so that we start running
			// the waiter G immediately.
			// Note that waiter inherits our time slice: this is desirable
			// to avoid having a highly contended semaphore hog the P
			// indefinitely. goyield is like Gosched, but it emits a
			// "preempted" trace event instead and, more importantly, puts
			// the current G on the local runq instead of the global one.
			// We only do this in the starving regime (handoff=true), as in
			// the non-starving case it is possible for a different waiter
			// to acquire the semaphore while we are yielding/scheduling,
			// and this would be wasteful. We wait instead to enter starving
			// regime, and then we start to do direct handoffs of ticket and
			// P.
			// See issue 33747 for discussion.
			goyield()
		}
	}
}

func cansemacquire(addr *uint32) bool {
	for {
		v := atomic.Load(addr)
		if v == 0 {
			return false
		}
		if atomic.Cas(addr, v, v-1) {
			return true
		}
	}
}

// queue adds s to the blocked goroutines in semaRoot.
func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) {
	s.g = getg()
	s.elem = unsafe.Pointer(addr)
	s.next = nil
	s.prev = nil
	s.waiters = 0

	var last *sudog
	pt := &root.treap
	for t := *pt; t != nil; t = *pt {
		if t.elem == unsafe.Pointer(addr) {
			// Already have addr in list.
			if lifo {
				// Substitute s in t's place in treap.
				*pt = s
				s.ticket = t.ticket
				s.acquiretime = t.acquiretime // preserve head acquiretime as oldest time
				s.parent = t.parent
				s.prev = t.prev
				s.next = t.next
				if s.prev != nil {
					s.prev.parent = s
				}
				if s.next != nil {
					s.next.parent = s
				}
				// Add t first in s's wait list.
				s.waitlink = t
				s.waittail = t.waittail
				if s.waittail == nil {
					s.waittail = t
				}
				s.waiters = t.waiters
				if s.waiters+1 != 0 {
					s.waiters++
				}
				t.parent = nil
				t.prev = nil
				t.next = nil
				t.waittail = nil
			} else {
				// Add s to end of t's wait list.
				if t.waittail == nil {
					t.waitlink = s
				} else {
					t.waittail.waitlink = s
				}
				t.waittail = s
				s.waitlink = nil
				if t.waiters+1 != 0 {
					t.waiters++
				}
			}
			return
		}
		last = t
		if uintptr(unsafe.Pointer(addr)) < uintptr(t.elem) {
			pt = &t.prev
		} else {
			pt = &t.next
		}
	}

	// Add s as new leaf in tree of unique addrs.
	// The balanced tree is a treap using ticket as the random heap priority.
	// That is, it is a binary tree ordered according to the elem addresses,
	// but then among the space of possible binary trees respecting those
	// addresses, it is kept balanced on average by maintaining a heap ordering
	// on the ticket: s.ticket <= both s.prev.ticket and s.next.ticket.
	// https://en.wikipedia.org/wiki/Treap
	// https://faculty.washington.edu/aragon/pubs/rst89.pdf
	//
	// s.ticket is compared with zero in a couple of places, therefore set the
	// lowest bit. It will not noticeably affect the treap's quality.
	s.ticket = cheaprand() | 1
	s.parent = last
	*pt = s

	// Rotate up into tree according to ticket (priority).
	for s.parent != nil && s.parent.ticket > s.ticket {
		if s.parent.prev == s {
			root.rotateRight(s.parent)
		} else {
			if s.parent.next != s {
				panic("semaRoot queue")
			}
			root.rotateLeft(s.parent)
		}
	}
}

// dequeue searches for and finds the first goroutine
// in semaRoot blocked on addr.
// If the sudog was being profiled, dequeue returns the time
// at which it was woken up as now. Otherwise now is 0.
// If there are additional entries in the wait list, dequeue
// returns tailtime set to the last entry's acquiretime.
// Otherwise tailtime is found.acquiretime.
func (root *semaRoot) dequeue(addr *uint32) (found *sudog, now, tailtime int64) {
	ps := &root.treap
	s := *ps
	for ; s != nil; s = *ps {
		if s.elem == unsafe.Pointer(addr) {
			goto Found
		}
		if uintptr(unsafe.Pointer(addr)) < uintptr(s.elem) {
			ps = &s.prev
		} else {
			ps = &s.next
		}
	}
	return nil, 0, 0

Found:
	now = int64(0)
	if s.acquiretime != 0 {
		now = cputicks()
	}
	if t := s.waitlink; t != nil {
		// Substitute t, also waiting on addr, for s in root tree of unique addrs.
		*ps = t
		t.ticket = s.ticket
		t.parent = s.parent
		t.prev = s.prev
		if t.prev != nil {
			t.prev.parent = t
		}
		t.next = s.next
		if t.next != nil {
			t.next.parent = t
		}
		if t.waitlink != nil {
			t.waittail = s.waittail
		} else {
			t.waittail = nil
		}
		t.waiters = s.waiters
		if t.waiters > 1 {
			t.waiters--
		}
		// Set head and tail acquire time to 'now',
		// because the caller will take care of charging
		// the delays before now for all entries in the list.
		t.acquiretime = now
		tailtime = s.waittail.acquiretime
		s.waittail.acquiretime = now
		s.waitlink = nil
		s.waittail = nil
	} else {
		// Rotate s down to be leaf of tree for removal, respecting priorities.
		for s.next != nil || s.prev != nil {
			if s.next == nil || s.prev != nil && s.prev.ticket < s.next.ticket {
				root.rotateRight(s)
			} else {
				root.rotateLeft(s)
			}
		}
		// Remove s, now a leaf.
		if s.parent != nil {
			if s.parent.prev == s {
				s.parent.prev = nil
			} else {
				s.parent.next = nil
			}
		} else {
			root.treap = nil
		}
		tailtime = s.acquiretime
	}
	s.parent = nil
	s.elem = nil
	s.next = nil
	s.prev = nil
	s.ticket = 0
	return s, now, tailtime
}

// rotateLeft rotates the tree rooted at node x.
// turning (x a (y b c)) into (y (x a b) c).
func (root *semaRoot) rotateLeft(x *sudog) {
	// p -> (x a (y b c))
	p := x.parent
	y := x.next
	b := y.prev

	y.prev = x
	x.parent = y
	x.next = b
	if b != nil {
		b.parent = x
	}

	y.parent = p
	if p == nil {
		root.treap = y
	} else if p.prev == x {
		p.prev = y
	} else {
		if p.next != x {
			throw("semaRoot rotateLeft")
		}
		p.next = y
	}
}

// rotateRight rotates the tree rooted at node y.
// turning (y (x a b) c) into (x a (y b c)).
func (root *semaRoot) rotateRight(y *sudog) {
	// p -> (y (x a b) c)
	p := y.parent
	x := y.prev
	b := x.next

	x.next = y
	y.parent = x
	y.prev = b
	if b != nil {
		b.parent = y
	}

	x.parent = p
	if p == nil {
		root.treap = x
	} else if p.prev == y {
		p.prev = x
	} else {
		if p.next != y {
			throw("semaRoot rotateRight")
		}
		p.next = x
	}
}

// notifyList is a ticket-based notification list used to implement sync.Cond.
//
// It must be kept in sync with the sync package.
type notifyList struct {
	// wait is the ticket number of the next waiter. It is atomically
	// incremented outside the lock.
	wait atomic.Uint32

	// notify is the ticket number of the next waiter to be notified. It can
	// be read outside the lock, but is only written to with lock held.
	//
	// Both wait & notify can wrap around, and such cases will be correctly
	// handled as long as their "unwrapped" difference is bounded by 2^31.
	// For this not to be the case, we'd need to have 2^31+ goroutines
	// blocked on the same condvar, which is currently not possible.
	notify uint32

	// List of parked waiters.
	lock mutex
	head *sudog
	tail *sudog
}

// less checks if a < b, considering a & b running counts that may overflow the
// 32-bit range, and that their "unwrapped" difference is always less than 2^31.
func less(a, b uint32) bool {
	return int32(a-b) < 0
}

// notifyListAdd adds the caller to a notify list such that it can receive
// notifications. The caller must eventually call notifyListWait to wait for
// such a notification, passing the returned ticket number.
//
//go:linkname notifyListAdd sync.runtime_notifyListAdd
func notifyListAdd(l *notifyList) uint32 {
	// This may be called concurrently, for example, when called from
	// sync.Cond.Wait while holding a RWMutex in read mode.
	return l.wait.Add(1) - 1
}

// notifyListWait waits for a notification. If one has been sent since
// notifyListAdd was called, it returns immediately. Otherwise, it blocks.
//
//go:linkname notifyListWait sync.runtime_notifyListWait
func notifyListWait(l *notifyList, t uint32) {
	lockWithRank(&l.lock, lockRankNotifyList)

	// Return right away if this ticket has already been notified.
	if less(t, l.notify) {
		unlock(&l.lock)
		return
	}

	// Enqueue itself.
	s := acquireSudog()
	s.g = getg()
	s.ticket = t
	s.releasetime = 0
	t0 := int64(0)
	if blockprofilerate > 0 {
		t0 = cputicks()
		s.releasetime = -1
	}
	if l.tail == nil {
		l.head = s
	} else {
		l.tail.next = s
	}
	l.tail = s
	goparkunlock(&l.lock, waitReasonSyncCondWait, traceBlockCondWait, 3)
	if t0 != 0 {
		blockevent(s.releasetime-t0, 2)
	}
	releaseSudog(s)
}

// notifyListNotifyAll notifies all entries in the list.
//
//go:linkname notifyListNotifyAll sync.runtime_notifyListNotifyAll
func notifyListNotifyAll(l *notifyList) {
	// Fast-path: if there are no new waiters since the last notification
	// we don't need to acquire the lock.
	if l.wait.Load() == atomic.Load(&l.notify) {
		return
	}

	// Pull the list out into a local variable, waiters will be readied
	// outside the lock.
	lockWithRank(&l.lock, lockRankNotifyList)
	s := l.head
	l.head = nil
	l.tail = nil

	// Update the next ticket to be notified. We can set it to the current
	// value of wait because any previous waiters are already in the list
	// or will notice that they have already been notified when trying to
	// add themselves to the list.
	atomic.Store(&l.notify, l.wait.Load())
	unlock(&l.lock)

	// Go through the local list and ready all waiters.
	for s != nil {
		next := s.next
		s.next = nil
		readyWithTime(s, 4)
		s = next
	}
}

// notifyListNotifyOne notifies one entry in the list.
//
//go:linkname notifyListNotifyOne sync.runtime_notifyListNotifyOne
func notifyListNotifyOne(l *notifyList) {
	// Fast-path: if there are no new waiters since the last notification
	// we don't need to acquire the lock at all.
	if l.wait.Load() == atomic.Load(&l.notify) {
		return
	}

	lockWithRank(&l.lock, lockRankNotifyList)

	// Re-check under the lock if we need to do anything.
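	// Another notifier may have caught notify up to wait while we were
	// acquiring the lock, in which case there is nothing left to do.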
	t := l.notify
	if t == l.wait.Load() {
		unlock(&l.lock)
		return
	}

	// Update the next notify ticket number.
	atomic.Store(&l.notify, t+1)

	// Try to find the g that needs to be notified.
	// If it hasn't made it to the list yet we won't find it,
	// but it won't park itself once it sees the new notify number.
	//
	// This scan looks linear but essentially always stops quickly.
	// Because goroutines queue separately from taking their ticket numbers,
	// there may be minor reorderings in the list, but we
	// expect the g we're looking for to be near the front.
	// The g has others in front of it on the list only to the
	// extent that it lost the race, so the iteration will not
	// be too long. This applies even when the g is missing:
	// it hasn't yet gotten to sleep and has lost the race to
	// the (few) other g's that we find on the list.
	for p, s := (*sudog)(nil), l.head; s != nil; p, s = s, s.next {
		if s.ticket == t {
			n := s.next
			if p != nil {
				p.next = n
			} else {
				l.head = n
			}
			if n == nil {
				l.tail = p
			}
			unlock(&l.lock)
			s.next = nil
			readyWithTime(s, 4)
			return
		}
	}
	unlock(&l.lock)
}

//go:linkname notifyListCheck sync.runtime_notifyListCheck
func notifyListCheck(sz uintptr) {
	if sz != unsafe.Sizeof(notifyList{}) {
		print("runtime: bad notifyList size - sync=", sz, " runtime=", unsafe.Sizeof(notifyList{}), "\n")
		throw("bad notifyList size")
	}
}

//go:linkname sync_nanotime sync.runtime_nanotime
func sync_nanotime() int64 {
	return nanotime()
}
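
// Illustrative sketch (not part of the upstream source): the wraparound-safe
// ordering that notifyList relies on reduces to a signed comparison of the
// ticket difference, so it stays correct as long as the "unwrapped" distance
// between the two tickets is below 2^31:
//
//	less(1, 2)          // true:  ordinary case
//	less(^uint32(0), 0) // true:  after wait wraps, ticket 0xFFFFFFFF still precedes ticket 0
//	less(0, ^uint32(0)) // false: ticket 0 comes after 0xFFFFFFFF, not before it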