github.com/mtsmfm/go/src@v0.0.0-20221020090648-44bdcb9f8fde/runtime/netpoll.go

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build unix || (js && wasm) || windows

package runtime

import (
	"runtime/internal/atomic"
	"runtime/internal/sys"
	"unsafe"
)

// Integrated network poller (platform-independent part).
// A particular implementation (epoll/kqueue/port/AIX/Windows)
// must define the following functions:
//
//	func netpollinit()
//		Initialize the poller. Only called once.
//
//	func netpollopen(fd uintptr, pd *pollDesc) int32
//		Arm edge-triggered notifications for fd. The pd argument is to pass
//		back to netpollready when fd is ready. Return an errno value.
//
//	func netpollclose(fd uintptr) int32
//		Disable notifications for fd. Return an errno value.
//
//	func netpoll(delta int64) gList
//		Poll the network. If delta < 0, block indefinitely. If delta == 0,
//		poll without blocking. If delta > 0, block for up to delta nanoseconds.
//		Return a list of goroutines built by calling netpollready.
//
//	func netpollBreak()
//		Wake up the network poller, assumed to be blocked in netpoll.
//
//	func netpollIsPollDescriptor(fd uintptr) bool
//		Reports whether fd is a file descriptor used by the poller.
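
// For illustration only (a sketch, not part of this file): the Linux epoll
// backend's netpollopen looks roughly like the code below. The key idea is
// that the *pollDesc is stashed in the epoll event's user data, so that
// netpoll can hand it back to netpollready when the fd becomes ready.
// Names such as epollevent, epollctl, and epfd belong to that backend and
// are shown here as assumptions:
//
//	func netpollopen(fd uintptr, pd *pollDesc) int32 {
//		var ev epollevent
//		ev.events = _EPOLLIN | _EPOLLOUT | _EPOLLRDHUP | _EPOLLET
//		*(**pollDesc)(unsafe.Pointer(&ev.data)) = pd
//		return -epollctl(epfd, _EPOLL_CTL_ADD, int32(fd), &ev)
//	}
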
// Error codes returned by runtime_pollReset and runtime_pollWait.
// These must match the values in internal/poll/fd_poll_runtime.go.
const (
	pollNoError        = 0 // no error
	pollErrClosing     = 1 // descriptor is closed
	pollErrTimeout     = 2 // I/O timeout
	pollErrNotPollable = 3 // general error polling descriptor
)
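
// For illustration only: internal/poll converts these codes back into
// errors, roughly as in this sketch of convertErr from
// internal/poll/fd_poll_runtime.go:
//
//	func convertErr(res int, isFile bool) error {
//		switch res {
//		case pollNoError:
//			return nil
//		case pollErrClosing:
//			return errClosing(isFile)
//		case pollErrTimeout:
//			return ErrDeadlineExceeded
//		case pollErrNotPollable:
//			return ErrNotPollable
//		}
//		println("unreachable: ", res)
//		panic("unreachable")
//	}
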
// pollDesc contains 2 binary semaphores, rg and wg, to park reader and writer
// goroutines respectively. The semaphore can be in the following states:
//
//	pdReady - io readiness notification is pending;
//	          a goroutine consumes the notification by changing the state to pdNil.
//	pdWait - a goroutine prepares to park on the semaphore, but not yet parked;
//	         the goroutine commits to park by changing the state to G pointer,
//	         or, alternatively, concurrent io notification changes the state to pdReady,
//	         or, alternatively, concurrent timeout/close changes the state to pdNil.
//	G pointer - the goroutine is blocked on the semaphore;
//	            io notification or timeout/close changes the state to pdReady or pdNil respectively
//	            and unparks the goroutine.
//	pdNil - none of the above.
const (
	pdNil   uintptr = 0
	pdReady uintptr = 1
	pdWait  uintptr = 2
)
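
// The transitions above, summarized for quick reference (a restatement of
// the rules in the comment; no additional behavior is implied):
//
//	pdNil     --netpollblock-->           pdWait
//	pdWait    --netpollblockcommit-->     G pointer  (goroutine parks)
//	pdWait    --io notification-->        pdReady
//	pdWait    --timeout/close-->          pdNil
//	G pointer --io notification-->        pdReady    (goroutine unparked)
//	G pointer --timeout/close-->          pdNil      (goroutine unparked)
//	pdReady   --netpollblock consumes-->  pdNil
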
const pollBlockSize = 4 * 1024

// Network poller descriptor.
//
// No heap pointers.
type pollDesc struct {
	_    sys.NotInHeap
	link *pollDesc // in pollcache, protected by pollcache.lock
	fd   uintptr   // constant for pollDesc usage lifetime

	// atomicInfo holds bits from closing, rd, and wd,
	// which are only ever written while holding the lock,
	// summarized for use by netpollcheckerr,
	// which cannot acquire the lock.
	// After writing these fields under lock in a way that
	// might change the summary, code must call publishInfo
	// before releasing the lock.
	// Code that changes fields and then calls netpollunblock
	// (while still holding the lock) must call publishInfo
	// before calling netpollunblock, because publishInfo is what
	// stops netpollblock from blocking anew
	// (by changing the result of netpollcheckerr).
	// atomicInfo also holds the eventErr bit,
	// recording whether a poll event on the fd got an error;
	// atomicInfo is the only source of truth for that bit.
	atomicInfo atomic.Uint32 // atomic pollInfo

	// rg, wg are accessed atomically and hold g pointers.
	// (Using atomic.Uintptr here is similar to using guintptr elsewhere.)
	rg atomic.Uintptr // pdReady, pdWait, G waiting for read or pdNil
	wg atomic.Uintptr // pdReady, pdWait, G waiting for write or pdNil

	lock    mutex // protects the following fields
	closing bool
	user    uint32    // user settable cookie
	rseq    uintptr   // protects from stale read timers
	rt      timer     // read deadline timer (set if rt.f != nil)
	rd      int64     // read deadline (a nanotime in the future, -1 when expired)
	wseq    uintptr   // protects from stale write timers
	wt      timer     // write deadline timer
	wd      int64     // write deadline (a nanotime in the future, -1 when expired)
	self    *pollDesc // storage for indirect interface. See (*pollDesc).makeArg.
}

// pollInfo is the bits needed by netpollcheckerr, stored atomically,
// mostly duplicating state that is manipulated under lock in pollDesc.
// The one exception is the pollEventErr bit, which is maintained only
// in the pollInfo.
type pollInfo uint32

const (
	pollClosing = 1 << iota
	pollEventErr
	pollExpiredReadDeadline
	pollExpiredWriteDeadline
)

func (i pollInfo) closing() bool              { return i&pollClosing != 0 }
func (i pollInfo) eventErr() bool             { return i&pollEventErr != 0 }
func (i pollInfo) expiredReadDeadline() bool  { return i&pollExpiredReadDeadline != 0 }
func (i pollInfo) expiredWriteDeadline() bool { return i&pollExpiredWriteDeadline != 0 }

// info returns the pollInfo corresponding to pd.
func (pd *pollDesc) info() pollInfo {
	return pollInfo(pd.atomicInfo.Load())
}

// publishInfo updates pd.atomicInfo (returned by pd.info)
// using the other values in pd.
// It must be called while holding pd.lock,
// and it must be called after changing anything
// that might affect the info bits.
// In practice this means after changing closing
// or changing rd or wd from < 0 to >= 0.
func (pd *pollDesc) publishInfo() {
	var info uint32
	if pd.closing {
		info |= pollClosing
	}
	if pd.rd < 0 {
		info |= pollExpiredReadDeadline
	}
	if pd.wd < 0 {
		info |= pollExpiredWriteDeadline
	}

	// Set all of x except the pollEventErr bit.
	x := pd.atomicInfo.Load()
	for !pd.atomicInfo.CompareAndSwap(x, (x&pollEventErr)|info) {
		x = pd.atomicInfo.Load()
	}
}

// setEventErr sets the result of pd.info().eventErr() to b.
func (pd *pollDesc) setEventErr(b bool) {
	x := pd.atomicInfo.Load()
	for (x&pollEventErr != 0) != b && !pd.atomicInfo.CompareAndSwap(x, x^pollEventErr) {
		x = pd.atomicInfo.Load()
	}
}

type pollCache struct {
	lock  mutex
	first *pollDesc
	// PollDesc objects must be type-stable,
	// because we can get ready notification from epoll/kqueue
	// after the descriptor is closed/reused.
	// Stale notifications are detected using seq variable,
	// seq is incremented when deadlines are changed or descriptor is reused.
}

var (
	netpollInitLock mutex
	netpollInited   atomic.Uint32

	pollcache      pollCache
	netpollWaiters atomic.Uint32
)

//go:linkname poll_runtime_pollServerInit internal/poll.runtime_pollServerInit
func poll_runtime_pollServerInit() {
	netpollGenericInit()
}

func netpollGenericInit() {
	if netpollInited.Load() == 0 {
		lockInit(&netpollInitLock, lockRankNetpollInit)
		lock(&netpollInitLock)
		if netpollInited.Load() == 0 {
			netpollinit()
			netpollInited.Store(1)
		}
		unlock(&netpollInitLock)
	}
}

func netpollinited() bool {
	return netpollInited.Load() != 0
}

//go:linkname poll_runtime_isPollServerDescriptor internal/poll.runtime_isPollServerDescriptor

// poll_runtime_isPollServerDescriptor reports whether fd is a
// descriptor being used by netpoll.
func poll_runtime_isPollServerDescriptor(fd uintptr) bool {
	return netpollIsPollDescriptor(fd)
}

//go:linkname poll_runtime_pollOpen internal/poll.runtime_pollOpen
func poll_runtime_pollOpen(fd uintptr) (*pollDesc, int) {
	pd := pollcache.alloc()
	lock(&pd.lock)
	wg := pd.wg.Load()
	if wg != pdNil && wg != pdReady {
		throw("runtime: blocked write on free polldesc")
	}
	rg := pd.rg.Load()
	if rg != pdNil && rg != pdReady {
		throw("runtime: blocked read on free polldesc")
	}
	pd.fd = fd
	pd.closing = false
	pd.setEventErr(false)
	pd.rseq++
	pd.rg.Store(pdNil)
	pd.rd = 0
	pd.wseq++
	pd.wg.Store(pdNil)
	pd.wd = 0
	pd.self = pd
	pd.publishInfo()
	unlock(&pd.lock)

	errno := netpollopen(fd, pd)
	if errno != 0 {
		pollcache.free(pd)
		return nil, int(errno)
	}
	return pd, 0
}

//go:linkname poll_runtime_pollClose internal/poll.runtime_pollClose
func poll_runtime_pollClose(pd *pollDesc) {
	if !pd.closing {
		throw("runtime: close polldesc w/o unblock")
	}
	wg := pd.wg.Load()
	if wg != pdNil && wg != pdReady {
		throw("runtime: blocked write on closing polldesc")
	}
	rg := pd.rg.Load()
	if rg != pdNil && rg != pdReady {
		throw("runtime: blocked read on closing polldesc")
	}
	netpollclose(pd.fd)
	pollcache.free(pd)
}

func (c *pollCache) free(pd *pollDesc) {
	lock(&c.lock)
	pd.link = c.first
	c.first = pd
	unlock(&c.lock)
}

// poll_runtime_pollReset, which is internal/poll.runtime_pollReset,
// prepares a descriptor for polling in mode, which is 'r' or 'w'.
// This returns an error code; the codes are defined above.
//
//go:linkname poll_runtime_pollReset internal/poll.runtime_pollReset
func poll_runtime_pollReset(pd *pollDesc, mode int) int {
	errcode := netpollcheckerr(pd, int32(mode))
	if errcode != pollNoError {
		return errcode
	}
	if mode == 'r' {
		pd.rg.Store(pdNil)
	} else if mode == 'w' {
		pd.wg.Store(pdNil)
	}
	return pollNoError
}

// poll_runtime_pollWait, which is internal/poll.runtime_pollWait,
// waits for a descriptor to be ready for reading or writing,
// according to mode, which is 'r' or 'w'.
// This returns an error code; the codes are defined above.
//
//go:linkname poll_runtime_pollWait internal/poll.runtime_pollWait
func poll_runtime_pollWait(pd *pollDesc, mode int) int {
	errcode := netpollcheckerr(pd, int32(mode))
	if errcode != pollNoError {
		return errcode
	}
	// As for now only Solaris, illumos, and AIX use level-triggered IO.
	if GOOS == "solaris" || GOOS == "illumos" || GOOS == "aix" {
		netpollarm(pd, mode)
	}
	for !netpollblock(pd, int32(mode), false) {
		errcode = netpollcheckerr(pd, int32(mode))
		if errcode != pollNoError {
			return errcode
		}
		// Can happen if timeout has fired and unblocked us,
		// but before we had a chance to run, timeout has been reset.
		// Pretend it has not happened and retry.
	}
	return pollNoError
}
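
// For illustration only: internal/poll reaches runtime_pollWait through a
// small wrapper, roughly as in this sketch of (*pollDesc).wait from
// internal/poll/fd_poll_runtime.go:
//
//	func (pd *pollDesc) wait(mode int, isFile bool) error {
//		if pd.runtimeCtx == 0 {
//			return errors.New("waiting for unsupported file type")
//		}
//		res := runtime_pollWait(pd.runtimeCtx, mode)
//		return convertErr(res, isFile)
//	}
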
//go:linkname poll_runtime_pollWaitCanceled internal/poll.runtime_pollWaitCanceled
func poll_runtime_pollWaitCanceled(pd *pollDesc, mode int) {
	// This function is used only on windows after a failed attempt to cancel
	// a pending async IO operation. Wait for ioready, ignore closing or timeouts.
	for !netpollblock(pd, int32(mode), true) {
	}
}

//go:linkname poll_runtime_pollSetDeadline internal/poll.runtime_pollSetDeadline
func poll_runtime_pollSetDeadline(pd *pollDesc, d int64, mode int) {
	lock(&pd.lock)
	if pd.closing {
		unlock(&pd.lock)
		return
	}
	rd0, wd0 := pd.rd, pd.wd
	combo0 := rd0 > 0 && rd0 == wd0
	if d > 0 {
		d += nanotime()
		if d <= 0 {
			// If the user has a deadline in the future, but the delay calculation
			// overflows, then set the deadline to the maximum possible value.
			d = 1<<63 - 1
		}
	}
	if mode == 'r' || mode == 'r'+'w' {
		pd.rd = d
	}
	if mode == 'w' || mode == 'r'+'w' {
		pd.wd = d
	}
	pd.publishInfo()
	combo := pd.rd > 0 && pd.rd == pd.wd
	rtf := netpollReadDeadline
	if combo {
		rtf = netpollDeadline
	}
	if pd.rt.f == nil {
		if pd.rd > 0 {
			pd.rt.f = rtf
			// Copy current seq into the timer arg.
			// Timer func will check the seq against current descriptor seq,
			// if they differ the descriptor was reused or timers were reset.
			pd.rt.arg = pd.makeArg()
			pd.rt.seq = pd.rseq
			resettimer(&pd.rt, pd.rd)
		}
	} else if pd.rd != rd0 || combo != combo0 {
		pd.rseq++ // invalidate current timers
		if pd.rd > 0 {
			modtimer(&pd.rt, pd.rd, 0, rtf, pd.makeArg(), pd.rseq)
		} else {
			deltimer(&pd.rt)
			pd.rt.f = nil
		}
	}
	if pd.wt.f == nil {
		if pd.wd > 0 && !combo {
			pd.wt.f = netpollWriteDeadline
			pd.wt.arg = pd.makeArg()
			pd.wt.seq = pd.wseq
			resettimer(&pd.wt, pd.wd)
		}
	} else if pd.wd != wd0 || combo != combo0 {
		pd.wseq++ // invalidate current timers
		if pd.wd > 0 && !combo {
			modtimer(&pd.wt, pd.wd, 0, netpollWriteDeadline, pd.makeArg(), pd.wseq)
		} else {
			deltimer(&pd.wt)
			pd.wt.f = nil
		}
	}
	// If we set the new deadline in the past, unblock currently pending IO if any.
	// Note that pd.publishInfo has already been called, above, immediately after modifying rd and wd.
	var rg, wg *g
	if pd.rd < 0 {
		rg = netpollunblock(pd, 'r', false)
	}
	if pd.wd < 0 {
		wg = netpollunblock(pd, 'w', false)
	}
	unlock(&pd.lock)
	if rg != nil {
		netpollgoready(rg, 3)
	}
	if wg != nil {
		netpollgoready(wg, 3)
	}
}

//go:linkname poll_runtime_pollUnblock internal/poll.runtime_pollUnblock
func poll_runtime_pollUnblock(pd *pollDesc) {
	lock(&pd.lock)
	if pd.closing {
		throw("runtime: unblock on closing polldesc")
	}
	pd.closing = true
	pd.rseq++
	pd.wseq++
	var rg, wg *g
	pd.publishInfo()
	rg = netpollunblock(pd, 'r', false)
	wg = netpollunblock(pd, 'w', false)
	if pd.rt.f != nil {
		deltimer(&pd.rt)
		pd.rt.f = nil
	}
	if pd.wt.f != nil {
		deltimer(&pd.wt)
		pd.wt.f = nil
	}
	unlock(&pd.lock)
	if rg != nil {
		netpollgoready(rg, 3)
	}
	if wg != nil {
		netpollgoready(wg, 3)
	}
}

// netpollready is called by the platform-specific netpoll function.
// It declares that the fd associated with pd is ready for I/O.
// The toRun argument is used to build a list of goroutines to return
// from netpoll. The mode argument is 'r', 'w', or 'r'+'w' to indicate
// whether the fd is ready for reading or writing or both.
//
// This may run while the world is stopped, so write barriers are not allowed.
//
//go:nowritebarrier
func netpollready(toRun *gList, pd *pollDesc, mode int32) {
	var rg, wg *g
	if mode == 'r' || mode == 'r'+'w' {
		rg = netpollunblock(pd, 'r', true)
	}
	if mode == 'w' || mode == 'r'+'w' {
		wg = netpollunblock(pd, 'w', true)
	}
	if rg != nil {
		toRun.push(rg)
	}
	if wg != nil {
		toRun.push(wg)
	}
}
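
// For illustration only: a platform netpoll loop dispatches into
// netpollready roughly like this (a sketch based on the Linux epoll
// backend; ev and the event constants are epoll-specific assumptions):
//
//	var mode int32
//	if ev.events&(_EPOLLIN|_EPOLLRDHUP|_EPOLLHUP|_EPOLLERR) != 0 {
//		mode += 'r'
//	}
//	if ev.events&(_EPOLLOUT|_EPOLLHUP|_EPOLLERR) != 0 {
//		mode += 'w'
//	}
//	if mode != 0 {
//		pd := *(**pollDesc)(unsafe.Pointer(&ev.data))
//		pd.setEventErr(ev.events == _EPOLLERR)
//		netpollready(&toRun, pd, mode)
//	}
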
func netpollcheckerr(pd *pollDesc, mode int32) int {
	info := pd.info()
	if info.closing() {
		return pollErrClosing
	}
	if (mode == 'r' && info.expiredReadDeadline()) || (mode == 'w' && info.expiredWriteDeadline()) {
		return pollErrTimeout
	}
	// Report an event scanning error only on a read event.
	// An error on a write event will be captured in a subsequent
	// write call that is able to report a more specific error.
	if mode == 'r' && info.eventErr() {
		return pollErrNotPollable
	}
	return pollNoError
}

func netpollblockcommit(gp *g, gpp unsafe.Pointer) bool {
	r := atomic.Casuintptr((*uintptr)(gpp), pdWait, uintptr(unsafe.Pointer(gp)))
	if r {
		// Bump the count of goroutines waiting for the poller.
		// The scheduler uses this to decide whether to block
		// waiting for the poller if there is nothing else to do.
		netpollWaiters.Add(1)
	}
	return r
}

func netpollgoready(gp *g, traceskip int) {
	netpollWaiters.Add(-1)
	goready(gp, traceskip+1)
}

// returns true if IO is ready, or false if timed out or closed
// waitio - wait only for completed IO, ignore errors
// Concurrent calls to netpollblock in the same mode are forbidden, as pollDesc
// can hold only a single waiting goroutine for each mode.
func netpollblock(pd *pollDesc, mode int32, waitio bool) bool {
	gpp := &pd.rg
	if mode == 'w' {
		gpp = &pd.wg
	}

	// set the gpp semaphore to pdWait
	for {
		// Consume notification if already ready.
		if gpp.CompareAndSwap(pdReady, pdNil) {
			return true
		}
		if gpp.CompareAndSwap(pdNil, pdWait) {
			break
		}

		// Double check that this isn't corrupt; otherwise we'd loop
		// forever.
		if v := gpp.Load(); v != pdReady && v != pdNil {
			throw("runtime: double wait")
		}
	}

	// need to recheck error states after setting gpp to pdWait
	// this is necessary because runtime_pollUnblock/runtime_pollSetDeadline/deadlineimpl
	// do the opposite: store to closing/rd/wd, publishInfo, load of rg/wg
	if waitio || netpollcheckerr(pd, mode) == pollNoError {
		gopark(netpollblockcommit, unsafe.Pointer(gpp), waitReasonIOWait, traceEvGoBlockNet, 5)
	}
	// be careful to not lose concurrent pdReady notification
	old := gpp.Swap(pdNil)
	if old > pdWait {
		throw("runtime: corrupted polldesc")
	}
	return old == pdReady
}

func netpollunblock(pd *pollDesc, mode int32, ioready bool) *g {
	gpp := &pd.rg
	if mode == 'w' {
		gpp = &pd.wg
	}

	for {
		old := gpp.Load()
		if old == pdReady {
			return nil
		}
		if old == pdNil && !ioready {
			// Only set pdReady for ioready. runtime_pollWait
			// will check for timeout/cancel before waiting.
			return nil
		}
		var new uintptr
		if ioready {
			new = pdReady
		}
		if gpp.CompareAndSwap(old, new) {
			if old == pdWait {
				old = pdNil
			}
			return (*g)(unsafe.Pointer(old))
		}
	}
}

func netpolldeadlineimpl(pd *pollDesc, seq uintptr, read, write bool) {
	lock(&pd.lock)
	// Seq arg is seq when the timer was set.
	// If it's stale, ignore the timer event.
	currentSeq := pd.rseq
	if !read {
		currentSeq = pd.wseq
	}
	if seq != currentSeq {
		// The descriptor was reused or timers were reset.
		unlock(&pd.lock)
		return
	}
	var rg *g
	if read {
		if pd.rd <= 0 || pd.rt.f == nil {
			throw("runtime: inconsistent read deadline")
		}
		pd.rd = -1
		pd.publishInfo()
		rg = netpollunblock(pd, 'r', false)
	}
	var wg *g
	if write {
		if pd.wd <= 0 || pd.wt.f == nil && !read {
			throw("runtime: inconsistent write deadline")
		}
		pd.wd = -1
		pd.publishInfo()
		wg = netpollunblock(pd, 'w', false)
	}
	unlock(&pd.lock)
	if rg != nil {
		netpollgoready(rg, 0)
	}
	if wg != nil {
		netpollgoready(wg, 0)
	}
}

func netpollDeadline(arg any, seq uintptr) {
	netpolldeadlineimpl(arg.(*pollDesc), seq, true, true)
}

func netpollReadDeadline(arg any, seq uintptr) {
	netpolldeadlineimpl(arg.(*pollDesc), seq, true, false)
}

func netpollWriteDeadline(arg any, seq uintptr) {
	netpolldeadlineimpl(arg.(*pollDesc), seq, false, true)
}

func (c *pollCache) alloc() *pollDesc {
	lock(&c.lock)
	if c.first == nil {
		const pdSize = unsafe.Sizeof(pollDesc{})
		n := pollBlockSize / pdSize
		if n == 0 {
			n = 1
		}
		// Must be in non-GC memory because can be referenced
		// only from epoll/kqueue internals.
		mem := persistentalloc(n*pdSize, 0, &memstats.other_sys)
		for i := uintptr(0); i < n; i++ {
			pd := (*pollDesc)(add(mem, i*pdSize))
			pd.link = c.first
			c.first = pd
		}
	}
	pd := c.first
	c.first = pd.link
	lockInit(&pd.lock, lockRankPollDesc)
	unlock(&c.lock)
	return pd
}

// makeArg converts pd to an interface{}.
// makeArg does not do any allocation. Normally, such
// a conversion requires an allocation because pointers to
// types which embed runtime/internal/sys.NotInHeap (which pollDesc is)
// must be stored in interfaces indirectly. See issue 42076.
func (pd *pollDesc) makeArg() (i any) {
	x := (*eface)(unsafe.Pointer(&i))
	x._type = pdType
	x.data = unsafe.Pointer(&pd.self)
	return
}

var (
	pdEface any    = (*pollDesc)(nil)
	pdType  *_type = efaceOf(&pdEface)._type
)