github.com/hbdrawn/golang@v0.0.0-20141214014649-6b835209aba2/src/runtime/netpoll.go

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows

package runtime

import "unsafe"

// Integrated network poller (platform-independent part).
// A particular implementation (epoll/kqueue) must define the following functions:
// func netpollinit()			// to initialize the poller
// func netpollopen(fd uintptr, pd *pollDesc) int32	// to arm edge-triggered notifications
// and associate fd with pd.
// An implementation must call the following function to denote that the pd is ready.
// func netpollready(gpp **g, pd *pollDesc, mode int32)

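// Illustrative only (not part of this file): an epoll-based platform file
// (netpoll_epoll.go) might implement the netpollopen hook roughly as below;
// epfd, epollevent and epollctl are assumed to be provided by that platform file.
//
//	func netpollopen(fd uintptr, pd *pollDesc) int32 {
//		var ev epollevent
//		// Edge-triggered notifications for both read and write readiness.
//		ev.events = _EPOLLIN | _EPOLLOUT | _EPOLLRDHUP | _EPOLLET
//		// Stash the *pollDesc in the event payload so netpoll can find it later.
//		*(**pollDesc)(unsafe.Pointer(&ev.data)) = pd
//		return -epollctl(epfd, _EPOLL_CTL_ADD, int32(fd), &ev)
//	}
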
// pollDesc contains 2 binary semaphores, rg and wg, to park reader and writer
// goroutines respectively. The semaphore can be in the following states:
// pdReady - io readiness notification is pending;
//           a goroutine consumes the notification by changing the state to nil.
// pdWait - a goroutine prepares to park on the semaphore, but not yet parked;
//          the goroutine commits to park by changing the state to G pointer,
//          or, alternatively, concurrent io notification changes the state to pdReady,
//          or, alternatively, concurrent timeout/close changes the state to nil.
// G pointer - the goroutine is blocked on the semaphore;
//             io notification or timeout/close changes the state to pdReady or nil respectively
//             and unparks the goroutine.
// nil - nothing of the above.
const (
	pdReady uintptr = 1
	pdWait  uintptr = 2
)
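
// For orientation only (a summary derived from the comment above, not part of
// the original source), the rg/wg transitions reduce to:
//
//	nil     --io notification--------> pdReady
//	nil     --reader/writer parks----> pdWait
//	pdWait  --gopark commit----------> G pointer
//	pdWait  --io notification--------> pdReady
//	pdWait  --timeout/close----------> nil
//	G       --io notification--------> pdReady (and the goroutine is unparked)
//	G       --timeout/close----------> nil     (and the goroutine is unparked)
//	pdReady --consumed by goroutine--> nil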

const pollBlockSize = 4 * 1024

// Network poller descriptor.
type pollDesc struct {
	link *pollDesc // in pollcache, protected by pollcache.lock

	// The lock protects pollOpen, pollSetDeadline, pollUnblock and deadlineimpl operations.
	// This fully covers seq, rt and wt variables. fd is constant throughout the pollDesc lifetime.
	// pollReset, pollWait, pollWaitCanceled and runtime·netpollready (IO readiness notification)
	// proceed w/o taking the lock. So closing, rg, rd, wg and wd are manipulated
	// in a lock-free way by all operations.
	// NOTE(dvyukov): the following code uses uintptr to store *g (rg/wg),
	// which will blow up when GC starts moving objects.
	lock    mutex // protects the following fields
	fd      uintptr
	closing bool
	seq     uintptr // protects from stale timers and ready notifications
	rg      uintptr // pdReady, pdWait, G waiting for read or nil
	rt      timer   // read deadline timer (set if rt.f != nil)
	rd      int64   // read deadline
	wg      uintptr // pdReady, pdWait, G waiting for write or nil
	wt      timer   // write deadline timer
	wd      int64   // write deadline
	user    uint32  // user settable cookie
}

type pollCache struct {
	lock  mutex
	first *pollDesc
	// pollDesc objects must be type-stable,
	// because we can get ready notifications from epoll/kqueue
	// after the descriptor is closed/reused.
	// Stale notifications are detected using the seq variable,
	// which is incremented when deadlines are changed or the descriptor is reused.
}

var pollcache pollCache

func netpollServerInit() {
	netpollinit()
}

func netpollOpen(fd uintptr) (*pollDesc, int) {
	pd := pollcache.alloc()
	lock(&pd.lock)
	if pd.wg != 0 && pd.wg != pdReady {
		gothrow("netpollOpen: blocked write on free descriptor")
	}
	if pd.rg != 0 && pd.rg != pdReady {
		gothrow("netpollOpen: blocked read on free descriptor")
	}
	pd.fd = fd
	pd.closing = false
	pd.seq++
	pd.rg = 0
	pd.rd = 0
	pd.wg = 0
	pd.wd = 0
	unlock(&pd.lock)

	errno := netpollopen(fd, pd)
	return pd, int(errno)
}
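
// Hedged sketch (not in the original file): the poll API exported to package
// net drives these entry points; under that assumption, the life of a
// descriptor looks roughly like
//
//	netpollServerInit()              // once, before the first descriptor is opened
//	pd, errno := netpollOpen(fd)     // arm notifications and obtain a *pollDesc
//	for each blocking read/write {
//		netpollReset(pd, 'r')    // or 'w': clear a stale pdReady before the syscall
//		// ... syscall returns EAGAIN ...
//		netpollWait(pd, 'r')     // park until ready, deadline or close
//	}
//	netpollUnblock(pd)               // mark closing, wake any parked goroutines
//	netpollClose(pd)                 // only legal after netpollUnblock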

func netpollClose(pd *pollDesc) {
	if !pd.closing {
		gothrow("netpollClose: close w/o unblock")
	}
	if pd.wg != 0 && pd.wg != pdReady {
		gothrow("netpollClose: blocked write on closing descriptor")
	}
	if pd.rg != 0 && pd.rg != pdReady {
		gothrow("netpollClose: blocked read on closing descriptor")
	}
	netpollclose(uintptr(pd.fd))
	pollcache.free(pd)
}

func (c *pollCache) free(pd *pollDesc) {
	lock(&c.lock)
	pd.link = c.first
	c.first = pd
	unlock(&c.lock)
}

func netpollReset(pd *pollDesc, mode int) int {
	err := netpollcheckerr(pd, int32(mode))
	if err != 0 {
		return err
	}
	if mode == 'r' {
		pd.rg = 0
	} else if mode == 'w' {
		pd.wg = 0
	}
	return 0
}

func netpollWait(pd *pollDesc, mode int) int {
	err := netpollcheckerr(pd, int32(mode))
	if err != 0 {
		return err
	}
	// For now, only Solaris uses level-triggered IO.
	if GOOS == "solaris" {
		netpollarm(pd, mode)
	}
	for !netpollblock(pd, int32(mode), false) {
		err = netpollcheckerr(pd, int32(mode))
		if err != 0 {
			return err
		}
		// Can happen if the timeout fired and unblocked us,
		// but before we had a chance to run, the timeout was reset.
		// Pretend it has not happened and retry.
	}
	return 0
}

func netpollWaitCanceled(pd *pollDesc, mode int) {
	// This function is used only on Windows after a failed attempt to cancel
	// a pending async IO operation. Wait for ioready, ignore closing or timeouts.
	for !netpollblock(pd, int32(mode), true) {
	}
}

func netpollSetDeadline(pd *pollDesc, d int64, mode int) {
	lock(&pd.lock)
	if pd.closing {
		unlock(&pd.lock)
		return
	}
	pd.seq++ // invalidate current timers
	// Reset current timers.
	if pd.rt.f != nil {
		deltimer(&pd.rt)
		pd.rt.f = nil
	}
	if pd.wt.f != nil {
		deltimer(&pd.wt)
		pd.wt.f = nil
	}
	// Set up new timers.
	if d != 0 && d <= nanotime() {
		d = -1
	}
	if mode == 'r' || mode == 'r'+'w' {
		pd.rd = d
	}
	if mode == 'w' || mode == 'r'+'w' {
		pd.wd = d
	}
	if pd.rd > 0 && pd.rd == pd.wd {
		pd.rt.f = netpollDeadline
		pd.rt.when = pd.rd
		// Copy the current seq into the timer arg.
		// The timer func will check the seq against the current descriptor seq;
		// if they differ, the descriptor was reused or the timers were reset.
		pd.rt.arg = pd
		pd.rt.seq = pd.seq
		addtimer(&pd.rt)
	} else {
		if pd.rd > 0 {
			pd.rt.f = netpollReadDeadline
			pd.rt.when = pd.rd
			pd.rt.arg = pd
			pd.rt.seq = pd.seq
			addtimer(&pd.rt)
		}
		if pd.wd > 0 {
			pd.wt.f = netpollWriteDeadline
			pd.wt.when = pd.wd
			pd.wt.arg = pd
			pd.wt.seq = pd.seq
			addtimer(&pd.wt)
		}
	}
	// If we set the new deadline in the past, unblock currently pending IO if any.
	var rg, wg *g
	atomicstorep(unsafe.Pointer(&wg), nil) // full memory barrier between stores to rd/wd and load of rg/wg in netpollunblock
	if pd.rd < 0 {
		rg = netpollunblock(pd, 'r', false)
	}
	if pd.wd < 0 {
		wg = netpollunblock(pd, 'w', false)
	}
	unlock(&pd.lock)
	if rg != nil {
		goready(rg)
	}
	if wg != nil {
		goready(wg)
	}
}

func netpollUnblock(pd *pollDesc) {
	lock(&pd.lock)
	if pd.closing {
		gothrow("netpollUnblock: already closing")
	}
	pd.closing = true
	pd.seq++
	var rg, wg *g
	atomicstorep(unsafe.Pointer(&rg), nil) // full memory barrier between store to closing and read of rg/wg in netpollunblock
	rg = netpollunblock(pd, 'r', false)
	wg = netpollunblock(pd, 'w', false)
	if pd.rt.f != nil {
		deltimer(&pd.rt)
		pd.rt.f = nil
	}
	if pd.wt.f != nil {
		deltimer(&pd.wt)
		pd.wt.f = nil
	}
	unlock(&pd.lock)
	if rg != nil {
		goready(rg)
	}
	if wg != nil {
		goready(wg)
	}
}

// Make pd ready; newly runnable goroutines (if any) are added to the list headed by *gpp.
func netpollready(gpp **g, pd *pollDesc, mode int32) {
	var rg, wg *g
	if mode == 'r' || mode == 'r'+'w' {
		rg = netpollunblock(pd, 'r', true)
	}
	if mode == 'w' || mode == 'r'+'w' {
		wg = netpollunblock(pd, 'w', true)
	}
	if rg != nil {
		rg.schedlink = *gpp
		*gpp = rg
	}
	if wg != nil {
		wg.schedlink = *gpp
		*gpp = wg
	}
}
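
// Illustrative only (not part of this file): a platform poller calls
// netpollready from its netpoll function to collect runnable goroutines for
// the scheduler. Assuming the epoll names used in the earlier sketch, that
// loop might look roughly like
//
//	func netpoll(block bool) (gp *g) {
//		waitms := int32(-1)
//		if !block {
//			waitms = 0
//		}
//		var events [128]epollevent
//		n := epollwait(epfd, &events[0], int32(len(events)), waitms)
//		for i := int32(0); i < n; i++ {
//			var mode int32
//			if events[i].events&(_EPOLLIN|_EPOLLRDHUP|_EPOLLHUP|_EPOLLERR) != 0 {
//				mode += 'r'
//			}
//			if events[i].events&(_EPOLLOUT|_EPOLLHUP|_EPOLLERR) != 0 {
//				mode += 'w'
//			}
//			if mode != 0 {
//				pd := *(**pollDesc)(unsafe.Pointer(&events[i].data))
//				netpollready(&gp, pd, mode)
//			}
//		}
//		return gp
//	}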

func netpollcheckerr(pd *pollDesc, mode int32) int {
	if pd.closing {
		return 1 // errClosing
	}
	if (mode == 'r' && pd.rd < 0) || (mode == 'w' && pd.wd < 0) {
		return 2 // errTimeout
	}
	return 0
}
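
// The integer codes above are translated back into errors by the caller in
// package net; a hedged sketch of that mapping (the names errClosing and
// errTimeout are assumptions about the caller and are not defined here):
//
//	func convertErr(res int) error {
//		switch res {
//		case 0:
//			return nil
//		case 1:
//			return errClosing
//		case 2:
//			return errTimeout
//		}
//		panic("unreachable")
//	}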

func netpollblockcommit(gp *g, gpp unsafe.Pointer) bool {
	return casuintptr((*uintptr)(gpp), pdWait, uintptr(unsafe.Pointer(gp)))
}

// Returns true if IO is ready, or false if timed out or closed.
// waitio - wait only for completed IO, ignore errors.
func netpollblock(pd *pollDesc, mode int32, waitio bool) bool {
	gpp := &pd.rg
	if mode == 'w' {
		gpp = &pd.wg
	}

	// set the gpp semaphore to pdWait
	for {
		old := *gpp
		if old == pdReady {
			*gpp = 0
			return true
		}
		if old != 0 {
			gothrow("netpollblock: double wait")
		}
		if casuintptr(gpp, 0, pdWait) {
			break
		}
	}

	// Need to recheck error states after setting gpp to pdWait.
	// This is necessary because runtime_pollUnblock/runtime_pollSetDeadline/deadlineimpl
	// do the opposite: store to closing/rd/wd, membarrier, load of rg/wg.
	if waitio || netpollcheckerr(pd, mode) == 0 {
		gopark(netpollblockcommit, unsafe.Pointer(gpp), "IO wait")
	}
	// Be careful to not lose a concurrent pdReady notification.
	old := xchguintptr(gpp, 0)
	if old > pdWait {
		gothrow("netpollblock: corrupted state")
	}
	return old == pdReady
}

func netpollunblock(pd *pollDesc, mode int32, ioready bool) *g {
	gpp := &pd.rg
	if mode == 'w' {
		gpp = &pd.wg
	}

	for {
		old := *gpp
		if old == pdReady {
			return nil
		}
		if old == 0 && !ioready {
			// Only set pdReady for ioready. runtime_pollWait
			// will check for timeout/cancel before waiting.
			return nil
		}
		var new uintptr
		if ioready {
			new = pdReady
		}
		if casuintptr(gpp, old, new) {
			if old == pdReady || old == pdWait {
				old = 0
			}
			return (*g)(unsafe.Pointer(old))
		}
	}
}

func netpolldeadlineimpl(pd *pollDesc, seq uintptr, read, write bool) {
	lock(&pd.lock)
	// The seq arg is the descriptor's seq value when the timer was set.
	// If it is stale, ignore the timer event.
	if seq != pd.seq {
		// The descriptor was reused or timers were reset.
		unlock(&pd.lock)
		return
	}
	var rg *g
	if read {
		if pd.rd <= 0 || pd.rt.f == nil {
			gothrow("netpolldeadlineimpl: inconsistent read deadline")
		}
		pd.rd = -1
		atomicstorep(unsafe.Pointer(&pd.rt.f), nil) // full memory barrier between store to rd and load of rg in netpollunblock
		rg = netpollunblock(pd, 'r', false)
	}
	var wg *g
	if write {
		if pd.wd <= 0 || pd.wt.f == nil && !read {
			gothrow("netpolldeadlineimpl: inconsistent write deadline")
		}
		pd.wd = -1
		atomicstorep(unsafe.Pointer(&pd.wt.f), nil) // full memory barrier between store to wd and load of wg in netpollunblock
		wg = netpollunblock(pd, 'w', false)
	}
	unlock(&pd.lock)
	if rg != nil {
		goready(rg)
	}
	if wg != nil {
		goready(wg)
	}
}

func netpollDeadline(arg interface{}, seq uintptr) {
	netpolldeadlineimpl(arg.(*pollDesc), seq, true, true)
}

func netpollReadDeadline(arg interface{}, seq uintptr) {
	netpolldeadlineimpl(arg.(*pollDesc), seq, true, false)
}

func netpollWriteDeadline(arg interface{}, seq uintptr) {
	netpolldeadlineimpl(arg.(*pollDesc), seq, false, true)
}
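
// Note (added for clarity, consistent with how these callbacks are registered
// in netpollSetDeadline above): the runtime timer invokes its callback as
// f(arg, seq), so the seq captured at addtimer time flows back into
// netpolldeadlineimpl, which compares it against pd.seq to reject stale timers.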

func (c *pollCache) alloc() *pollDesc {
	lock(&c.lock)
	if c.first == nil {
		const pdSize = unsafe.Sizeof(pollDesc{})
		n := pollBlockSize / pdSize
		if n == 0 {
			n = 1
		}
		// Must be in non-GC memory because it can be referenced
		// only from epoll/kqueue internals.
		mem := persistentalloc(n*pdSize, 0, &memstats.other_sys)
		for i := uintptr(0); i < n; i++ {
			pd := (*pollDesc)(add(mem, i*pdSize))
			pd.link = c.first
			c.first = pd
		}
	}
	pd := c.first
	c.first = pd.link
	unlock(&c.lock)
	return pd
}