github.com/sandwichdev/go-internals@v0.0.0-20210605002614-12311ac6b2c5/poll/fd_unix.go (about)

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build aix darwin dragonfly freebsd js,wasm linux netbsd openbsd solaris
     6  
     7  package poll
     8  
     9  import (
    10  	"io"
    11  	"sync/atomic"
    12  	"syscall"
    13  )
    14  
// FD is a file descriptor. The net and os packages use this type as a
// field of a larger type representing a network connection or OS file.
type FD struct {
	// Lock sysfd and serialize access to Read and Write methods.
	fdmu fdMutex

	// System file descriptor. Immutable until Close.
	// destroy overwrites it with -1 after the descriptor has been closed.
	Sysfd int

	// I/O poller.
	pd pollDesc

	// Writev cache.
	iovecs *[]syscall.Iovec

	// Semaphore signaled when file is closed.
	// destroy releases it; Close acquires it when the FD is not in
	// blocking mode.
	csema uint32

	// Non-zero if this file has been set to blocking mode.
	// Init sets it when the FD is not pollable or when the poller cannot
	// be initialized; SetBlocking sets it with an atomic store.
	isBlocking uint32

	// Whether this is a streaming descriptor, as opposed to a
	// packet-based descriptor like a UDP socket. Immutable.
	// Streaming descriptors have each read and write capped at maxRW bytes.
	IsStream bool

	// Whether a zero byte read indicates EOF. This is false for a
	// message based socket connection.
	ZeroReadIsEOF bool

	// Whether this is a file rather than a network socket.
	// Set by Init when its net argument is "file".
	isFile bool
}
    47  
    48  // Init initializes the FD. The Sysfd field should already be set.
    49  // This can be called multiple times on a single FD.
    50  // The net argument is a network name from the net package (e.g., "tcp"),
    51  // or "file".
    52  // Set pollable to true if fd should be managed by runtime netpoll.
    53  func (fd *FD) Init(net string, pollable bool) error {
    54  	// We don't actually care about the various network types.
    55  	if net == "file" {
    56  		fd.isFile = true
    57  	}
    58  	if !pollable {
    59  		fd.isBlocking = 1
    60  		return nil
    61  	}
    62  	err := fd.pd.init(fd)
    63  	if err != nil {
    64  		// If we could not initialize the runtime poller,
    65  		// assume we are using blocking mode.
    66  		fd.isBlocking = 1
    67  	}
    68  	return err
    69  }
    70  
    71  // Destroy closes the file descriptor. This is called when there are
    72  // no remaining references.
    73  func (fd *FD) destroy() error {
    74  	// Poller may want to unregister fd in readiness notification mechanism,
    75  	// so this must be executed before CloseFunc.
    76  	fd.pd.close()
    77  
    78  	// We don't use ignoringEINTR here because POSIX does not define
    79  	// whether the descriptor is closed if close returns EINTR.
    80  	// If the descriptor is indeed closed, using a loop would race
    81  	// with some other goroutine opening a new descriptor.
    82  	// (The Linux kernel guarantees that it is closed on an EINTR error.)
    83  	err := CloseFunc(fd.Sysfd)
    84  
    85  	fd.Sysfd = -1
    86  	runtime_Semrelease(&fd.csema)
    87  	return err
    88  }
    89  
    90  // Close closes the FD. The underlying file descriptor is closed by the
    91  // destroy method when there are no remaining references.
    92  func (fd *FD) Close() error {
    93  	if !fd.fdmu.increfAndClose() {
    94  		return errClosing(fd.isFile)
    95  	}
    96  
    97  	// Unblock any I/O.  Once it all unblocks and returns,
    98  	// so that it cannot be referring to fd.sysfd anymore,
    99  	// the final decref will close fd.sysfd. This should happen
   100  	// fairly quickly, since all the I/O is non-blocking, and any
   101  	// attempts to block in the pollDesc will return errClosing(fd.isFile).
   102  	fd.pd.evict()
   103  
   104  	// The call to decref will call destroy if there are no other
   105  	// references.
   106  	err := fd.decref()
   107  
   108  	// Wait until the descriptor is closed. If this was the only
   109  	// reference, it is already closed. Only wait if the file has
   110  	// not been set to blocking mode, as otherwise any current I/O
   111  	// may be blocking, and that would block the Close.
   112  	// No need for an atomic read of isBlocking, increfAndClose means
   113  	// we have exclusive access to fd.
   114  	if fd.isBlocking == 0 {
   115  		runtime_Semacquire(&fd.csema)
   116  	}
   117  
   118  	return err
   119  }
   120  
   121  // SetBlocking puts the file into blocking mode.
   122  func (fd *FD) SetBlocking() error {
   123  	if err := fd.incref(); err != nil {
   124  		return err
   125  	}
   126  	defer fd.decref()
   127  	// Atomic store so that concurrent calls to SetBlocking
   128  	// do not cause a race condition. isBlocking only ever goes
   129  	// from 0 to 1 so there is no real race here.
   130  	atomic.StoreUint32(&fd.isBlocking, 1)
   131  	return syscall.SetNonblock(fd.Sysfd, false)
   132  }
   133  
// maxRW is the largest number of bytes passed to a single read or write
// system call on a streaming descriptor (one with IsStream set).
//
// Darwin and FreeBSD can't read or write 2GB+ files at a time,
// even on 64-bit systems.
// The same is true of socket implementations on many systems.
// See golang.org/issue/7812 and golang.org/issue/16266.
// Use 1GB instead of, say, 2GB-1, to keep subsequent reads aligned.
const maxRW = 1 << 30
   140  
   141  // Read implements io.Reader.
   142  func (fd *FD) Read(p []byte) (int, error) {
   143  	if err := fd.readLock(); err != nil {
   144  		return 0, err
   145  	}
   146  	defer fd.readUnlock()
   147  	if len(p) == 0 {
   148  		// If the caller wanted a zero byte read, return immediately
   149  		// without trying (but after acquiring the readLock).
   150  		// Otherwise syscall.Read returns 0, nil which looks like
   151  		// io.EOF.
   152  		// TODO(bradfitz): make it wait for readability? (Issue 15735)
   153  		return 0, nil
   154  	}
   155  	if err := fd.pd.prepareRead(fd.isFile); err != nil {
   156  		return 0, err
   157  	}
   158  	if fd.IsStream && len(p) > maxRW {
   159  		p = p[:maxRW]
   160  	}
   161  	for {
   162  		n, err := ignoringEINTRIO(syscall.Read, fd.Sysfd, p)
   163  		if err != nil {
   164  			n = 0
   165  			if err == syscall.EAGAIN && fd.pd.pollable() {
   166  				if err = fd.pd.waitRead(fd.isFile); err == nil {
   167  					continue
   168  				}
   169  			}
   170  		}
   171  		err = fd.eofError(n, err)
   172  		return n, err
   173  	}
   174  }
   175  
   176  // Pread wraps the pread system call.
   177  func (fd *FD) Pread(p []byte, off int64) (int, error) {
   178  	// Call incref, not readLock, because since pread specifies the
   179  	// offset it is independent from other reads.
   180  	// Similarly, using the poller doesn't make sense for pread.
   181  	if err := fd.incref(); err != nil {
   182  		return 0, err
   183  	}
   184  	if fd.IsStream && len(p) > maxRW {
   185  		p = p[:maxRW]
   186  	}
   187  	var (
   188  		n   int
   189  		err error
   190  	)
   191  	for {
   192  		n, err = syscall.Pread(fd.Sysfd, p, off)
   193  		if err != syscall.EINTR {
   194  			break
   195  		}
   196  	}
   197  	if err != nil {
   198  		n = 0
   199  	}
   200  	fd.decref()
   201  	err = fd.eofError(n, err)
   202  	return n, err
   203  }
   204  
   205  // ReadFrom wraps the recvfrom network call.
   206  func (fd *FD) ReadFrom(p []byte) (int, syscall.Sockaddr, error) {
   207  	if err := fd.readLock(); err != nil {
   208  		return 0, nil, err
   209  	}
   210  	defer fd.readUnlock()
   211  	if err := fd.pd.prepareRead(fd.isFile); err != nil {
   212  		return 0, nil, err
   213  	}
   214  	for {
   215  		n, sa, err := syscall.Recvfrom(fd.Sysfd, p, 0)
   216  		if err != nil {
   217  			if err == syscall.EINTR {
   218  				continue
   219  			}
   220  			n = 0
   221  			if err == syscall.EAGAIN && fd.pd.pollable() {
   222  				if err = fd.pd.waitRead(fd.isFile); err == nil {
   223  					continue
   224  				}
   225  			}
   226  		}
   227  		err = fd.eofError(n, err)
   228  		return n, sa, err
   229  	}
   230  }
   231  
   232  // ReadMsg wraps the recvmsg network call.
   233  func (fd *FD) ReadMsg(p []byte, oob []byte) (int, int, int, syscall.Sockaddr, error) {
   234  	if err := fd.readLock(); err != nil {
   235  		return 0, 0, 0, nil, err
   236  	}
   237  	defer fd.readUnlock()
   238  	if err := fd.pd.prepareRead(fd.isFile); err != nil {
   239  		return 0, 0, 0, nil, err
   240  	}
   241  	for {
   242  		n, oobn, flags, sa, err := syscall.Recvmsg(fd.Sysfd, p, oob, 0)
   243  		if err != nil {
   244  			if err == syscall.EINTR {
   245  				continue
   246  			}
   247  			// TODO(dfc) should n and oobn be set to 0
   248  			if err == syscall.EAGAIN && fd.pd.pollable() {
   249  				if err = fd.pd.waitRead(fd.isFile); err == nil {
   250  					continue
   251  				}
   252  			}
   253  		}
   254  		err = fd.eofError(n, err)
   255  		return n, oobn, flags, sa, err
   256  	}
   257  }
   258  
   259  // Write implements io.Writer.
   260  func (fd *FD) Write(p []byte) (int, error) {
   261  	if err := fd.writeLock(); err != nil {
   262  		return 0, err
   263  	}
   264  	defer fd.writeUnlock()
   265  	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
   266  		return 0, err
   267  	}
   268  	var nn int
   269  	for {
   270  		max := len(p)
   271  		if fd.IsStream && max-nn > maxRW {
   272  			max = nn + maxRW
   273  		}
   274  		n, err := ignoringEINTRIO(syscall.Write, fd.Sysfd, p[nn:max])
   275  		if n > 0 {
   276  			nn += n
   277  		}
   278  		if nn == len(p) {
   279  			return nn, err
   280  		}
   281  		if err == syscall.EAGAIN && fd.pd.pollable() {
   282  			if err = fd.pd.waitWrite(fd.isFile); err == nil {
   283  				continue
   284  			}
   285  		}
   286  		if err != nil {
   287  			return nn, err
   288  		}
   289  		if n == 0 {
   290  			return nn, io.ErrUnexpectedEOF
   291  		}
   292  	}
   293  }
   294  
   295  // Pwrite wraps the pwrite system call.
   296  func (fd *FD) Pwrite(p []byte, off int64) (int, error) {
   297  	// Call incref, not writeLock, because since pwrite specifies the
   298  	// offset it is independent from other writes.
   299  	// Similarly, using the poller doesn't make sense for pwrite.
   300  	if err := fd.incref(); err != nil {
   301  		return 0, err
   302  	}
   303  	defer fd.decref()
   304  	var nn int
   305  	for {
   306  		max := len(p)
   307  		if fd.IsStream && max-nn > maxRW {
   308  			max = nn + maxRW
   309  		}
   310  		n, err := syscall.Pwrite(fd.Sysfd, p[nn:max], off+int64(nn))
   311  		if err == syscall.EINTR {
   312  			continue
   313  		}
   314  		if n > 0 {
   315  			nn += n
   316  		}
   317  		if nn == len(p) {
   318  			return nn, err
   319  		}
   320  		if err != nil {
   321  			return nn, err
   322  		}
   323  		if n == 0 {
   324  			return nn, io.ErrUnexpectedEOF
   325  		}
   326  	}
   327  }
   328  
   329  // WriteTo wraps the sendto network call.
   330  func (fd *FD) WriteTo(p []byte, sa syscall.Sockaddr) (int, error) {
   331  	if err := fd.writeLock(); err != nil {
   332  		return 0, err
   333  	}
   334  	defer fd.writeUnlock()
   335  	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
   336  		return 0, err
   337  	}
   338  	for {
   339  		err := syscall.Sendto(fd.Sysfd, p, 0, sa)
   340  		if err == syscall.EINTR {
   341  			continue
   342  		}
   343  		if err == syscall.EAGAIN && fd.pd.pollable() {
   344  			if err = fd.pd.waitWrite(fd.isFile); err == nil {
   345  				continue
   346  			}
   347  		}
   348  		if err != nil {
   349  			return 0, err
   350  		}
   351  		return len(p), nil
   352  	}
   353  }
   354  
   355  // WriteMsg wraps the sendmsg network call.
   356  func (fd *FD) WriteMsg(p []byte, oob []byte, sa syscall.Sockaddr) (int, int, error) {
   357  	if err := fd.writeLock(); err != nil {
   358  		return 0, 0, err
   359  	}
   360  	defer fd.writeUnlock()
   361  	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
   362  		return 0, 0, err
   363  	}
   364  	for {
   365  		n, err := syscall.SendmsgN(fd.Sysfd, p, oob, sa, 0)
   366  		if err == syscall.EINTR {
   367  			continue
   368  		}
   369  		if err == syscall.EAGAIN && fd.pd.pollable() {
   370  			if err = fd.pd.waitWrite(fd.isFile); err == nil {
   371  				continue
   372  			}
   373  		}
   374  		if err != nil {
   375  			return n, 0, err
   376  		}
   377  		return n, len(oob), err
   378  	}
   379  }
   380  
   381  // Accept wraps the accept network call.
   382  func (fd *FD) Accept() (int, syscall.Sockaddr, string, error) {
   383  	if err := fd.readLock(); err != nil {
   384  		return -1, nil, "", err
   385  	}
   386  	defer fd.readUnlock()
   387  
   388  	if err := fd.pd.prepareRead(fd.isFile); err != nil {
   389  		return -1, nil, "", err
   390  	}
   391  	for {
   392  		s, rsa, errcall, err := accept(fd.Sysfd)
   393  		if err == nil {
   394  			return s, rsa, "", err
   395  		}
   396  		switch err {
   397  		case syscall.EINTR:
   398  			continue
   399  		case syscall.EAGAIN:
   400  			if fd.pd.pollable() {
   401  				if err = fd.pd.waitRead(fd.isFile); err == nil {
   402  					continue
   403  				}
   404  			}
   405  		case syscall.ECONNABORTED:
   406  			// This means that a socket on the listen
   407  			// queue was closed before we Accept()ed it;
   408  			// it's a silly error, so try again.
   409  			continue
   410  		}
   411  		return -1, nil, errcall, err
   412  	}
   413  }
   414  
   415  // Seek wraps syscall.Seek.
   416  func (fd *FD) Seek(offset int64, whence int) (int64, error) {
   417  	if err := fd.incref(); err != nil {
   418  		return 0, err
   419  	}
   420  	defer fd.decref()
   421  	return syscall.Seek(fd.Sysfd, offset, whence)
   422  }
   423  
   424  // ReadDirent wraps syscall.ReadDirent.
   425  // We treat this like an ordinary system call rather than a call
   426  // that tries to fill the buffer.
   427  func (fd *FD) ReadDirent(buf []byte) (int, error) {
   428  	if err := fd.incref(); err != nil {
   429  		return 0, err
   430  	}
   431  	defer fd.decref()
   432  	for {
   433  		n, err := ignoringEINTRIO(syscall.ReadDirent, fd.Sysfd, buf)
   434  		if err != nil {
   435  			n = 0
   436  			if err == syscall.EAGAIN && fd.pd.pollable() {
   437  				if err = fd.pd.waitRead(fd.isFile); err == nil {
   438  					continue
   439  				}
   440  			}
   441  		}
   442  		// Do not call eofError; caller does not expect to see io.EOF.
   443  		return n, err
   444  	}
   445  }
   446  
   447  // Fchmod wraps syscall.Fchmod.
   448  func (fd *FD) Fchmod(mode uint32) error {
   449  	if err := fd.incref(); err != nil {
   450  		return err
   451  	}
   452  	defer fd.decref()
   453  	return ignoringEINTR(func() error {
   454  		return syscall.Fchmod(fd.Sysfd, mode)
   455  	})
   456  }
   457  
   458  // Fchdir wraps syscall.Fchdir.
   459  func (fd *FD) Fchdir() error {
   460  	if err := fd.incref(); err != nil {
   461  		return err
   462  	}
   463  	defer fd.decref()
   464  	return syscall.Fchdir(fd.Sysfd)
   465  }
   466  
   467  // Fstat wraps syscall.Fstat
   468  func (fd *FD) Fstat(s *syscall.Stat_t) error {
   469  	if err := fd.incref(); err != nil {
   470  		return err
   471  	}
   472  	defer fd.decref()
   473  	return ignoringEINTR(func() error {
   474  		return syscall.Fstat(fd.Sysfd, s)
   475  	})
   476  }
   477  
   478  // tryDupCloexec indicates whether F_DUPFD_CLOEXEC should be used.
   479  // If the kernel doesn't support it, this is set to 0.
   480  var tryDupCloexec = int32(1)
   481  
   482  // DupCloseOnExec dups fd and marks it close-on-exec.
   483  func DupCloseOnExec(fd int) (int, string, error) {
   484  	if syscall.F_DUPFD_CLOEXEC != 0 && atomic.LoadInt32(&tryDupCloexec) == 1 {
   485  		r0, e1 := fcntl(fd, syscall.F_DUPFD_CLOEXEC, 0)
   486  		if e1 == nil {
   487  			return r0, "", nil
   488  		}
   489  		switch e1.(syscall.Errno) {
   490  		case syscall.EINVAL, syscall.ENOSYS:
   491  			// Old kernel, or js/wasm (which returns
   492  			// ENOSYS). Fall back to the portable way from
   493  			// now on.
   494  			atomic.StoreInt32(&tryDupCloexec, 0)
   495  		default:
   496  			return -1, "fcntl", e1
   497  		}
   498  	}
   499  	return dupCloseOnExecOld(fd)
   500  }
   501  
   502  // dupCloseOnExecOld is the traditional way to dup an fd and
   503  // set its O_CLOEXEC bit, using two system calls.
   504  func dupCloseOnExecOld(fd int) (int, string, error) {
   505  	syscall.ForkLock.RLock()
   506  	defer syscall.ForkLock.RUnlock()
   507  	newfd, err := syscall.Dup(fd)
   508  	if err != nil {
   509  		return -1, "dup", err
   510  	}
   511  	syscall.CloseOnExec(newfd)
   512  	return newfd, "", nil
   513  }
   514  
   515  // Dup duplicates the file descriptor.
   516  func (fd *FD) Dup() (int, string, error) {
   517  	if err := fd.incref(); err != nil {
   518  		return -1, "", err
   519  	}
   520  	defer fd.decref()
   521  	return DupCloseOnExec(fd.Sysfd)
   522  }
   523  
// On Unix variants only, expose the IO event for the net code.

// WaitWrite waits until data can be written to fd.
// It returns the result of the poll descriptor's waitWrite call.
func (fd *FD) WaitWrite() error {
	return fd.pd.waitWrite(fd.isFile)
}
   530  
   531  // WriteOnce is for testing only. It makes a single write call.
   532  func (fd *FD) WriteOnce(p []byte) (int, error) {
   533  	if err := fd.writeLock(); err != nil {
   534  		return 0, err
   535  	}
   536  	defer fd.writeUnlock()
   537  	return ignoringEINTRIO(syscall.Write, fd.Sysfd, p)
   538  }
   539  
   540  // RawRead invokes the user-defined function f for a read operation.
   541  func (fd *FD) RawRead(f func(uintptr) bool) error {
   542  	if err := fd.readLock(); err != nil {
   543  		return err
   544  	}
   545  	defer fd.readUnlock()
   546  	if err := fd.pd.prepareRead(fd.isFile); err != nil {
   547  		return err
   548  	}
   549  	for {
   550  		if f(uintptr(fd.Sysfd)) {
   551  			return nil
   552  		}
   553  		if err := fd.pd.waitRead(fd.isFile); err != nil {
   554  			return err
   555  		}
   556  	}
   557  }
   558  
   559  // RawWrite invokes the user-defined function f for a write operation.
   560  func (fd *FD) RawWrite(f func(uintptr) bool) error {
   561  	if err := fd.writeLock(); err != nil {
   562  		return err
   563  	}
   564  	defer fd.writeUnlock()
   565  	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
   566  		return err
   567  	}
   568  	for {
   569  		if f(uintptr(fd.Sysfd)) {
   570  			return nil
   571  		}
   572  		if err := fd.pd.waitWrite(fd.isFile); err != nil {
   573  			return err
   574  		}
   575  	}
   576  }
   577  
   578  // ignoringEINTRIO is like ignoringEINTR, but just for IO calls.
   579  func ignoringEINTRIO(fn func(fd int, p []byte) (int, error), fd int, p []byte) (int, error) {
   580  	for {
   581  		n, err := fn(fd, p)
   582  		if err != syscall.EINTR {
   583  			return n, err
   584  		}
   585  	}
   586  }