github.com/sandwichdev/go-internals@v0.0.0-20210605002614-12311ac6b2c5/poll/fd_windows.go (about)

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package poll
     6  
     7  import (
     8  	"errors"
     9  	"io"
    10  	"sync"
    11  	"syscall"
    12  	"unicode/utf16"
    13  	"unicode/utf8"
    14  	"unsafe"
    15  
    16  	"github.com/SandwichDev/go-internals/race"
    17  	"github.com/SandwichDev/go-internals/syscall/windows"
    18  )
    19  
// initErr holds the error returned by WSAStartup in init, if any; FD.Init
// returns it to every caller before doing anything else.
//
// ioSync is never used as a value in this file: only its address is taken,
// as the object passed to race.Acquire and race.ReleaseMerge on the socket
// read and write paths.
var (
	initErr error
	ioSync  uint64
)
    24  
// This package uses the SetFileCompletionNotificationModes Windows
// API to skip calling GetQueuedCompletionStatus if an IO operation
// completes synchronously. There is a known bug where
// SetFileCompletionNotificationModes crashes on some systems (see
// https://support.microsoft.com/kb/2568167 for details).

// useSetFileCompletionNotificationModes is set to true by
// checkSetFileCompletionNotificationModes and consulted by FD.Init.
var useSetFileCompletionNotificationModes bool // determines if SetFileCompletionNotificationModes is present and safe to use
    32  
    33  // checkSetFileCompletionNotificationModes verifies that
    34  // SetFileCompletionNotificationModes Windows API is present
    35  // on the system and is safe to use.
    36  // See https://support.microsoft.com/kb/2568167 for details.
    37  func checkSetFileCompletionNotificationModes() {
    38  	err := syscall.LoadSetFileCompletionNotificationModes()
    39  	if err != nil {
    40  		return
    41  	}
    42  	protos := [2]int32{syscall.IPPROTO_TCP, 0}
    43  	var buf [32]syscall.WSAProtocolInfo
    44  	len := uint32(unsafe.Sizeof(buf))
    45  	n, err := syscall.WSAEnumProtocols(&protos[0], &buf[0], &len)
    46  	if err != nil {
    47  		return
    48  	}
    49  	for i := int32(0); i < n; i++ {
    50  		if buf[i].ServiceFlags1&syscall.XP1_IFS_HANDLES == 0 {
    51  			return
    52  		}
    53  	}
    54  	useSetFileCompletionNotificationModes = true
    55  }
    56  
    57  func init() {
    58  	var d syscall.WSAData
    59  	e := syscall.WSAStartup(uint32(0x202), &d)
    60  	if e != nil {
    61  		initErr = e
    62  	}
    63  	checkSetFileCompletionNotificationModes()
    64  }
    65  
// operation contains superset of data necessary to perform all async IO.
// Each FD owns two of them: one for reads (rop) and one for writes (wop).
type operation struct {
	// Used by IOCP interface, it must be first field
	// of the struct, as our code relies on it.
	o syscall.Overlapped

	// fields used by runtime.netpoll
	runtimeCtx uintptr // copied from fd.pd.runtimeCtx by FD.Init
	mode       int32   // 'r' or 'w', set by FD.Init
	errno      int32   // non-zero means the IO failed; execIO converts it to syscall.Errno
	qty        uint32  // byte count of the IO; execIO returns it as the result

	// fields used only by net package
	fd     *FD                    // owning descriptor, set by FD.Init
	buf    syscall.WSABuf         // single buffer, filled in by InitBuf
	msg    windows.WSAMsg         // message descriptor, filled in by InitMsg
	sa     syscall.Sockaddr       // destination/peer address for WriteTo and ConnectEx
	rsa    *syscall.RawSockaddrAny // source address storage for ReadFrom, allocated lazily
	rsan   int32                  // size in bytes of the raw sockaddr storage
	handle syscall.Handle         // socket handed to AcceptFunc by acceptOne
	flags  uint32                 // flags word passed by pointer to WSARecv/WSARecvFrom
	bufs   []syscall.WSABuf       // buffer vector, filled in by InitBufs
}
    89  
    90  func (o *operation) InitBuf(buf []byte) {
    91  	o.buf.Len = uint32(len(buf))
    92  	o.buf.Buf = nil
    93  	if len(buf) != 0 {
    94  		o.buf.Buf = &buf[0]
    95  	}
    96  }
    97  
    98  func (o *operation) InitBufs(buf *[][]byte) {
    99  	if o.bufs == nil {
   100  		o.bufs = make([]syscall.WSABuf, 0, len(*buf))
   101  	} else {
   102  		o.bufs = o.bufs[:0]
   103  	}
   104  	for _, b := range *buf {
   105  		if len(b) == 0 {
   106  			o.bufs = append(o.bufs, syscall.WSABuf{})
   107  			continue
   108  		}
   109  		for len(b) > maxRW {
   110  			o.bufs = append(o.bufs, syscall.WSABuf{Len: maxRW, Buf: &b[0]})
   111  			b = b[maxRW:]
   112  		}
   113  		if len(b) > 0 {
   114  			o.bufs = append(o.bufs, syscall.WSABuf{Len: uint32(len(b)), Buf: &b[0]})
   115  		}
   116  	}
   117  }
   118  
   119  // ClearBufs clears all pointers to Buffers parameter captured
   120  // by InitBufs, so it can be released by garbage collector.
   121  func (o *operation) ClearBufs() {
   122  	for i := range o.bufs {
   123  		o.bufs[i].Buf = nil
   124  	}
   125  	o.bufs = o.bufs[:0]
   126  }
   127  
   128  func (o *operation) InitMsg(p []byte, oob []byte) {
   129  	o.InitBuf(p)
   130  	o.msg.Buffers = &o.buf
   131  	o.msg.BufferCount = 1
   132  
   133  	o.msg.Name = nil
   134  	o.msg.Namelen = 0
   135  
   136  	o.msg.Flags = 0
   137  	o.msg.Control.Len = uint32(len(oob))
   138  	o.msg.Control.Buf = nil
   139  	if len(oob) != 0 {
   140  		o.msg.Control.Buf = &oob[0]
   141  	}
   142  }
   143  
// execIO executes a single IO operation o and returns the number of bytes
// transferred (o.qty) and an error.
//
// The steps are: tell the poll descriptor that IO is starting
// (fd.pd.prepare), start the IO by calling submit, and then wait on the
// poll descriptor for the completion. The wait is skipped only when submit
// reported synchronous success and the descriptor has skipSyncNotif set,
// since in that case no completion message will arrive.
//
// If the wait is interrupted by close or by a deadline, the request is
// cancelled with CancelIoEx and execIO waits for the cancellation to
// finish before returning. An IO that nevertheless completed successfully
// is reported as a success.
//
// execIO returns an error immediately when the descriptor is not
// registered with the runtime poller (runtimeCtx == 0). It panics on an
// unexpected poller error and on a CancelIoEx failure other than
// ERROR_NOT_FOUND.
func execIO(o *operation, submit func(o *operation) error) (int, error) {
	if o.fd.pd.runtimeCtx == 0 {
		return 0, errors.New("internal error: polling on unsupported descriptor type")
	}

	fd := o.fd
	// Notify runtime netpoll about starting IO.
	err := fd.pd.prepare(int(o.mode), fd.isFile)
	if err != nil {
		return 0, err
	}
	// Start IO.
	err = submit(o)
	switch err {
	case nil:
		// IO completed immediately
		if o.fd.skipSyncNotif {
			// No completion message will follow, so return immediately.
			return int(o.qty), nil
		}
		// Need to get our completion message anyway.
	case syscall.ERROR_IO_PENDING:
		// IO started, and we have to wait for its completion.
		err = nil
	default:
		// submit failed outright; nothing is in flight.
		return 0, err
	}
	// Wait for our request to complete.
	err = fd.pd.wait(int(o.mode), fd.isFile)
	if err == nil {
		// All is good. Extract our IO results and return.
		if o.errno != 0 {
			err = syscall.Errno(o.errno)
			// More data available. Return back the size of received data.
			if err == syscall.ERROR_MORE_DATA || err == windows.WSAEMSGSIZE {
				return int(o.qty), err
			}
			return 0, err
		}
		return int(o.qty), nil
	}
	// IO is interrupted by "close" or "timeout"
	netpollErr := err
	switch netpollErr {
	case ErrNetClosing, ErrFileClosing, ErrDeadlineExceeded:
		// will deal with those.
	default:
		panic("unexpected runtime.netpoll error: " + netpollErr.Error())
	}
	// Cancel our request.
	err = syscall.CancelIoEx(fd.Sysfd, &o.o)
	// Assuming ERROR_NOT_FOUND is returned, if IO is completed.
	if err != nil && err != syscall.ERROR_NOT_FOUND {
		// TODO(brainman): maybe do something else, but panic.
		panic(err)
	}
	// Wait for cancellation to complete.
	fd.pd.waitCanceled(int(o.mode))
	if o.errno != 0 {
		err = syscall.Errno(o.errno)
		if err == syscall.ERROR_OPERATION_ABORTED { // IO Canceled
			// Report the reason for the cancellation (close or deadline)
			// rather than the generic "aborted" code.
			err = netpollErr
		}
		return 0, err
	}
	// We issued a cancellation request. But, it seems, IO operation succeeded
	// before the cancellation request run. We need to treat the IO operation as
	// succeeded (the bytes are actually sent/recv from network).
	return int(o.qty), nil
}
   218  
// FD is a file descriptor. The net and os packages embed this type in
// a larger type representing a network connection or OS file.
type FD struct {
	// Lock sysfd and serialize access to Read and Write methods.
	fdmu fdMutex

	// System file descriptor. Immutable until Close.
	Sysfd syscall.Handle

	// Read operation.
	rop operation
	// Write operation.
	wop operation

	// I/O poller.
	pd pollDesc

	// Used to implement pread/pwrite.
	// It is also held by Seek and by Read/Write on non-socket descriptors.
	l sync.Mutex

	// For console I/O.
	lastbits       []byte   // first few bytes of the last incomplete rune in last write
	readuint16     []uint16 // buffer to hold uint16s obtained with ReadConsole
	readbyte       []byte   // buffer to hold decoding of readuint16 from utf16 to utf8
	readbyteOffset int      // readbyte[readbyteOffset:] is yet to be consumed with file.Read

	// Semaphore signaled when file is closed.
	csema uint32

	// Set by Init when FILE_SKIP_COMPLETION_PORT_ON_SUCCESS was enabled on
	// the handle; execIO then returns without waiting for a completion
	// message when an IO completes synchronously.
	skipSyncNotif bool

	// Whether this is a streaming descriptor, as opposed to a
	// packet-based descriptor like a UDP socket.
	IsStream bool

	// Whether a zero byte read indicates EOF. This is false for a
	// message based socket connection.
	ZeroReadIsEOF bool

	// Whether this is a file rather than a network socket.
	// Init sets it to true for every kind other than kindNet.
	isFile bool

	// The kind of this file.
	kind fileKind
}
   264  
   265  // fileKind describes the kind of file.
   266  type fileKind byte
   267  
   268  const (
   269  	kindNet fileKind = iota
   270  	kindFile
   271  	kindConsole
   272  	kindDir
   273  	kindPipe
   274  )
   275  
// logInitFD is set by tests to enable file descriptor initialization logging.
// When it is non-nil, FD.Init calls it with the network name, the FD and the
// error (possibly nil) from registering the descriptor with the poller.
var logInitFD func(net string, fd *FD, err error)
   278  
// Init initializes the FD. The Sysfd field should already be set.
// This can be called multiple times on a single FD.
// The net argument is a network name from the net package (e.g., "tcp"),
// or "file" or "console" or "dir" or "pipe"; any other name is rejected
// with an error.
// Set pollable to true if fd should be managed by runtime netpoll.
//
// On failure the returned string names the system call that failed
// ("wsaioctl") when that is known, and is empty otherwise.
func (fd *FD) Init(net string, pollable bool) (string, error) {
	// Winsock failed to start in init; no descriptor can be used.
	if initErr != nil {
		return "", initErr
	}

	switch net {
	case "file":
		fd.kind = kindFile
	case "console":
		fd.kind = kindConsole
	case "dir":
		fd.kind = kindDir
	case "pipe":
		fd.kind = kindPipe
	case "tcp", "tcp4", "tcp6",
		"udp", "udp4", "udp6",
		"ip", "ip4", "ip6",
		"unix", "unixgram", "unixpacket":
		fd.kind = kindNet
	default:
		return "", errors.New("internal error: unknown network type " + net)
	}
	fd.isFile = fd.kind != kindNet

	var err error
	if pollable {
		// Only call init for a network socket.
		// This means that we don't add files to the runtime poller.
		// Adding files to the runtime poller can confuse matters
		// if the user is doing their own overlapped I/O.
		// See issue #21172.
		//
		// In general the code below avoids calling the execIO
		// function for non-network sockets. If some method does
		// somehow call execIO, then execIO, and therefore the
		// calling method, will return an error, because
		// fd.pd.runtimeCtx will be 0.
		err = fd.pd.init(fd)
	}
	// The test hook sees the result of poller registration, success or not.
	if logInitFD != nil {
		logInitFD(net, fd, err)
	}
	if err != nil {
		return "", err
	}
	if pollable && useSetFileCompletionNotificationModes {
		// We do not use events, so we can skip them always.
		flags := uint8(syscall.FILE_SKIP_SET_EVENT_ON_HANDLE)
		// It's not safe to skip completion notifications for UDP:
		// https://docs.microsoft.com/en-us/archive/blogs/winserverperformance/designing-applications-for-high-performance-part-iii
		// Note that only the exact name "tcp" enables the skip here;
		// "tcp4" and "tcp6" do not.
		if net == "tcp" {
			flags |= syscall.FILE_SKIP_COMPLETION_PORT_ON_SUCCESS
		}
		// A failure here is not fatal: the descriptor simply keeps
		// receiving completion messages for synchronous IO.
		err := syscall.SetFileCompletionNotificationModes(fd.Sysfd, flags)
		if err == nil && flags&syscall.FILE_SKIP_COMPLETION_PORT_ON_SUCCESS != 0 {
			fd.skipSyncNotif = true
		}
	}
	// Disable SIO_UDP_CONNRESET behavior.
	// http://support.microsoft.com/kb/263823
	switch net {
	case "udp", "udp4", "udp6":
		ret := uint32(0)
		flag := uint32(0) // the value 0 is passed as the ioctl input
		size := uint32(unsafe.Sizeof(flag))
		err := syscall.WSAIoctl(fd.Sysfd, syscall.SIO_UDP_CONNRESET, (*byte)(unsafe.Pointer(&flag)), size, nil, 0, &ret, nil, 0)
		if err != nil {
			return "wsaioctl", err
		}
	}
	// Wire the read and write operations back to this descriptor and
	// to its poller context.
	fd.rop.mode = 'r'
	fd.wop.mode = 'w'
	fd.rop.fd = fd
	fd.wop.fd = fd
	fd.rop.runtimeCtx = fd.pd.runtimeCtx
	fd.wop.runtimeCtx = fd.pd.runtimeCtx
	return "", nil
}
   362  
   363  func (fd *FD) destroy() error {
   364  	if fd.Sysfd == syscall.InvalidHandle {
   365  		return syscall.EINVAL
   366  	}
   367  	// Poller may want to unregister fd in readiness notification mechanism,
   368  	// so this must be executed before fd.CloseFunc.
   369  	fd.pd.close()
   370  	var err error
   371  	switch fd.kind {
   372  	case kindNet:
   373  		// The net package uses the CloseFunc variable for testing.
   374  		err = CloseFunc(fd.Sysfd)
   375  	case kindDir:
   376  		err = syscall.FindClose(fd.Sysfd)
   377  	default:
   378  		err = syscall.CloseHandle(fd.Sysfd)
   379  	}
   380  	fd.Sysfd = syscall.InvalidHandle
   381  	runtime_Semrelease(&fd.csema)
   382  	return err
   383  }
   384  
   385  // Close closes the FD. The underlying file descriptor is closed by
   386  // the destroy method when there are no remaining references.
   387  func (fd *FD) Close() error {
   388  	if !fd.fdmu.increfAndClose() {
   389  		return errClosing(fd.isFile)
   390  	}
   391  	if fd.kind == kindPipe {
   392  		syscall.CancelIoEx(fd.Sysfd, nil)
   393  	}
   394  	// unblock pending reader and writer
   395  	fd.pd.evict()
   396  	err := fd.decref()
   397  	// Wait until the descriptor is closed. If this was the only
   398  	// reference, it is already closed.
   399  	runtime_Semacquire(&fd.csema)
   400  	return err
   401  }
   402  
   403  // Windows ReadFile and WSARecv use DWORD (uint32) parameter to pass buffer length.
   404  // This prevents us reading blocks larger than 4GB.
   405  // See golang.org/issue/26923.
   406  const maxRW = 1 << 30 // 1GB is large enough and keeps subsequent reads aligned
   407  
   408  // Read implements io.Reader.
   409  func (fd *FD) Read(buf []byte) (int, error) {
   410  	if err := fd.readLock(); err != nil {
   411  		return 0, err
   412  	}
   413  	defer fd.readUnlock()
   414  
   415  	if len(buf) > maxRW {
   416  		buf = buf[:maxRW]
   417  	}
   418  
   419  	var n int
   420  	var err error
   421  	if fd.isFile {
   422  		fd.l.Lock()
   423  		defer fd.l.Unlock()
   424  		switch fd.kind {
   425  		case kindConsole:
   426  			n, err = fd.readConsole(buf)
   427  		default:
   428  			n, err = syscall.Read(fd.Sysfd, buf)
   429  			if fd.kind == kindPipe && err == syscall.ERROR_OPERATION_ABORTED {
   430  				// Close uses CancelIoEx to interrupt concurrent I/O for pipes.
   431  				// If the fd is a pipe and the Read was interrupted by CancelIoEx,
   432  				// we assume it is interrupted by Close.
   433  				err = ErrFileClosing
   434  			}
   435  		}
   436  		if err != nil {
   437  			n = 0
   438  		}
   439  	} else {
   440  		o := &fd.rop
   441  		o.InitBuf(buf)
   442  		n, err = execIO(o, func(o *operation) error {
   443  			return syscall.WSARecv(o.fd.Sysfd, &o.buf, 1, &o.qty, &o.flags, &o.o, nil)
   444  		})
   445  		if race.Enabled {
   446  			race.Acquire(unsafe.Pointer(&ioSync))
   447  		}
   448  	}
   449  	if len(buf) != 0 {
   450  		err = fd.eofError(n, err)
   451  	}
   452  	return n, err
   453  }
   454  
// ReadConsole is the function readConsole calls to read UTF-16 code units
// from a console handle. It is a variable so that it can be replaced.
var ReadConsole = syscall.ReadConsole // changed for testing
   456  
// readConsole reads utf16 characters from console File,
// encodes them into utf8 and stores them in buffer b.
// It returns the number of utf8 bytes read and an error, if any.
//
// Decoded bytes that do not fit in b stay in fd.readbyte and are served by
// later calls before the console is read again. Copying stops at a Ctrl-Z
// byte (0x1A); a Ctrl-Z at the very start of the pending data is consumed,
// so that call returns 0 bytes.
func (fd *FD) readConsole(b []byte) (int, error) {
	if len(b) == 0 {
		return 0, nil
	}

	if fd.readuint16 == nil {
		// Note: syscall.ReadConsole fails for very large buffers.
		// The limit is somewhere around (but not exactly) 16384.
		// Stay well below.
		fd.readuint16 = make([]uint16, 0, 10000)
		// The decode buffer holds 4 bytes per code unit.
		fd.readbyte = make([]byte, 0, 4*cap(fd.readuint16))
	}

	// Refill only when every previously decoded byte has been consumed.
	for fd.readbyteOffset >= len(fd.readbyte) {
		// Ask for no more code units than fit in the spare capacity, and
		// no more than the caller has room for.
		n := cap(fd.readuint16) - len(fd.readuint16)
		if n > len(b) {
			n = len(b)
		}
		var nw uint32
		// The pointer expression addresses the first free element just
		// past len(fd.readuint16), i.e. after a saved half surrogate.
		err := ReadConsole(fd.Sysfd, &fd.readuint16[:len(fd.readuint16)+1][len(fd.readuint16)], uint32(n), &nw, nil)
		if err != nil {
			return 0, err
		}
		uint16s := fd.readuint16[:len(fd.readuint16)+int(nw)]
		fd.readuint16 = fd.readuint16[:0]
		buf := fd.readbyte[:0]
		for i := 0; i < len(uint16s); i++ {
			r := rune(uint16s[i])
			if utf16.IsSurrogate(r) {
				if i+1 == len(uint16s) {
					if nw > 0 {
						// Save half surrogate pair for next time.
						fd.readuint16 = fd.readuint16[:1]
						fd.readuint16[0] = uint16(r)
						break
					}
					// Nothing new was read, so the pair can never be
					// completed: emit the replacement character.
					r = utf8.RuneError
				} else {
					r = utf16.DecodeRune(r, rune(uint16s[i+1]))
					if r != utf8.RuneError {
						// A valid pair consumed two code units.
						i++
					}
				}
			}
			n := utf8.EncodeRune(buf[len(buf):cap(buf)], r)
			buf = buf[:len(buf)+n]
		}
		fd.readbyte = buf
		fd.readbyteOffset = 0
		if nw == 0 {
			// The console returned no data; stop instead of looping.
			break
		}
	}

	src := fd.readbyte[fd.readbyteOffset:]
	var i int
	for i = 0; i < len(src) && i < len(b); i++ {
		x := src[i]
		if x == 0x1A { // Ctrl-Z
			if i == 0 {
				// Skip a leading Ctrl-Z so the next call moves past it.
				fd.readbyteOffset++
			}
			break
		}
		b[i] = x
	}
	fd.readbyteOffset += i
	return i, nil
}
   529  
   530  // Pread emulates the Unix pread system call.
   531  func (fd *FD) Pread(b []byte, off int64) (int, error) {
   532  	// Call incref, not readLock, because since pread specifies the
   533  	// offset it is independent from other reads.
   534  	if err := fd.incref(); err != nil {
   535  		return 0, err
   536  	}
   537  	defer fd.decref()
   538  
   539  	if len(b) > maxRW {
   540  		b = b[:maxRW]
   541  	}
   542  
   543  	fd.l.Lock()
   544  	defer fd.l.Unlock()
   545  	curoffset, e := syscall.Seek(fd.Sysfd, 0, io.SeekCurrent)
   546  	if e != nil {
   547  		return 0, e
   548  	}
   549  	defer syscall.Seek(fd.Sysfd, curoffset, io.SeekStart)
   550  	o := syscall.Overlapped{
   551  		OffsetHigh: uint32(off >> 32),
   552  		Offset:     uint32(off),
   553  	}
   554  	var done uint32
   555  	e = syscall.ReadFile(fd.Sysfd, b, &done, &o)
   556  	if e != nil {
   557  		done = 0
   558  		if e == syscall.ERROR_HANDLE_EOF {
   559  			e = io.EOF
   560  		}
   561  	}
   562  	if len(b) != 0 {
   563  		e = fd.eofError(int(done), e)
   564  	}
   565  	return int(done), e
   566  }
   567  
   568  // ReadFrom wraps the recvfrom network call.
   569  func (fd *FD) ReadFrom(buf []byte) (int, syscall.Sockaddr, error) {
   570  	if len(buf) == 0 {
   571  		return 0, nil, nil
   572  	}
   573  	if len(buf) > maxRW {
   574  		buf = buf[:maxRW]
   575  	}
   576  	if err := fd.readLock(); err != nil {
   577  		return 0, nil, err
   578  	}
   579  	defer fd.readUnlock()
   580  	o := &fd.rop
   581  	o.InitBuf(buf)
   582  	n, err := execIO(o, func(o *operation) error {
   583  		if o.rsa == nil {
   584  			o.rsa = new(syscall.RawSockaddrAny)
   585  		}
   586  		o.rsan = int32(unsafe.Sizeof(*o.rsa))
   587  		return syscall.WSARecvFrom(o.fd.Sysfd, &o.buf, 1, &o.qty, &o.flags, o.rsa, &o.rsan, &o.o, nil)
   588  	})
   589  	err = fd.eofError(n, err)
   590  	if err != nil {
   591  		return n, nil, err
   592  	}
   593  	sa, _ := o.rsa.Sockaddr()
   594  	return n, sa, nil
   595  }
   596  
   597  // Write implements io.Writer.
   598  func (fd *FD) Write(buf []byte) (int, error) {
   599  	if err := fd.writeLock(); err != nil {
   600  		return 0, err
   601  	}
   602  	defer fd.writeUnlock()
   603  	if fd.isFile {
   604  		fd.l.Lock()
   605  		defer fd.l.Unlock()
   606  	}
   607  
   608  	ntotal := 0
   609  	for len(buf) > 0 {
   610  		b := buf
   611  		if len(b) > maxRW {
   612  			b = b[:maxRW]
   613  		}
   614  		var n int
   615  		var err error
   616  		if fd.isFile {
   617  			switch fd.kind {
   618  			case kindConsole:
   619  				n, err = fd.writeConsole(b)
   620  			default:
   621  				n, err = syscall.Write(fd.Sysfd, b)
   622  				if fd.kind == kindPipe && err == syscall.ERROR_OPERATION_ABORTED {
   623  					// Close uses CancelIoEx to interrupt concurrent I/O for pipes.
   624  					// If the fd is a pipe and the Write was interrupted by CancelIoEx,
   625  					// we assume it is interrupted by Close.
   626  					err = ErrFileClosing
   627  				}
   628  			}
   629  			if err != nil {
   630  				n = 0
   631  			}
   632  		} else {
   633  			if race.Enabled {
   634  				race.ReleaseMerge(unsafe.Pointer(&ioSync))
   635  			}
   636  			o := &fd.wop
   637  			o.InitBuf(b)
   638  			n, err = execIO(o, func(o *operation) error {
   639  				return syscall.WSASend(o.fd.Sysfd, &o.buf, 1, &o.qty, 0, &o.o, nil)
   640  			})
   641  		}
   642  		ntotal += n
   643  		if err != nil {
   644  			return ntotal, err
   645  		}
   646  		buf = buf[n:]
   647  	}
   648  	return ntotal, nil
   649  }
   650  
   651  // writeConsole writes len(b) bytes to the console File.
   652  // It returns the number of bytes written and an error, if any.
   653  func (fd *FD) writeConsole(b []byte) (int, error) {
   654  	n := len(b)
   655  	runes := make([]rune, 0, 256)
   656  	if len(fd.lastbits) > 0 {
   657  		b = append(fd.lastbits, b...)
   658  		fd.lastbits = nil
   659  
   660  	}
   661  	for len(b) >= utf8.UTFMax || utf8.FullRune(b) {
   662  		r, l := utf8.DecodeRune(b)
   663  		runes = append(runes, r)
   664  		b = b[l:]
   665  	}
   666  	if len(b) > 0 {
   667  		fd.lastbits = make([]byte, len(b))
   668  		copy(fd.lastbits, b)
   669  	}
   670  	// syscall.WriteConsole seems to fail, if given large buffer.
   671  	// So limit the buffer to 16000 characters. This number was
   672  	// discovered by experimenting with syscall.WriteConsole.
   673  	const maxWrite = 16000
   674  	for len(runes) > 0 {
   675  		m := len(runes)
   676  		if m > maxWrite {
   677  			m = maxWrite
   678  		}
   679  		chunk := runes[:m]
   680  		runes = runes[m:]
   681  		uint16s := utf16.Encode(chunk)
   682  		for len(uint16s) > 0 {
   683  			var written uint32
   684  			err := syscall.WriteConsole(fd.Sysfd, &uint16s[0], uint32(len(uint16s)), &written, nil)
   685  			if err != nil {
   686  				return 0, err
   687  			}
   688  			uint16s = uint16s[written:]
   689  		}
   690  	}
   691  	return n, nil
   692  }
   693  
   694  // Pwrite emulates the Unix pwrite system call.
   695  func (fd *FD) Pwrite(buf []byte, off int64) (int, error) {
   696  	// Call incref, not writeLock, because since pwrite specifies the
   697  	// offset it is independent from other writes.
   698  	if err := fd.incref(); err != nil {
   699  		return 0, err
   700  	}
   701  	defer fd.decref()
   702  
   703  	fd.l.Lock()
   704  	defer fd.l.Unlock()
   705  	curoffset, e := syscall.Seek(fd.Sysfd, 0, io.SeekCurrent)
   706  	if e != nil {
   707  		return 0, e
   708  	}
   709  	defer syscall.Seek(fd.Sysfd, curoffset, io.SeekStart)
   710  
   711  	ntotal := 0
   712  	for len(buf) > 0 {
   713  		b := buf
   714  		if len(b) > maxRW {
   715  			b = b[:maxRW]
   716  		}
   717  		var n uint32
   718  		o := syscall.Overlapped{
   719  			OffsetHigh: uint32(off >> 32),
   720  			Offset:     uint32(off),
   721  		}
   722  		e = syscall.WriteFile(fd.Sysfd, b, &n, &o)
   723  		ntotal += int(n)
   724  		if e != nil {
   725  			return ntotal, e
   726  		}
   727  		buf = buf[n:]
   728  		off += int64(n)
   729  	}
   730  	return ntotal, nil
   731  }
   732  
   733  // Writev emulates the Unix writev system call.
   734  func (fd *FD) Writev(buf *[][]byte) (int64, error) {
   735  	if len(*buf) == 0 {
   736  		return 0, nil
   737  	}
   738  	if err := fd.writeLock(); err != nil {
   739  		return 0, err
   740  	}
   741  	defer fd.writeUnlock()
   742  	if race.Enabled {
   743  		race.ReleaseMerge(unsafe.Pointer(&ioSync))
   744  	}
   745  	o := &fd.wop
   746  	o.InitBufs(buf)
   747  	n, err := execIO(o, func(o *operation) error {
   748  		return syscall.WSASend(o.fd.Sysfd, &o.bufs[0], uint32(len(o.bufs)), &o.qty, 0, &o.o, nil)
   749  	})
   750  	o.ClearBufs()
   751  	TestHookDidWritev(n)
   752  	consume(buf, int64(n))
   753  	return int64(n), err
   754  }
   755  
   756  // WriteTo wraps the sendto network call.
   757  func (fd *FD) WriteTo(buf []byte, sa syscall.Sockaddr) (int, error) {
   758  	if err := fd.writeLock(); err != nil {
   759  		return 0, err
   760  	}
   761  	defer fd.writeUnlock()
   762  
   763  	if len(buf) == 0 {
   764  		// handle zero-byte payload
   765  		o := &fd.wop
   766  		o.InitBuf(buf)
   767  		o.sa = sa
   768  		n, err := execIO(o, func(o *operation) error {
   769  			return syscall.WSASendto(o.fd.Sysfd, &o.buf, 1, &o.qty, 0, o.sa, &o.o, nil)
   770  		})
   771  		return n, err
   772  	}
   773  
   774  	ntotal := 0
   775  	for len(buf) > 0 {
   776  		b := buf
   777  		if len(b) > maxRW {
   778  			b = b[:maxRW]
   779  		}
   780  		o := &fd.wop
   781  		o.InitBuf(b)
   782  		o.sa = sa
   783  		n, err := execIO(o, func(o *operation) error {
   784  			return syscall.WSASendto(o.fd.Sysfd, &o.buf, 1, &o.qty, 0, o.sa, &o.o, nil)
   785  		})
   786  		ntotal += int(n)
   787  		if err != nil {
   788  			return ntotal, err
   789  		}
   790  		buf = buf[n:]
   791  	}
   792  	return ntotal, nil
   793  }
   794  
   795  // Call ConnectEx. This doesn't need any locking, since it is only
   796  // called when the descriptor is first created. This is here rather
   797  // than in the net package so that it can use fd.wop.
   798  func (fd *FD) ConnectEx(ra syscall.Sockaddr) error {
   799  	o := &fd.wop
   800  	o.sa = ra
   801  	_, err := execIO(o, func(o *operation) error {
   802  		return ConnectExFunc(o.fd.Sysfd, o.sa, nil, 0, nil, &o.o)
   803  	})
   804  	return err
   805  }
   806  
   807  func (fd *FD) acceptOne(s syscall.Handle, rawsa []syscall.RawSockaddrAny, o *operation) (string, error) {
   808  	// Submit accept request.
   809  	o.handle = s
   810  	o.rsan = int32(unsafe.Sizeof(rawsa[0]))
   811  	_, err := execIO(o, func(o *operation) error {
   812  		return AcceptFunc(o.fd.Sysfd, o.handle, (*byte)(unsafe.Pointer(&rawsa[0])), 0, uint32(o.rsan), uint32(o.rsan), &o.qty, &o.o)
   813  	})
   814  	if err != nil {
   815  		CloseFunc(s)
   816  		return "acceptex", err
   817  	}
   818  
   819  	// Inherit properties of the listening socket.
   820  	err = syscall.Setsockopt(s, syscall.SOL_SOCKET, syscall.SO_UPDATE_ACCEPT_CONTEXT, (*byte)(unsafe.Pointer(&fd.Sysfd)), int32(unsafe.Sizeof(fd.Sysfd)))
   821  	if err != nil {
   822  		CloseFunc(s)
   823  		return "setsockopt", err
   824  	}
   825  
   826  	return "", nil
   827  }
   828  
   829  // Accept handles accepting a socket. The sysSocket parameter is used
   830  // to allocate the net socket.
   831  func (fd *FD) Accept(sysSocket func() (syscall.Handle, error)) (syscall.Handle, []syscall.RawSockaddrAny, uint32, string, error) {
   832  	if err := fd.readLock(); err != nil {
   833  		return syscall.InvalidHandle, nil, 0, "", err
   834  	}
   835  	defer fd.readUnlock()
   836  
   837  	o := &fd.rop
   838  	var rawsa [2]syscall.RawSockaddrAny
   839  	for {
   840  		s, err := sysSocket()
   841  		if err != nil {
   842  			return syscall.InvalidHandle, nil, 0, "", err
   843  		}
   844  
   845  		errcall, err := fd.acceptOne(s, rawsa[:], o)
   846  		if err == nil {
   847  			return s, rawsa[:], uint32(o.rsan), "", nil
   848  		}
   849  
   850  		// Sometimes we see WSAECONNRESET and ERROR_NETNAME_DELETED is
   851  		// returned here. These happen if connection reset is received
   852  		// before AcceptEx could complete. These errors relate to new
   853  		// connection, not to AcceptEx, so ignore broken connection and
   854  		// try AcceptEx again for more connections.
   855  		errno, ok := err.(syscall.Errno)
   856  		if !ok {
   857  			return syscall.InvalidHandle, nil, 0, errcall, err
   858  		}
   859  		switch errno {
   860  		case syscall.ERROR_NETNAME_DELETED, syscall.WSAECONNRESET:
   861  			// ignore these and try again
   862  		default:
   863  			return syscall.InvalidHandle, nil, 0, errcall, err
   864  		}
   865  	}
   866  }
   867  
   868  // Seek wraps syscall.Seek.
   869  func (fd *FD) Seek(offset int64, whence int) (int64, error) {
   870  	if err := fd.incref(); err != nil {
   871  		return 0, err
   872  	}
   873  	defer fd.decref()
   874  
   875  	fd.l.Lock()
   876  	defer fd.l.Unlock()
   877  
   878  	return syscall.Seek(fd.Sysfd, offset, whence)
   879  }
   880  
   881  // FindNextFile wraps syscall.FindNextFile.
   882  func (fd *FD) FindNextFile(data *syscall.Win32finddata) error {
   883  	if err := fd.incref(); err != nil {
   884  		return err
   885  	}
   886  	defer fd.decref()
   887  	return syscall.FindNextFile(fd.Sysfd, data)
   888  }
   889  
   890  // Fchmod updates syscall.ByHandleFileInformation.Fileattributes when needed.
   891  func (fd *FD) Fchmod(mode uint32) error {
   892  	if err := fd.incref(); err != nil {
   893  		return err
   894  	}
   895  	defer fd.decref()
   896  
   897  	var d syscall.ByHandleFileInformation
   898  	if err := syscall.GetFileInformationByHandle(fd.Sysfd, &d); err != nil {
   899  		return err
   900  	}
   901  	attrs := d.FileAttributes
   902  	if mode&syscall.S_IWRITE != 0 {
   903  		attrs &^= syscall.FILE_ATTRIBUTE_READONLY
   904  	} else {
   905  		attrs |= syscall.FILE_ATTRIBUTE_READONLY
   906  	}
   907  	if attrs == d.FileAttributes {
   908  		return nil
   909  	}
   910  
   911  	var du windows.FILE_BASIC_INFO
   912  	du.FileAttributes = attrs
   913  	l := uint32(unsafe.Sizeof(d))
   914  	return windows.SetFileInformationByHandle(fd.Sysfd, windows.FileBasicInfo, uintptr(unsafe.Pointer(&du)), l)
   915  }
   916  
   917  // Fchdir wraps syscall.Fchdir.
   918  func (fd *FD) Fchdir() error {
   919  	if err := fd.incref(); err != nil {
   920  		return err
   921  	}
   922  	defer fd.decref()
   923  	return syscall.Fchdir(fd.Sysfd)
   924  }
   925  
   926  // GetFileType wraps syscall.GetFileType.
   927  func (fd *FD) GetFileType() (uint32, error) {
   928  	if err := fd.incref(); err != nil {
   929  		return 0, err
   930  	}
   931  	defer fd.decref()
   932  	return syscall.GetFileType(fd.Sysfd)
   933  }
   934  
   935  // GetFileInformationByHandle wraps GetFileInformationByHandle.
   936  func (fd *FD) GetFileInformationByHandle(data *syscall.ByHandleFileInformation) error {
   937  	if err := fd.incref(); err != nil {
   938  		return err
   939  	}
   940  	defer fd.decref()
   941  	return syscall.GetFileInformationByHandle(fd.Sysfd, data)
   942  }
   943  
   944  // RawRead invokes the user-defined function f for a read operation.
   945  func (fd *FD) RawRead(f func(uintptr) bool) error {
   946  	if err := fd.readLock(); err != nil {
   947  		return err
   948  	}
   949  	defer fd.readUnlock()
   950  	for {
   951  		if f(uintptr(fd.Sysfd)) {
   952  			return nil
   953  		}
   954  
   955  		// Use a zero-byte read as a way to get notified when this
   956  		// socket is readable. h/t https://stackoverflow.com/a/42019668/332798
   957  		o := &fd.rop
   958  		o.InitBuf(nil)
   959  		if !fd.IsStream {
   960  			o.flags |= windows.MSG_PEEK
   961  		}
   962  		_, err := execIO(o, func(o *operation) error {
   963  			return syscall.WSARecv(o.fd.Sysfd, &o.buf, 1, &o.qty, &o.flags, &o.o, nil)
   964  		})
   965  		if err == windows.WSAEMSGSIZE {
   966  			// expected with a 0-byte peek, ignore.
   967  		} else if err != nil {
   968  			return err
   969  		}
   970  	}
   971  }
   972  
   973  // RawWrite invokes the user-defined function f for a write operation.
   974  func (fd *FD) RawWrite(f func(uintptr) bool) error {
   975  	if err := fd.writeLock(); err != nil {
   976  		return err
   977  	}
   978  	defer fd.writeUnlock()
   979  
   980  	if f(uintptr(fd.Sysfd)) {
   981  		return nil
   982  	}
   983  
   984  	// TODO(tmm1): find a way to detect socket writability
   985  	return syscall.EWINDOWS
   986  }
   987  
   988  func sockaddrToRaw(sa syscall.Sockaddr) (unsafe.Pointer, int32, error) {
   989  	switch sa := sa.(type) {
   990  	case *syscall.SockaddrInet4:
   991  		var raw syscall.RawSockaddrInet4
   992  		raw.Family = syscall.AF_INET
   993  		p := (*[2]byte)(unsafe.Pointer(&raw.Port))
   994  		p[0] = byte(sa.Port >> 8)
   995  		p[1] = byte(sa.Port)
   996  		for i := 0; i < len(sa.Addr); i++ {
   997  			raw.Addr[i] = sa.Addr[i]
   998  		}
   999  		return unsafe.Pointer(&raw), int32(unsafe.Sizeof(raw)), nil
  1000  	case *syscall.SockaddrInet6:
  1001  		var raw syscall.RawSockaddrInet6
  1002  		raw.Family = syscall.AF_INET6
  1003  		p := (*[2]byte)(unsafe.Pointer(&raw.Port))
  1004  		p[0] = byte(sa.Port >> 8)
  1005  		p[1] = byte(sa.Port)
  1006  		raw.Scope_id = sa.ZoneId
  1007  		for i := 0; i < len(sa.Addr); i++ {
  1008  			raw.Addr[i] = sa.Addr[i]
  1009  		}
  1010  		return unsafe.Pointer(&raw), int32(unsafe.Sizeof(raw)), nil
  1011  	default:
  1012  		return nil, 0, syscall.EWINDOWS
  1013  	}
  1014  }
  1015  
  1016  // ReadMsg wraps the WSARecvMsg network call.
  1017  func (fd *FD) ReadMsg(p []byte, oob []byte) (int, int, int, syscall.Sockaddr, error) {
  1018  	if err := fd.readLock(); err != nil {
  1019  		return 0, 0, 0, nil, err
  1020  	}
  1021  	defer fd.readUnlock()
  1022  
  1023  	if len(p) > maxRW {
  1024  		p = p[:maxRW]
  1025  	}
  1026  
  1027  	o := &fd.rop
  1028  	o.InitMsg(p, oob)
  1029  	o.rsa = new(syscall.RawSockaddrAny)
  1030  	o.msg.Name = (syscall.Pointer)(unsafe.Pointer(o.rsa))
  1031  	o.msg.Namelen = int32(unsafe.Sizeof(*o.rsa))
  1032  	n, err := execIO(o, func(o *operation) error {
  1033  		return windows.WSARecvMsg(o.fd.Sysfd, &o.msg, &o.qty, &o.o, nil)
  1034  	})
  1035  	err = fd.eofError(n, err)
  1036  	var sa syscall.Sockaddr
  1037  	if err == nil {
  1038  		sa, err = o.rsa.Sockaddr()
  1039  	}
  1040  	return n, int(o.msg.Control.Len), int(o.msg.Flags), sa, err
  1041  }
  1042  
  1043  // WriteMsg wraps the WSASendMsg network call.
  1044  func (fd *FD) WriteMsg(p []byte, oob []byte, sa syscall.Sockaddr) (int, int, error) {
  1045  	if len(p) > maxRW {
  1046  		return 0, 0, errors.New("packet is too large (only 1GB is allowed)")
  1047  	}
  1048  
  1049  	if err := fd.writeLock(); err != nil {
  1050  		return 0, 0, err
  1051  	}
  1052  	defer fd.writeUnlock()
  1053  
  1054  	o := &fd.wop
  1055  	o.InitMsg(p, oob)
  1056  	if sa != nil {
  1057  		rsa, len, err := sockaddrToRaw(sa)
  1058  		if err != nil {
  1059  			return 0, 0, err
  1060  		}
  1061  		o.msg.Name = (syscall.Pointer)(rsa)
  1062  		o.msg.Namelen = len
  1063  	}
  1064  	n, err := execIO(o, func(o *operation) error {
  1065  		return windows.WSASendMsg(o.fd.Sysfd, &o.msg, 0, &o.qty, &o.o, nil)
  1066  	})
  1067  	return n, int(o.msg.Control.Len), err
  1068  }