github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/unet/unet_unsafe.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package unet
    16  
    17  import (
    18  	"io"
    19  	"sync/atomic"
    20  	"unsafe"
    21  
    22  	"golang.org/x/sys/unix"
    23  )
    24  
    25  // wait blocks until the socket FD is ready for reading or writing, depending
    26  // on the value of write.
    27  //
    28  // Returns errClosing if the Socket is in the process of closing.
    29  func (s *Socket) wait(write bool) error {
    30  	for {
    31  		// Checking the FD on each loop is not strictly necessary, it
    32  		// just avoids an extra poll call.
    33  		fd := atomic.LoadInt32(&s.fd)
    34  		if fd < 0 {
    35  			return errClosing
    36  		}
    37  
    38  		events := []unix.PollFd{
    39  			{
    40  				// The actual socket FD.
    41  				Fd:     fd,
    42  				Events: unix.POLLIN,
    43  			},
    44  			{
    45  				// The eventfd, signaled when we are closing.
    46  				Fd:     int32(s.efd),
    47  				Events: unix.POLLIN,
    48  			},
    49  		}
    50  		if write {
    51  			events[0].Events = unix.POLLOUT
    52  		}
    53  
    54  		_, _, e := unix.Syscall6(unix.SYS_PPOLL, uintptr(unsafe.Pointer(&events[0])), 2, 0, 0, 0, 0)
    55  		if e == unix.EINTR {
    56  			continue
    57  		}
    58  		if e != 0 {
    59  			return e
    60  		}
    61  
    62  		if events[1].Revents&unix.POLLIN == unix.POLLIN {
    63  			// eventfd signaled, we're closing.
    64  			return errClosing
    65  		}
    66  
    67  		return nil
    68  	}
    69  }
    70  
    71  // buildIovec builds an iovec slice from the given []byte slice.
    72  //
    73  // iovecs is used as an initial slice, to avoid excessive allocations.
    74  func buildIovec(bufs [][]byte, iovecs []unix.Iovec) ([]unix.Iovec, int) {
    75  	var length int
    76  	for i := range bufs {
    77  		if l := len(bufs[i]); l > 0 {
    78  			iovecs = append(iovecs, unix.Iovec{
    79  				Base: &bufs[i][0],
    80  				Len:  uint64(l),
    81  			})
    82  			length += l
    83  		}
    84  	}
    85  	return iovecs, length
    86  }
    87  
    88  // ReadVec reads into the pre-allocated bufs. Returns bytes read.
    89  //
    90  // The pre-allocatted space used by ReadVec is based upon slice lengths.
    91  //
    92  // This function is not guaranteed to read all available data, it
    93  // returns as soon as a single recvmsg call succeeds.
    94  func (r *SocketReader) ReadVec(bufs [][]byte) (int, error) {
    95  	iovecs, length := buildIovec(bufs, make([]unix.Iovec, 0, 2))
    96  
    97  	var msg unix.Msghdr
    98  	if len(r.source) != 0 {
    99  		msg.Name = &r.source[0]
   100  		msg.Namelen = uint32(len(r.source))
   101  	}
   102  
   103  	if len(r.ControlMessage) != 0 {
   104  		msg.Control = &r.ControlMessage[0]
   105  		msg.Controllen = uint64(len(r.ControlMessage))
   106  	}
   107  
   108  	if len(iovecs) != 0 {
   109  		msg.Iov = &iovecs[0]
   110  		msg.Iovlen = uint64(len(iovecs))
   111  	}
   112  
   113  	// n is the bytes received.
   114  	var n uintptr
   115  
   116  	fd, ok := r.socket.enterFD()
   117  	if !ok {
   118  		return 0, unix.EBADF
   119  	}
   120  	// Leave on returns below.
   121  	for {
   122  		var e unix.Errno
   123  
   124  		// Try a non-blocking recv first, so we don't give up the go runtime M.
   125  		n, _, e = unix.RawSyscall(unix.SYS_RECVMSG, uintptr(fd), uintptr(unsafe.Pointer(&msg)), unix.MSG_DONTWAIT|unix.MSG_TRUNC)
   126  		if e == 0 {
   127  			break
   128  		}
   129  		if e == unix.EINTR {
   130  			continue
   131  		}
   132  		if !r.blocking {
   133  			r.socket.gate.Leave()
   134  			return 0, e
   135  		}
   136  		if e != unix.EAGAIN && e != unix.EWOULDBLOCK {
   137  			r.socket.gate.Leave()
   138  			return 0, e
   139  		}
   140  
   141  		// Wait for the socket to become readable.
   142  		err := r.socket.wait(false)
   143  		if err == errClosing {
   144  			err = unix.EBADF
   145  		}
   146  		if err != nil {
   147  			r.socket.gate.Leave()
   148  			return 0, err
   149  		}
   150  	}
   151  
   152  	r.socket.gate.Leave()
   153  
   154  	if msg.Controllen < uint64(len(r.ControlMessage)) {
   155  		r.ControlMessage = r.ControlMessage[:msg.Controllen]
   156  	}
   157  
   158  	if msg.Namelen < uint32(len(r.source)) {
   159  		r.source = r.source[:msg.Namelen]
   160  	}
   161  
   162  	// All unet sockets are SOCK_STREAM or SOCK_SEQPACKET, both of which
   163  	// indicate that the other end is closed by returning a 0 length read
   164  	// with no error.
   165  	if n == 0 {
   166  		return 0, io.EOF
   167  	}
   168  
   169  	if r.race != nil {
   170  		// See comments on Socket.race.
   171  		atomic.AddInt32(r.race, 1)
   172  	}
   173  
   174  	if int(n) > length {
   175  		return length, errMessageTruncated
   176  	}
   177  
   178  	return int(n), nil
   179  }
   180  
   181  // WriteVec writes the bufs to the socket. Returns bytes written.
   182  //
   183  // This function is not guaranteed to send all data, it returns
   184  // as soon as a single sendmsg call succeeds.
   185  func (w *SocketWriter) WriteVec(bufs [][]byte) (int, error) {
   186  	iovecs, _ := buildIovec(bufs, make([]unix.Iovec, 0, 2))
   187  
   188  	if w.race != nil {
   189  		// See comments on Socket.race.
   190  		atomic.AddInt32(w.race, 1)
   191  	}
   192  
   193  	var msg unix.Msghdr
   194  	if len(w.to) != 0 {
   195  		msg.Name = &w.to[0]
   196  		msg.Namelen = uint32(len(w.to))
   197  	}
   198  
   199  	if len(w.ControlMessage) != 0 {
   200  		msg.Control = &w.ControlMessage[0]
   201  		msg.Controllen = uint64(len(w.ControlMessage))
   202  	}
   203  
   204  	if len(iovecs) > 0 {
   205  		msg.Iov = &iovecs[0]
   206  		msg.Iovlen = uint64(len(iovecs))
   207  	}
   208  
   209  	fd, ok := w.socket.enterFD()
   210  	if !ok {
   211  		return 0, unix.EBADF
   212  	}
   213  	// Leave on returns below.
   214  	for {
   215  		// Try a non-blocking send first, so we don't give up the go runtime M.
   216  		n, _, e := unix.RawSyscall(unix.SYS_SENDMSG, uintptr(fd), uintptr(unsafe.Pointer(&msg)), unix.MSG_DONTWAIT|unix.MSG_NOSIGNAL)
   217  		if e == 0 {
   218  			w.socket.gate.Leave()
   219  			return int(n), nil
   220  		}
   221  		if e == unix.EINTR {
   222  			continue
   223  		}
   224  		if !w.blocking {
   225  			w.socket.gate.Leave()
   226  			return 0, e
   227  		}
   228  		if e != unix.EAGAIN && e != unix.EWOULDBLOCK {
   229  			w.socket.gate.Leave()
   230  			return 0, e
   231  		}
   232  
   233  		// Wait for the socket to become writeable.
   234  		err := w.socket.wait(true)
   235  		if err == errClosing {
   236  			err = unix.EBADF
   237  		}
   238  		if err != nil {
   239  			w.socket.gate.Leave()
   240  			return 0, err
   241  		}
   242  	}
   243  	// Unreachable, no s.gate.Leave needed.
   244  }
   245  
   246  // getsockopt issues a getsockopt unix.
   247  func getsockopt(fd int, level int, optname int, buf []byte) (uint32, error) {
   248  	l := uint32(len(buf))
   249  	_, _, e := unix.RawSyscall6(unix.SYS_GETSOCKOPT, uintptr(fd), uintptr(level), uintptr(optname), uintptr(unsafe.Pointer(&buf[0])), uintptr(unsafe.Pointer(&l)), 0)
   250  	if e != 0 {
   251  		return 0, e
   252  	}
   253  
   254  	return l, nil
   255  }
   256  
   257  // setsockopt issues a setsockopt unix.
   258  func setsockopt(fd int, level int, optname int, buf []byte) error {
   259  	_, _, e := unix.RawSyscall6(unix.SYS_SETSOCKOPT, uintptr(fd), uintptr(level), uintptr(optname), uintptr(unsafe.Pointer(&buf[0])), uintptr(len(buf)), 0)
   260  	if e != 0 {
   261  		return e
   262  	}
   263  
   264  	return nil
   265  }
   266  
   267  // getsockname issues a getsockname unix.
   268  func getsockname(fd int, buf []byte) (uint32, error) {
   269  	l := uint32(len(buf))
   270  	_, _, e := unix.RawSyscall(unix.SYS_GETSOCKNAME, uintptr(fd), uintptr(unsafe.Pointer(&buf[0])), uintptr(unsafe.Pointer(&l)))
   271  	if e != 0 {
   272  		return 0, e
   273  	}
   274  
   275  	return l, nil
   276  }
   277  
   278  // getpeername issues a getpeername unix.
   279  func getpeername(fd int, buf []byte) (uint32, error) {
   280  	l := uint32(len(buf))
   281  	_, _, e := unix.RawSyscall(unix.SYS_GETPEERNAME, uintptr(fd), uintptr(unsafe.Pointer(&buf[0])), uintptr(unsafe.Pointer(&l)))
   282  	if e != 0 {
   283  		return 0, e
   284  	}
   285  
   286  	return l, nil
   287  }