github.com/sagernet/gvisor@v0.0.0-20240428053021-e691de28565f/pkg/unet/unet_unsafe.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package unet
    16  
    17  import (
    18  	"io"
    19  	"unsafe"
    20  
    21  	"golang.org/x/sys/unix"
    22  )
    23  
    24  // wait blocks until the socket FD is ready for reading or writing, depending
    25  // on the value of write.
    26  //
    27  // Returns errClosing if the Socket is in the process of closing.
    28  func (s *Socket) wait(write bool) error {
    29  	for {
    30  		// Checking the FD on each loop is not strictly necessary, it
    31  		// just avoids an extra poll call.
    32  		fd := s.fd.Load()
    33  		if fd < 0 {
    34  			return errClosing
    35  		}
    36  
    37  		events := []unix.PollFd{
    38  			{
    39  				// The actual socket FD.
    40  				Fd:     fd,
    41  				Events: unix.POLLIN,
    42  			},
    43  			{
    44  				// The eventfd, signaled when we are closing.
    45  				Fd:     int32(s.efd.FD()),
    46  				Events: unix.POLLIN,
    47  			},
    48  		}
    49  		if write {
    50  			events[0].Events = unix.POLLOUT
    51  		}
    52  
    53  		_, _, e := unix.Syscall6(unix.SYS_PPOLL, uintptr(unsafe.Pointer(&events[0])), 2, 0, 0, 0, 0)
    54  		if e == unix.EINTR {
    55  			continue
    56  		}
    57  		if e != 0 {
    58  			return e
    59  		}
    60  
    61  		if events[1].Revents&unix.POLLIN == unix.POLLIN {
    62  			// eventfd signaled, we're closing.
    63  			return errClosing
    64  		}
    65  
    66  		return nil
    67  	}
    68  }
    69  
    70  // buildIovec builds an iovec slice from the given []byte slice.
    71  //
    72  // iovecs is used as an initial slice, to avoid excessive allocations.
    73  func buildIovec(bufs [][]byte, iovecs []unix.Iovec) ([]unix.Iovec, int) {
    74  	var length int
    75  	for i := range bufs {
    76  		if l := len(bufs[i]); l > 0 {
    77  			iovecs = append(iovecs, unix.Iovec{
    78  				Base: &bufs[i][0],
    79  				Len:  uint64(l),
    80  			})
    81  			length += l
    82  		}
    83  	}
    84  	return iovecs, length
    85  }
    86  
    87  // ReadVec reads into the pre-allocated bufs. Returns bytes read.
    88  //
    89  // The pre-allocatted space used by ReadVec is based upon slice lengths.
    90  //
    91  // This function is not guaranteed to read all available data, it
    92  // returns as soon as a single recvmsg call succeeds.
    93  func (r *SocketReader) ReadVec(bufs [][]byte) (int, error) {
    94  	iovecs, length := buildIovec(bufs, make([]unix.Iovec, 0, 2))
    95  
    96  	var msg unix.Msghdr
    97  	if len(r.source) != 0 {
    98  		msg.Name = &r.source[0]
    99  		msg.Namelen = uint32(len(r.source))
   100  	}
   101  
   102  	if len(r.ControlMessage) != 0 {
   103  		msg.Control = &r.ControlMessage[0]
   104  		msg.Controllen = uint64(len(r.ControlMessage))
   105  	}
   106  
   107  	if len(iovecs) != 0 {
   108  		msg.Iov = &iovecs[0]
   109  		msg.Iovlen = uint64(len(iovecs))
   110  	}
   111  
   112  	// n is the bytes received.
   113  	var n uintptr
   114  
   115  	fd, ok := r.socket.enterFD()
   116  	if !ok {
   117  		return 0, unix.EBADF
   118  	}
   119  	// Leave on returns below.
   120  	for {
   121  		var e unix.Errno
   122  
   123  		// Try a non-blocking recv first, so we don't give up the go runtime M.
   124  		n, _, e = unix.RawSyscall(unix.SYS_RECVMSG, uintptr(fd), uintptr(unsafe.Pointer(&msg)), unix.MSG_DONTWAIT|unix.MSG_TRUNC)
   125  		if e == 0 {
   126  			break
   127  		}
   128  		if e == unix.EINTR {
   129  			continue
   130  		}
   131  		if !r.blocking {
   132  			r.socket.gate.Leave()
   133  			return 0, e
   134  		}
   135  		if e != unix.EAGAIN && e != unix.EWOULDBLOCK {
   136  			r.socket.gate.Leave()
   137  			return 0, e
   138  		}
   139  
   140  		// Wait for the socket to become readable.
   141  		err := r.socket.wait(false)
   142  		if err == errClosing {
   143  			err = unix.EBADF
   144  		}
   145  		if err != nil {
   146  			r.socket.gate.Leave()
   147  			return 0, err
   148  		}
   149  	}
   150  
   151  	r.socket.gate.Leave()
   152  
   153  	if msg.Controllen < uint64(len(r.ControlMessage)) {
   154  		r.ControlMessage = r.ControlMessage[:msg.Controllen]
   155  	}
   156  
   157  	if msg.Namelen < uint32(len(r.source)) {
   158  		r.source = r.source[:msg.Namelen]
   159  	}
   160  
   161  	// All unet sockets are SOCK_STREAM or SOCK_SEQPACKET, both of which
   162  	// indicate that the other end is closed by returning a 0 length read
   163  	// with no error.
   164  	if n == 0 {
   165  		return 0, io.EOF
   166  	}
   167  
   168  	if r.race != nil {
   169  		// See comments on Socket.race.
   170  		r.race.Add(1)
   171  	}
   172  
   173  	if int(n) > length {
   174  		return length, errMessageTruncated
   175  	}
   176  
   177  	return int(n), nil
   178  }
   179  
   180  // WriteVec writes the bufs to the socket. Returns bytes written.
   181  //
   182  // This function is not guaranteed to send all data, it returns
   183  // as soon as a single sendmsg call succeeds.
   184  func (w *SocketWriter) WriteVec(bufs [][]byte) (int, error) {
   185  	iovecs, _ := buildIovec(bufs, make([]unix.Iovec, 0, 2))
   186  
   187  	if w.race != nil {
   188  		// See comments on Socket.race.
   189  		w.race.Add(1)
   190  	}
   191  
   192  	var msg unix.Msghdr
   193  	if len(w.to) != 0 {
   194  		msg.Name = &w.to[0]
   195  		msg.Namelen = uint32(len(w.to))
   196  	}
   197  
   198  	if len(w.ControlMessage) != 0 {
   199  		msg.Control = &w.ControlMessage[0]
   200  		msg.Controllen = uint64(len(w.ControlMessage))
   201  	}
   202  
   203  	if len(iovecs) > 0 {
   204  		msg.Iov = &iovecs[0]
   205  		msg.Iovlen = uint64(len(iovecs))
   206  	}
   207  
   208  	fd, ok := w.socket.enterFD()
   209  	if !ok {
   210  		return 0, unix.EBADF
   211  	}
   212  	// Leave on returns below.
   213  	for {
   214  		// Try a non-blocking send first, so we don't give up the go runtime M.
   215  		n, _, e := unix.RawSyscall(unix.SYS_SENDMSG, uintptr(fd), uintptr(unsafe.Pointer(&msg)), unix.MSG_DONTWAIT|unix.MSG_NOSIGNAL)
   216  		if e == 0 {
   217  			w.socket.gate.Leave()
   218  			return int(n), nil
   219  		}
   220  		if e == unix.EINTR {
   221  			continue
   222  		}
   223  		if !w.blocking {
   224  			w.socket.gate.Leave()
   225  			return 0, e
   226  		}
   227  		if e != unix.EAGAIN && e != unix.EWOULDBLOCK {
   228  			w.socket.gate.Leave()
   229  			return 0, e
   230  		}
   231  
   232  		// Wait for the socket to become writeable.
   233  		err := w.socket.wait(true)
   234  		if err == errClosing {
   235  			err = unix.EBADF
   236  		}
   237  		if err != nil {
   238  			w.socket.gate.Leave()
   239  			return 0, err
   240  		}
   241  	}
   242  	// Unreachable, no s.gate.Leave needed.
   243  }
   244  
   245  // getsockopt issues a getsockopt unix.
   246  func getsockopt(fd int, level int, optname int, buf []byte) (uint32, error) {
   247  	l := uint32(len(buf))
   248  	_, _, e := unix.RawSyscall6(unix.SYS_GETSOCKOPT, uintptr(fd), uintptr(level), uintptr(optname), uintptr(unsafe.Pointer(&buf[0])), uintptr(unsafe.Pointer(&l)), 0)
   249  	if e != 0 {
   250  		return 0, e
   251  	}
   252  
   253  	return l, nil
   254  }
   255  
   256  // setsockopt issues a setsockopt unix.
   257  func setsockopt(fd int, level int, optname int, buf []byte) error {
   258  	_, _, e := unix.RawSyscall6(unix.SYS_SETSOCKOPT, uintptr(fd), uintptr(level), uintptr(optname), uintptr(unsafe.Pointer(&buf[0])), uintptr(len(buf)), 0)
   259  	if e != 0 {
   260  		return e
   261  	}
   262  
   263  	return nil
   264  }
   265  
   266  // getsockname issues a getsockname unix.
   267  func getsockname(fd int, buf []byte) (uint32, error) {
   268  	l := uint32(len(buf))
   269  	_, _, e := unix.RawSyscall(unix.SYS_GETSOCKNAME, uintptr(fd), uintptr(unsafe.Pointer(&buf[0])), uintptr(unsafe.Pointer(&l)))
   270  	if e != 0 {
   271  		return 0, e
   272  	}
   273  
   274  	return l, nil
   275  }
   276  
   277  // getpeername issues a getpeername unix.
   278  func getpeername(fd int, buf []byte) (uint32, error) {
   279  	l := uint32(len(buf))
   280  	_, _, e := unix.RawSyscall(unix.SYS_GETPEERNAME, uintptr(fd), uintptr(unsafe.Pointer(&buf[0])), uintptr(unsafe.Pointer(&l)))
   281  	if e != 0 {
   282  		return 0, e
   283  	}
   284  
   285  	return l, nil
   286  }