github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/tcpip/link/rawfile/rawfile_unsafe.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // +build linux
    16  
    17  // Package rawfile contains utilities for using the netstack with raw host
    18  // files on Linux hosts.
    19  package rawfile
    20  
    21  import (
    22  	"reflect"
    23  	"unsafe"
    24  
    25  	"golang.org/x/sys/unix"
    26  	"github.com/SagerNet/gvisor/pkg/tcpip"
    27  )
    28  
// SizeofIovec is the size of a unix.Iovec in bytes, as computed by
// unsafe.Sizeof for the platform the package is compiled for.
const SizeofIovec = unsafe.Sizeof(unix.Iovec{})
    31  
// MaxIovs is UIO_MAXIOV, the maximum number of iovecs that may be passed to a
// host system call in a single array. The value is hard-coded here rather than
// taken from an imported constant.
const MaxIovs = 1024
    35  
    36  // IovecFromBytes returns a unix.Iovec representing bs.
    37  //
    38  // Preconditions: len(bs) > 0.
    39  func IovecFromBytes(bs []byte) unix.Iovec {
    40  	iov := unix.Iovec{
    41  		Base: &bs[0],
    42  	}
    43  	iov.SetLen(len(bs))
    44  	return iov
    45  }
    46  
    47  func bytesFromIovec(iov unix.Iovec) (bs []byte) {
    48  	sh := (*reflect.SliceHeader)(unsafe.Pointer(&bs))
    49  	sh.Data = uintptr(unsafe.Pointer(iov.Base))
    50  	sh.Len = int(iov.Len)
    51  	sh.Cap = int(iov.Len)
    52  	return
    53  }
    54  
    55  // AppendIovecFromBytes returns append(iovs, IovecFromBytes(bs)). If len(bs) ==
    56  // 0, AppendIovecFromBytes returns iovs without modification. If len(iovs) >=
    57  // max, AppendIovecFromBytes replaces the final iovec in iovs with one that
    58  // also includes the contents of bs. Note that this implies that
    59  // AppendIovecFromBytes is only usable when the returned iovec slice is used as
    60  // the source of a write.
    61  func AppendIovecFromBytes(iovs []unix.Iovec, bs []byte, max int) []unix.Iovec {
    62  	if len(bs) == 0 {
    63  		return iovs
    64  	}
    65  	if len(iovs) < max {
    66  		return append(iovs, IovecFromBytes(bs))
    67  	}
    68  	iovs[len(iovs)-1] = IovecFromBytes(append(bytesFromIovec(iovs[len(iovs)-1]), bs...))
    69  	return iovs
    70  }
    71  
// MMsgHdr represents the mmsg_hdr structure required by recvmmsg() on linux.
// In this file it is also the element type passed to sendmmsg() by
// NonBlockingSendMMsg.
type MMsgHdr struct {
	// Msg is the message header describing a single message.
	Msg unix.Msghdr
	// Len is the per-message length field of the structure.
	// NOTE(review): presumably written by the kernel with the byte count of
	// the message — confirm against recvmmsg(2)/sendmmsg(2).
	Len uint32
	// Explicit trailing padding; presumably keeps the Go layout the same size
	// as the C structure on 64-bit targets — confirm before porting.
	_ [4]byte
}
    78  
// SizeofMMsgHdr is the size of a MMsgHdr in bytes, including its explicit
// trailing padding field.
const SizeofMMsgHdr = unsafe.Sizeof(MMsgHdr{})
    81  
    82  // GetMTU determines the MTU of a network interface device.
    83  func GetMTU(name string) (uint32, error) {
    84  	fd, err := unix.Socket(unix.AF_UNIX, unix.SOCK_DGRAM, 0)
    85  	if err != nil {
    86  		return 0, err
    87  	}
    88  
    89  	defer unix.Close(fd)
    90  
    91  	var ifreq struct {
    92  		name [16]byte
    93  		mtu  int32
    94  		_    [20]byte
    95  	}
    96  
    97  	copy(ifreq.name[:], name)
    98  	_, _, errno := unix.Syscall(unix.SYS_IOCTL, uintptr(fd), unix.SIOCGIFMTU, uintptr(unsafe.Pointer(&ifreq)))
    99  	if errno != 0 {
   100  		return 0, errno
   101  	}
   102  
   103  	return uint32(ifreq.mtu), nil
   104  }
   105  
   106  // NonBlockingWrite writes the given buffer to a file descriptor. It fails if
   107  // partial data is written.
   108  func NonBlockingWrite(fd int, buf []byte) tcpip.Error {
   109  	var ptr unsafe.Pointer
   110  	if len(buf) > 0 {
   111  		ptr = unsafe.Pointer(&buf[0])
   112  	}
   113  
   114  	_, _, e := unix.RawSyscall(unix.SYS_WRITE, uintptr(fd), uintptr(ptr), uintptr(len(buf)))
   115  	if e != 0 {
   116  		return TranslateErrno(e)
   117  	}
   118  
   119  	return nil
   120  }
   121  
   122  // NonBlockingWriteIovec writes iovec to a file descriptor in a single unix.
   123  // It fails if partial data is written.
   124  func NonBlockingWriteIovec(fd int, iovec []unix.Iovec) tcpip.Error {
   125  	iovecLen := uintptr(len(iovec))
   126  	_, _, e := unix.RawSyscall(unix.SYS_WRITEV, uintptr(fd), uintptr(unsafe.Pointer(&iovec[0])), iovecLen)
   127  	if e != 0 {
   128  		return TranslateErrno(e)
   129  	}
   130  	return nil
   131  }
   132  
   133  // NonBlockingSendMMsg sends multiple messages on a socket.
   134  func NonBlockingSendMMsg(fd int, msgHdrs []MMsgHdr) (int, tcpip.Error) {
   135  	n, _, e := unix.RawSyscall6(unix.SYS_SENDMMSG, uintptr(fd), uintptr(unsafe.Pointer(&msgHdrs[0])), uintptr(len(msgHdrs)), unix.MSG_DONTWAIT, 0, 0)
   136  	if e != 0 {
   137  		return 0, TranslateErrno(e)
   138  	}
   139  
   140  	return int(n), nil
   141  }
   142  
// PollEvent represents the pollfd structure passed to a poll() system call.
type PollEvent struct {
	// FD is the file descriptor to poll.
	FD int32
	// Events is the mask of requested events; the readers in this file set
	// it to 1 (POLLIN).
	Events int16
	// Revents corresponds to pollfd's revents field.
	// NOTE(review): presumably filled in by the kernel with the events that
	// occurred — confirm against poll(2).
	Revents int16
}
   149  
   150  // BlockingRead reads from a file descriptor that is set up as non-blocking. If
   151  // no data is available, it will block in a poll() syscall until the file
   152  // descriptor becomes readable.
   153  func BlockingRead(fd int, b []byte) (int, tcpip.Error) {
   154  	for {
   155  		n, _, e := unix.RawSyscall(unix.SYS_READ, uintptr(fd), uintptr(unsafe.Pointer(&b[0])), uintptr(len(b)))
   156  		if e == 0 {
   157  			return int(n), nil
   158  		}
   159  
   160  		event := PollEvent{
   161  			FD:     int32(fd),
   162  			Events: 1, // POLLIN
   163  		}
   164  
   165  		_, e = BlockingPoll(&event, 1, nil)
   166  		if e != 0 && e != unix.EINTR {
   167  			return 0, TranslateErrno(e)
   168  		}
   169  	}
   170  }
   171  
   172  // BlockingReadv reads from a file descriptor that is set up as non-blocking and
   173  // stores the data in a list of iovecs buffers. If no data is available, it will
   174  // block in a poll() syscall until the file descriptor becomes readable.
   175  func BlockingReadv(fd int, iovecs []unix.Iovec) (int, tcpip.Error) {
   176  	for {
   177  		n, _, e := unix.RawSyscall(unix.SYS_READV, uintptr(fd), uintptr(unsafe.Pointer(&iovecs[0])), uintptr(len(iovecs)))
   178  		if e == 0 {
   179  			return int(n), nil
   180  		}
   181  
   182  		event := PollEvent{
   183  			FD:     int32(fd),
   184  			Events: 1, // POLLIN
   185  		}
   186  
   187  		_, e = BlockingPoll(&event, 1, nil)
   188  		if e != 0 && e != unix.EINTR {
   189  			return 0, TranslateErrno(e)
   190  		}
   191  	}
   192  }
   193  
   194  // BlockingRecvMMsg reads from a file descriptor that is set up as non-blocking
   195  // and stores the received messages in a slice of MMsgHdr structures. If no data
   196  // is available, it will block in a poll() syscall until the file descriptor
   197  // becomes readable.
   198  func BlockingRecvMMsg(fd int, msgHdrs []MMsgHdr) (int, tcpip.Error) {
   199  	for {
   200  		n, _, e := unix.RawSyscall6(unix.SYS_RECVMMSG, uintptr(fd), uintptr(unsafe.Pointer(&msgHdrs[0])), uintptr(len(msgHdrs)), unix.MSG_DONTWAIT, 0, 0)
   201  		if e == 0 {
   202  			return int(n), nil
   203  		}
   204  
   205  		event := PollEvent{
   206  			FD:     int32(fd),
   207  			Events: 1, // POLLIN
   208  		}
   209  
   210  		if _, e := BlockingPoll(&event, 1, nil); e != 0 && e != unix.EINTR {
   211  			return 0, TranslateErrno(e)
   212  		}
   213  	}
   214  }