gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/socket/hostinet/sockopt.go (about)

     1  // Copyright 2023 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //	http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package hostinet
    16  
    17  import (
    18  	"fmt"
    19  	"sync"
    20  	"time"
    21  
    22  	"golang.org/x/sys/unix"
    23  	"gvisor.dev/gvisor/pkg/abi/linux"
    24  	"gvisor.dev/gvisor/pkg/hostarch"
    25  	"gvisor.dev/gvisor/pkg/marshal"
    26  	"gvisor.dev/gvisor/pkg/marshal/primitive"
    27  	"gvisor.dev/gvisor/pkg/sentry/kernel"
    28  	"gvisor.dev/gvisor/pkg/syserr"
    29  )
    30  
    31  const (
    32  	sizeofInt16 = 2
    33  	sizeofInt32 = 4
    34  )
    35  
    36  // SockOpt is used to generate get/setsockopt handlers and filters.
    37  type SockOpt struct {
    38  	// Level the socket option applies to.
    39  	Level uint64
    40  	// Name of the option.
    41  	Name uint64
    42  	// Size of the parameter. A size of 0 indicates that any size is
    43  	// allowed (used for string or other variable-length types).
    44  	Size uint64
    45  	// Support getsockopt on this option.
    46  	AllowGet bool
    47  	// Support setsockopt on this option.
    48  	AllowSet bool
    49  }
    50  
// SockOpts are the socket options supported by hostinet by making syscalls to the host.
//
// Entries are positional SockOpt literals, so each row reads
// {Level, Name, Size, AllowGet, AllowSet}. A Size of 0 means any size is
// allowed for that option.
//
// Note the following socket options are supported but do not need syscalls to
// the host, so do not appear on this list:
//   - SO_TYPE, SO_PROTOCOL, SO_DOMAIN are handled at the syscall level in
//     syscalls/sys_socket.go.
//   - SO_SNDTIMEO, SO_RCVTIMEO are handled internally by setting the embedded
//     socket.SendReceiveTimeout.
var SockOpts = []SockOpt{
	// SOL_IP options.
	{linux.SOL_IP, linux.IP_ADD_MEMBERSHIP, 0, false, true},
	{linux.SOL_IP, linux.IP_DROP_MEMBERSHIP, 0, false, true},
	{linux.SOL_IP, linux.IP_HDRINCL, sizeofInt32, true, true},
	{linux.SOL_IP, linux.IP_MULTICAST_IF, 0 /* kernel allows multiple structures to be passed */, true, true},
	{linux.SOL_IP, linux.IP_MULTICAST_LOOP, 0 /* can be 32-bit int or 8-bit uint */, true, true},
	{linux.SOL_IP, linux.IP_MULTICAST_TTL, 0 /* can be 32-bit int or 8-bit uint */, true, true},
	{linux.SOL_IP, linux.IP_MTU_DISCOVER, 0 /* can be 32-bit int or 8-bit uint */, true, true},
	{linux.SOL_IP, linux.IP_PKTINFO, sizeofInt32, true, true},
	{linux.SOL_IP, linux.IP_RECVERR, sizeofInt32, true, true},
	{linux.SOL_IP, linux.IP_RECVORIGDSTADDR, sizeofInt32, true, true},
	{linux.SOL_IP, linux.IP_RECVTOS, sizeofInt32, true, true},
	{linux.SOL_IP, linux.IP_RECVTTL, sizeofInt32, true, true},
	{linux.SOL_IP, linux.IP_TOS, 0 /* Can be 32, 16, or 8 bits */, true, true},
	{linux.SOL_IP, linux.IP_TTL, sizeofInt32, true, true},

	// SOL_IPV6 options.
	{linux.SOL_IPV6, linux.IPV6_CHECKSUM, sizeofInt32, true, true},
	{linux.SOL_IPV6, linux.IPV6_MULTICAST_HOPS, sizeofInt32, true, true},
	{linux.SOL_IPV6, linux.IPV6_RECVERR, sizeofInt32, true, true},
	{linux.SOL_IPV6, linux.IPV6_RECVHOPLIMIT, sizeofInt32, true, true},
	{linux.SOL_IPV6, linux.IPV6_RECVORIGDSTADDR, sizeofInt32, true, true},
	{linux.SOL_IPV6, linux.IPV6_RECVPKTINFO, sizeofInt32, true, true},
	{linux.SOL_IPV6, linux.IPV6_RECVTCLASS, sizeofInt32, true, true},
	{linux.SOL_IPV6, linux.IPV6_TCLASS, sizeofInt32, true, true},
	{linux.SOL_IPV6, linux.IPV6_UNICAST_HOPS, sizeofInt32, true, true},
	{linux.SOL_IPV6, linux.IPV6_V6ONLY, sizeofInt32, true, true},

	// SOL_SOCKET options.
	{linux.SOL_SOCKET, linux.SO_ACCEPTCONN, sizeofInt32, true, true},
	{linux.SOL_SOCKET, linux.SO_BINDTODEVICE, 0, true, true},
	{linux.SOL_SOCKET, linux.SO_BROADCAST, sizeofInt32, true, true},
	{linux.SOL_SOCKET, linux.SO_ERROR, sizeofInt32, true, false},
	{linux.SOL_SOCKET, linux.SO_KEEPALIVE, sizeofInt32, true, true},
	{linux.SOL_SOCKET, linux.SO_LINGER, linux.SizeOfLinger, true, true},
	{linux.SOL_SOCKET, linux.SO_NO_CHECK, sizeofInt32, true, true},
	{linux.SOL_SOCKET, linux.SO_OOBINLINE, sizeofInt32, true, true},
	{linux.SOL_SOCKET, linux.SO_PASSCRED, sizeofInt32, true, true},
	{linux.SOL_SOCKET, linux.SO_RCVBUF, sizeofInt32, true, true},
	{linux.SOL_SOCKET, linux.SO_RCVBUFFORCE, sizeofInt32, false, true},
	{linux.SOL_SOCKET, linux.SO_RCVLOWAT, sizeofInt32, true, true},
	{linux.SOL_SOCKET, linux.SO_REUSEADDR, sizeofInt32, true, true},
	{linux.SOL_SOCKET, linux.SO_REUSEPORT, sizeofInt32, true, true},
	{linux.SOL_SOCKET, linux.SO_SNDBUF, sizeofInt32, true, true},
	{linux.SOL_SOCKET, linux.SO_TIMESTAMP, sizeofInt32, true, true},

	// SOL_TCP options.
	{linux.SOL_TCP, linux.TCP_CONGESTION, 0 /* string */, true, true},
	{linux.SOL_TCP, linux.TCP_CORK, sizeofInt32, true, true},
	{linux.SOL_TCP, linux.TCP_DEFER_ACCEPT, sizeofInt32, true, true},
	{linux.SOL_TCP, linux.TCP_INFO, uint64(linux.SizeOfTCPInfo), true, false},
	{linux.SOL_TCP, linux.TCP_INQ, sizeofInt32, true, true},
	{linux.SOL_TCP, linux.TCP_KEEPCNT, sizeofInt32, true, true},
	{linux.SOL_TCP, linux.TCP_KEEPIDLE, sizeofInt32, true, true},
	{linux.SOL_TCP, linux.TCP_KEEPINTVL, sizeofInt32, true, true},
	{linux.SOL_TCP, linux.TCP_LINGER2, sizeofInt32, true, true},
	{linux.SOL_TCP, linux.TCP_MAXSEG, sizeofInt32, true, true},
	{linux.SOL_TCP, linux.TCP_NODELAY, sizeofInt32, true, true},
	{linux.SOL_TCP, linux.TCP_QUICKACK, sizeofInt32, true, true},
	{linux.SOL_TCP, linux.TCP_SYNCNT, sizeofInt32, true, true},
	{linux.SOL_TCP, linux.TCP_USER_TIMEOUT, sizeofInt32, true, true},
	{linux.SOL_TCP, linux.TCP_WINDOW_CLAMP, sizeofInt32, true, true},

	// SOL_ICMPV6 options.
	{linux.SOL_ICMPV6, linux.ICMPV6_FILTER, uint64(linux.SizeOfICMP6Filter), true, true},
}
   121  
   122  // sockOptMap is a map of {level, name} -> SockOpts. It is an optimization for
   123  // looking up SockOpts by level and name. The map is initialized in the first
   124  // call to Get/SetSockOpt.
   125  var (
   126  	sockOptMap     map[levelName]SockOpt
   127  	sockOptMapOnce sync.Once
   128  )
   129  
   130  type levelName struct {
   131  	level uint64
   132  	name  uint64
   133  }
   134  
   135  func initSockOptMap(t *kernel.Task) {
   136  	opts := append(SockOpts, extraSockOpts(t)...)
   137  	sockOptMap = make(map[levelName]SockOpt, len(opts))
   138  	for _, opt := range opts {
   139  		ln := levelName{opt.Level, opt.Name}
   140  		if _, ok := sockOptMap[ln]; ok {
   141  			panic(fmt.Sprintf("multiple sockopts with level=%d and name=%d", opt.Level, opt.Name))
   142  		}
   143  		sockOptMap[ln] = opt
   144  	}
   145  }
   146  
   147  // GetSockOpt implements socket.Socket.GetSockOpt.
   148  func (s *Socket) GetSockOpt(t *kernel.Task, level, name int, optValAddr hostarch.Addr, optLen int) (marshal.Marshallable, *syserr.Error) {
   149  	sockOptMapOnce.Do(func() { initSockOptMap(t) })
   150  
   151  	if optLen < 0 {
   152  		return nil, syserr.ErrInvalidArgument
   153  	}
   154  
   155  	// Special case send/recv timeouts since those are handled internally.
   156  	if level == linux.SOL_SOCKET {
   157  		switch name {
   158  		case linux.SO_RCVTIMEO:
   159  			recvTimeout := linux.NsecToTimeval(s.RecvTimeout())
   160  			return &recvTimeout, nil
   161  		case linux.SO_SNDTIMEO:
   162  			sndTimeout := linux.NsecToTimeval(s.SendTimeout())
   163  			return &sndTimeout, nil
   164  		}
   165  	}
   166  
   167  	sockOpt, ok := sockOptMap[levelName{uint64(level), uint64(name)}]
   168  	if !ok {
   169  		return nil, syserr.ErrProtocolNotAvailable
   170  	}
   171  	if !sockOpt.AllowGet {
   172  		return nil, syserr.ErrInvalidArgument
   173  	}
   174  	var opt []byte
   175  	if sockOpt.Size > 0 {
   176  		// Validate size of input buffer.
   177  		if uint64(optLen) < sockOpt.Size {
   178  			// Special case for options that allow smaller buffers.
   179  			//
   180  			// To keep the syscall filters simple and restrictive,
   181  			// we use the full buffer size when calling the host,
   182  			// but truncate before returning to the application.
   183  			switch {
   184  			case level == linux.SOL_TCP && name == linux.TCP_INFO:
   185  				// Allow smaller buffer.
   186  			case level == linux.SOL_ICMPV6 && name == linux.ICMPV6_FILTER:
   187  				// Allow smaller buffer.
   188  			case level == linux.SOL_IP && name == linux.IP_TTL:
   189  				// Allow smaller buffer.
   190  			case level == linux.SOL_IPV6 && name == linux.IPV6_TCLASS:
   191  				// Allow smaller buffer.
   192  			default:
   193  				return nil, syserr.ErrInvalidArgument
   194  			}
   195  		}
   196  		opt = make([]byte, sockOpt.Size)
   197  	} else {
   198  		// No size checking. This is probably a string. Use the size
   199  		// they gave us.
   200  		opt = make([]byte, optLen)
   201  	}
   202  	if err := preGetSockOpt(t, level, name, optValAddr, opt); err != nil {
   203  		return nil, syserr.FromError(err)
   204  	}
   205  	var err error
   206  	opt, err = getsockopt(s.fd, level, name, opt)
   207  	if err != nil {
   208  		return nil, syserr.FromError(err)
   209  	}
   210  	opt = postGetSockOpt(t, level, name, opt)
   211  	// If option allows a smaller buffer, truncate it to desired size.
   212  	if uint64(optLen) < sockOpt.Size {
   213  		opt = opt[:optLen]
   214  	}
   215  	optP := primitive.ByteSlice(opt)
   216  	return &optP, nil
   217  }
   218  
   219  // SetSockOpt implements socket.Socket.SetSockOpt.
   220  func (s *Socket) SetSockOpt(t *kernel.Task, level, name int, opt []byte) *syserr.Error {
   221  	sockOptMapOnce.Do(func() { initSockOptMap(t) })
   222  
   223  	// Special case send/recv timeouts since those are handled internally.
   224  	if level == linux.SOL_SOCKET {
   225  		switch name {
   226  		case linux.SO_RCVTIMEO:
   227  			optLen := linux.SizeOfTimeval
   228  			var v linux.Timeval
   229  			v.UnmarshalBytes(opt[:optLen])
   230  			if v.Usec < 0 || v.Usec >= int64(time.Second/time.Microsecond) {
   231  				return syserr.ErrDomain
   232  			}
   233  			s.SetRecvTimeout(v.ToNsecCapped())
   234  			return nil
   235  		case linux.SO_SNDTIMEO:
   236  			optLen := linux.SizeOfTimeval
   237  			var v linux.Timeval
   238  			v.UnmarshalBytes(opt[:optLen])
   239  			if v.Usec < 0 || v.Usec >= int64(time.Second/time.Microsecond) {
   240  				return syserr.ErrDomain
   241  			}
   242  			s.SetSendTimeout(v.ToNsecCapped())
   243  			return nil
   244  		}
   245  	}
   246  	sockOpt, ok := sockOptMap[levelName{uint64(level), uint64(name)}]
   247  	if !ok {
   248  		// Pretend to accept socket options we don't understand. This
   249  		// seems dangerous, but it's what netstack does...
   250  		return nil
   251  	}
   252  	if !sockOpt.AllowSet {
   253  		return syserr.ErrInvalidArgument
   254  	}
   255  	if sockOpt.Size > 0 {
   256  		if uint64(len(opt)) < sockOpt.Size {
   257  			return syserr.ErrInvalidArgument
   258  		}
   259  		opt = opt[:sockOpt.Size]
   260  	}
   261  	if _, _, errno := unix.Syscall6(unix.SYS_SETSOCKOPT, uintptr(s.fd), uintptr(level), uintptr(name), uintptr(firstBytePtr(opt)), uintptr(len(opt)), 0); errno != 0 {
   262  		return syserr.FromError(errno)
   263  	}
   264  	return nil
   265  }