github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/socket/hostinet/sockopt.go (about)

     1  // Copyright 2023 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //	http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package hostinet
    16  
    17  import (
    18  	"fmt"
    19  	"sync"
    20  	"time"
    21  
    22  	"golang.org/x/sys/unix"
    23  	"github.com/nicocha30/gvisor-ligolo/pkg/abi/linux"
    24  	"github.com/nicocha30/gvisor-ligolo/pkg/hostarch"
    25  	"github.com/nicocha30/gvisor-ligolo/pkg/marshal"
    26  	"github.com/nicocha30/gvisor-ligolo/pkg/marshal/primitive"
    27  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/kernel"
    28  	"github.com/nicocha30/gvisor-ligolo/pkg/syserr"
    29  )
    30  
    31  const (
    32  	sizeofInt16 = 2
    33  	sizeofInt32 = 4
    34  )
    35  
    36  // SockOpt is used to generate get/setsockopt handlers and filters.
    37  type SockOpt struct {
    38  	// Level the socket option applies to.
    39  	Level uint64
    40  	// Name of the option.
    41  	Name uint64
    42  	// Size of the parameter. A size of 0 indicates that any size is
    43  	// allowed (used for string or other variable-length types).
    44  	Size uint64
    45  	// Support getsockopt on this option.
    46  	AllowGet bool
    47  	// Support setsockopt on this option.
    48  	AllowSet bool
    49  }
    50  
// SockOpts are the socket options supported by hostinet by making syscalls to the host.
//
// Each entry is a positional SockOpt literal, i.e. its columns are
// {Level, Name, Size, AllowGet, AllowSet}. A Size of 0 means that any
// parameter size is accepted for that option.
//
// Note the following socket options are supported but do not need syscalls to
// the host, so do not appear on this list:
//   - SO_TYPE, SO_PROTOCOL, SO_DOMAIN are handled at the syscall level in
//     syscalls/sys_socket.go.
//   - SO_SNDTIMEO, SO_RCVTIMEO are handled internally by setting the embedded
//     socket.SendReceiveTimeout.
var SockOpts = []SockOpt{
	// SOL_IP options.
	{linux.SOL_IP, linux.IP_ADD_MEMBERSHIP, 0, false, true},
	{linux.SOL_IP, linux.IP_DROP_MEMBERSHIP, 0, false, true},
	{linux.SOL_IP, linux.IP_HDRINCL, sizeofInt32, true, true},
	{linux.SOL_IP, linux.IP_MULTICAST_IF, uint64(linux.SizeOfInetAddr), true, true},
	{linux.SOL_IP, linux.IP_MULTICAST_LOOP, 0 /* can be 32-bit int or 8-bit uint */, true, true},
	{linux.SOL_IP, linux.IP_MULTICAST_TTL, 0 /* can be 32-bit int or 8-bit uint */, true, true},
	{linux.SOL_IP, linux.IP_PKTINFO, sizeofInt32, true, true},
	{linux.SOL_IP, linux.IP_RECVERR, sizeofInt32, true, true},
	{linux.SOL_IP, linux.IP_RECVORIGDSTADDR, sizeofInt32, true, true},
	{linux.SOL_IP, linux.IP_RECVTOS, sizeofInt32, true, true},
	{linux.SOL_IP, linux.IP_RECVTTL, sizeofInt32, true, true},
	{linux.SOL_IP, linux.IP_TOS, 0 /* Can be 32, 16, or 8 bits */, true, true},
	{linux.SOL_IP, linux.IP_TTL, sizeofInt32, true, true},

	// SOL_IPV6 options.
	{linux.SOL_IPV6, linux.IPV6_CHECKSUM, sizeofInt32, true, true},
	{linux.SOL_IPV6, linux.IPV6_MULTICAST_HOPS, sizeofInt32, true, true},
	{linux.SOL_IPV6, linux.IPV6_RECVERR, sizeofInt32, true, true},
	{linux.SOL_IPV6, linux.IPV6_RECVHOPLIMIT, sizeofInt32, true, true},
	{linux.SOL_IPV6, linux.IPV6_RECVORIGDSTADDR, sizeofInt32, true, true},
	{linux.SOL_IPV6, linux.IPV6_RECVPKTINFO, sizeofInt32, true, true},
	{linux.SOL_IPV6, linux.IPV6_RECVTCLASS, sizeofInt32, true, true},
	{linux.SOL_IPV6, linux.IPV6_TCLASS, sizeofInt32, true, true},
	{linux.SOL_IPV6, linux.IPV6_UNICAST_HOPS, sizeofInt32, true, true},
	{linux.SOL_IPV6, linux.IPV6_V6ONLY, sizeofInt32, true, true},

	// SOL_SOCKET options. SO_ERROR is get-only and SO_RCVBUFFORCE is
	// set-only.
	{linux.SOL_SOCKET, linux.SO_ACCEPTCONN, sizeofInt32, true, true},
	{linux.SOL_SOCKET, linux.SO_BINDTODEVICE, 0, true, true},
	{linux.SOL_SOCKET, linux.SO_BROADCAST, sizeofInt32, true, true},
	{linux.SOL_SOCKET, linux.SO_ERROR, sizeofInt32, true, false},
	{linux.SOL_SOCKET, linux.SO_KEEPALIVE, sizeofInt32, true, true},
	{linux.SOL_SOCKET, linux.SO_LINGER, linux.SizeOfLinger, true, true},
	{linux.SOL_SOCKET, linux.SO_NO_CHECK, sizeofInt32, true, true},
	{linux.SOL_SOCKET, linux.SO_OOBINLINE, sizeofInt32, true, true},
	{linux.SOL_SOCKET, linux.SO_PASSCRED, sizeofInt32, true, true},
	{linux.SOL_SOCKET, linux.SO_RCVBUF, sizeofInt32, true, true},
	{linux.SOL_SOCKET, linux.SO_RCVBUFFORCE, sizeofInt32, false, true},
	{linux.SOL_SOCKET, linux.SO_RCVLOWAT, sizeofInt32, true, true},
	{linux.SOL_SOCKET, linux.SO_REUSEADDR, sizeofInt32, true, true},
	{linux.SOL_SOCKET, linux.SO_REUSEPORT, sizeofInt32, true, true},
	{linux.SOL_SOCKET, linux.SO_SNDBUF, sizeofInt32, true, true},
	{linux.SOL_SOCKET, linux.SO_TIMESTAMP, sizeofInt32, true, true},

	// SOL_TCP options. TCP_INFO is get-only.
	{linux.SOL_TCP, linux.TCP_CONGESTION, 0 /* string */, true, true},
	{linux.SOL_TCP, linux.TCP_CORK, sizeofInt32, true, true},
	{linux.SOL_TCP, linux.TCP_DEFER_ACCEPT, sizeofInt32, true, true},
	{linux.SOL_TCP, linux.TCP_INFO, uint64(linux.SizeOfTCPInfo), true, false},
	{linux.SOL_TCP, linux.TCP_INQ, sizeofInt32, true, true},
	{linux.SOL_TCP, linux.TCP_KEEPCNT, sizeofInt32, true, true},
	{linux.SOL_TCP, linux.TCP_KEEPIDLE, sizeofInt32, true, true},
	{linux.SOL_TCP, linux.TCP_KEEPINTVL, sizeofInt32, true, true},
	{linux.SOL_TCP, linux.TCP_LINGER2, sizeofInt32, true, true},
	{linux.SOL_TCP, linux.TCP_MAXSEG, sizeofInt32, true, true},
	{linux.SOL_TCP, linux.TCP_NODELAY, sizeofInt32, true, true},
	{linux.SOL_TCP, linux.TCP_QUICKACK, sizeofInt32, true, true},
	{linux.SOL_TCP, linux.TCP_SYNCNT, sizeofInt32, true, true},
	{linux.SOL_TCP, linux.TCP_USER_TIMEOUT, sizeofInt32, true, true},
	{linux.SOL_TCP, linux.TCP_WINDOW_CLAMP, sizeofInt32, true, true},

	// SOL_ICMPV6 options.
	{linux.SOL_ICMPV6, linux.ICMPV6_FILTER, uint64(linux.SizeOfICMP6Filter), true, true},
}
   120  
   121  // sockOptMap is a map of {level, name} -> SockOpts. It is an optimization for
   122  // looking up SockOpts by level and name. The map is initialized in the first
   123  // call to Get/SetSockOpt.
   124  var (
   125  	sockOptMap     map[levelName]SockOpt
   126  	sockOptMapOnce sync.Once
   127  )
   128  
   129  type levelName struct {
   130  	level uint64
   131  	name  uint64
   132  }
   133  
   134  func initSockOptMap(t *kernel.Task) {
   135  	opts := append(SockOpts, extraSockOpts(t)...)
   136  	sockOptMap = make(map[levelName]SockOpt, len(opts))
   137  	for _, opt := range opts {
   138  		ln := levelName{opt.Level, opt.Name}
   139  		if _, ok := sockOptMap[ln]; ok {
   140  			panic(fmt.Sprintf("multiple sockopts with level=%d and name=%d", opt.Level, opt.Name))
   141  		}
   142  		sockOptMap[ln] = opt
   143  	}
   144  }
   145  
   146  // GetSockOpt implements socket.Socket.GetSockOpt.
   147  func (s *Socket) GetSockOpt(t *kernel.Task, level, name int, optValAddr hostarch.Addr, optLen int) (marshal.Marshallable, *syserr.Error) {
   148  	sockOptMapOnce.Do(func() { initSockOptMap(t) })
   149  
   150  	if optLen < 0 {
   151  		return nil, syserr.ErrInvalidArgument
   152  	}
   153  
   154  	// Special case send/recv timeouts since those are handled internally.
   155  	if level == linux.SOL_SOCKET {
   156  		switch name {
   157  		case linux.SO_RCVTIMEO:
   158  			recvTimeout := linux.NsecToTimeval(s.RecvTimeout())
   159  			return &recvTimeout, nil
   160  		case linux.SO_SNDTIMEO:
   161  			sndTimeout := linux.NsecToTimeval(s.SendTimeout())
   162  			return &sndTimeout, nil
   163  		}
   164  	}
   165  
   166  	sockOpt, ok := sockOptMap[levelName{uint64(level), uint64(name)}]
   167  	if !ok {
   168  		return nil, syserr.ErrProtocolNotAvailable
   169  	}
   170  	if !sockOpt.AllowGet {
   171  		return nil, syserr.ErrInvalidArgument
   172  	}
   173  	var opt []byte
   174  	if sockOpt.Size > 0 {
   175  		// Validate size of input buffer.
   176  		if uint64(optLen) < sockOpt.Size {
   177  			// Special case for options that allow smaller buffers.
   178  			//
   179  			// To keep the syscall filters simple and restrictive,
   180  			// we use the full buffer size when calling the host,
   181  			// but truncate before returning to the application.
   182  			switch {
   183  			case level == linux.SOL_TCP && name == linux.TCP_INFO:
   184  				// Allow smaller buffer.
   185  			case level == linux.SOL_ICMPV6 && name == linux.ICMPV6_FILTER:
   186  				// Allow smaller buffer.
   187  			case level == linux.SOL_IP && name == linux.IP_TTL:
   188  				// Allow smaller buffer.
   189  			case level == linux.SOL_IPV6 && name == linux.IPV6_TCLASS:
   190  				// Allow smaller buffer.
   191  			default:
   192  				return nil, syserr.ErrInvalidArgument
   193  			}
   194  		}
   195  		opt = make([]byte, sockOpt.Size)
   196  	} else {
   197  		// No size checking. This is probably a string. Use the size
   198  		// they gave us.
   199  		opt = make([]byte, optLen)
   200  	}
   201  	if err := preGetSockOpt(t, level, name, optValAddr, opt); err != nil {
   202  		return nil, syserr.FromError(err)
   203  	}
   204  	var err error
   205  	opt, err = getsockopt(s.fd, level, name, opt)
   206  	if err != nil {
   207  		return nil, syserr.FromError(err)
   208  	}
   209  	opt = postGetSockOpt(t, level, name, opt)
   210  	// If option allows a smaller buffer, truncate it to desired size.
   211  	if uint64(optLen) < sockOpt.Size {
   212  		opt = opt[:optLen]
   213  	}
   214  	optP := primitive.ByteSlice(opt)
   215  	return &optP, nil
   216  }
   217  
   218  // SetSockOpt implements socket.Socket.SetSockOpt.
   219  func (s *Socket) SetSockOpt(t *kernel.Task, level, name int, opt []byte) *syserr.Error {
   220  	sockOptMapOnce.Do(func() { initSockOptMap(t) })
   221  
   222  	// Special case send/recv timeouts since those are handled internally.
   223  	if level == linux.SOL_SOCKET {
   224  		switch name {
   225  		case linux.SO_RCVTIMEO:
   226  			optLen := linux.SizeOfTimeval
   227  			var v linux.Timeval
   228  			v.UnmarshalBytes(opt[:optLen])
   229  			if v.Usec < 0 || v.Usec >= int64(time.Second/time.Microsecond) {
   230  				return syserr.ErrDomain
   231  			}
   232  			s.SetRecvTimeout(v.ToNsecCapped())
   233  			return nil
   234  		case linux.SO_SNDTIMEO:
   235  			optLen := linux.SizeOfTimeval
   236  			var v linux.Timeval
   237  			v.UnmarshalBytes(opt[:optLen])
   238  			if v.Usec < 0 || v.Usec >= int64(time.Second/time.Microsecond) {
   239  				return syserr.ErrDomain
   240  			}
   241  			s.SetSendTimeout(v.ToNsecCapped())
   242  			return nil
   243  		}
   244  	}
   245  	sockOpt, ok := sockOptMap[levelName{uint64(level), uint64(name)}]
   246  	if !ok {
   247  		// Pretend to accept socket options we don't understand. This
   248  		// seems dangerous, but it's what netstack does...
   249  		return nil
   250  	}
   251  	if !sockOpt.AllowSet {
   252  		return syserr.ErrInvalidArgument
   253  	}
   254  	if sockOpt.Size > 0 {
   255  		if uint64(len(opt)) < sockOpt.Size {
   256  			return syserr.ErrInvalidArgument
   257  		}
   258  		opt = opt[:sockOpt.Size]
   259  	}
   260  	if _, _, errno := unix.Syscall6(unix.SYS_SETSOCKOPT, uintptr(s.fd), uintptr(level), uintptr(name), uintptr(firstBytePtr(opt)), uintptr(len(opt)), 0); errno != 0 {
   261  		return syserr.FromError(errno)
   262  	}
   263  	return nil
   264  }