gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/socket/hostinet/sockopt.go (about) 1 // Copyright 2023 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package hostinet 16 17 import ( 18 "fmt" 19 "sync" 20 "time" 21 22 "golang.org/x/sys/unix" 23 "gvisor.dev/gvisor/pkg/abi/linux" 24 "gvisor.dev/gvisor/pkg/hostarch" 25 "gvisor.dev/gvisor/pkg/marshal" 26 "gvisor.dev/gvisor/pkg/marshal/primitive" 27 "gvisor.dev/gvisor/pkg/sentry/kernel" 28 "gvisor.dev/gvisor/pkg/syserr" 29 ) 30 31 const ( 32 sizeofInt16 = 2 33 sizeofInt32 = 4 34 ) 35 36 // SockOpt is used to generate get/setsockopt handlers and filters. 37 type SockOpt struct { 38 // Level the socket option applies to. 39 Level uint64 40 // Name of the option. 41 Name uint64 42 // Size of the parameter. A size of 0 indicates that any size is 43 // allowed (used for string or other variable-length types). 44 Size uint64 45 // Support getsockopt on this option. 46 AllowGet bool 47 // Support setsockopt on this option. 48 AllowSet bool 49 } 50 51 // SockOpts are the socket options supported by hostinet by making syscalls to the host. 52 // 53 // Note the following socket options are supported but do not need syscalls to 54 // the host, so do not appear on this list: 55 // - SO_TYPE, SO_PROTOCOL, SO_DOMAIN are handled at the syscall level in 56 // syscalls/sys_socket.go. 57 // - SO_SNDTIMEOU, SO_RCVTIMEO are handled internally by setting the embedded 58 // socket.SendReceiveTimeout. 59 var SockOpts = []SockOpt{ 60 {linux.SOL_IP, linux.IP_ADD_MEMBERSHIP, 0, false, true}, 61 {linux.SOL_IP, linux.IP_DROP_MEMBERSHIP, 0, false, true}, 62 {linux.SOL_IP, linux.IP_HDRINCL, sizeofInt32, true, true}, 63 {linux.SOL_IP, linux.IP_MULTICAST_IF, 0 /* kernel allows multiple structures to be passed */, true, true}, 64 {linux.SOL_IP, linux.IP_MULTICAST_LOOP, 0 /* can be 32-bit int or 8-bit uint */, true, true}, 65 {linux.SOL_IP, linux.IP_MULTICAST_TTL, 0 /* can be 32-bit int or 8-bit uint */, true, true}, 66 {linux.SOL_IP, linux.IP_MTU_DISCOVER, 0 /* can be 32-bit int or 8-bit uint */, true, true}, 67 {linux.SOL_IP, linux.IP_PKTINFO, sizeofInt32, true, true}, 68 {linux.SOL_IP, linux.IP_RECVERR, sizeofInt32, true, true}, 69 {linux.SOL_IP, linux.IP_RECVORIGDSTADDR, sizeofInt32, true, true}, 70 {linux.SOL_IP, linux.IP_RECVTOS, sizeofInt32, true, true}, 71 {linux.SOL_IP, linux.IP_RECVTTL, sizeofInt32, true, true}, 72 {linux.SOL_IP, linux.IP_TOS, 0 /* Can be 32, 16, or 8 bits */, true, true}, 73 {linux.SOL_IP, linux.IP_TTL, sizeofInt32, true, true}, 74 75 {linux.SOL_IPV6, linux.IPV6_CHECKSUM, sizeofInt32, true, true}, 76 {linux.SOL_IPV6, linux.IPV6_MULTICAST_HOPS, sizeofInt32, true, true}, 77 {linux.SOL_IPV6, linux.IPV6_RECVERR, sizeofInt32, true, true}, 78 {linux.SOL_IPV6, linux.IPV6_RECVHOPLIMIT, sizeofInt32, true, true}, 79 {linux.SOL_IPV6, linux.IPV6_RECVORIGDSTADDR, sizeofInt32, true, true}, 80 {linux.SOL_IPV6, linux.IPV6_RECVPKTINFO, sizeofInt32, true, true}, 81 {linux.SOL_IPV6, linux.IPV6_RECVTCLASS, sizeofInt32, true, true}, 82 {linux.SOL_IPV6, linux.IPV6_TCLASS, sizeofInt32, true, true}, 83 {linux.SOL_IPV6, linux.IPV6_UNICAST_HOPS, sizeofInt32, true, true}, 84 {linux.SOL_IPV6, linux.IPV6_V6ONLY, sizeofInt32, true, true}, 85 86 {linux.SOL_SOCKET, linux.SO_ACCEPTCONN, sizeofInt32, true, true}, 87 {linux.SOL_SOCKET, linux.SO_BINDTODEVICE, 0, true, true}, 88 {linux.SOL_SOCKET, linux.SO_BROADCAST, sizeofInt32, true, true}, 89 {linux.SOL_SOCKET, linux.SO_ERROR, sizeofInt32, true, false}, 90 {linux.SOL_SOCKET, linux.SO_KEEPALIVE, sizeofInt32, true, true}, 91 {linux.SOL_SOCKET, linux.SO_LINGER, linux.SizeOfLinger, true, true}, 92 {linux.SOL_SOCKET, linux.SO_NO_CHECK, sizeofInt32, true, true}, 93 {linux.SOL_SOCKET, linux.SO_OOBINLINE, sizeofInt32, true, true}, 94 {linux.SOL_SOCKET, linux.SO_PASSCRED, sizeofInt32, true, true}, 95 {linux.SOL_SOCKET, linux.SO_RCVBUF, sizeofInt32, true, true}, 96 {linux.SOL_SOCKET, linux.SO_RCVBUFFORCE, sizeofInt32, false, true}, 97 {linux.SOL_SOCKET, linux.SO_RCVLOWAT, sizeofInt32, true, true}, 98 {linux.SOL_SOCKET, linux.SO_REUSEADDR, sizeofInt32, true, true}, 99 {linux.SOL_SOCKET, linux.SO_REUSEPORT, sizeofInt32, true, true}, 100 {linux.SOL_SOCKET, linux.SO_SNDBUF, sizeofInt32, true, true}, 101 {linux.SOL_SOCKET, linux.SO_TIMESTAMP, sizeofInt32, true, true}, 102 103 {linux.SOL_TCP, linux.TCP_CONGESTION, 0 /* string */, true, true}, 104 {linux.SOL_TCP, linux.TCP_CORK, sizeofInt32, true, true}, 105 {linux.SOL_TCP, linux.TCP_DEFER_ACCEPT, sizeofInt32, true, true}, 106 {linux.SOL_TCP, linux.TCP_INFO, uint64(linux.SizeOfTCPInfo), true, false}, 107 {linux.SOL_TCP, linux.TCP_INQ, sizeofInt32, true, true}, 108 {linux.SOL_TCP, linux.TCP_KEEPCNT, sizeofInt32, true, true}, 109 {linux.SOL_TCP, linux.TCP_KEEPIDLE, sizeofInt32, true, true}, 110 {linux.SOL_TCP, linux.TCP_KEEPINTVL, sizeofInt32, true, true}, 111 {linux.SOL_TCP, linux.TCP_LINGER2, sizeofInt32, true, true}, 112 {linux.SOL_TCP, linux.TCP_MAXSEG, sizeofInt32, true, true}, 113 {linux.SOL_TCP, linux.TCP_NODELAY, sizeofInt32, true, true}, 114 {linux.SOL_TCP, linux.TCP_QUICKACK, sizeofInt32, true, true}, 115 {linux.SOL_TCP, linux.TCP_SYNCNT, sizeofInt32, true, true}, 116 {linux.SOL_TCP, linux.TCP_USER_TIMEOUT, sizeofInt32, true, true}, 117 {linux.SOL_TCP, linux.TCP_WINDOW_CLAMP, sizeofInt32, true, true}, 118 119 {linux.SOL_ICMPV6, linux.ICMPV6_FILTER, uint64(linux.SizeOfICMP6Filter), true, true}, 120 } 121 122 // sockOptMap is a map of {level, name} -> SockOpts. It is an optimization for 123 // looking up SockOpts by level and name. The map is initialized in the first 124 // call to Get/SetSockOpt. 125 var ( 126 sockOptMap map[levelName]SockOpt 127 sockOptMapOnce sync.Once 128 ) 129 130 type levelName struct { 131 level uint64 132 name uint64 133 } 134 135 func initSockOptMap(t *kernel.Task) { 136 opts := append(SockOpts, extraSockOpts(t)...) 137 sockOptMap = make(map[levelName]SockOpt, len(opts)) 138 for _, opt := range opts { 139 ln := levelName{opt.Level, opt.Name} 140 if _, ok := sockOptMap[ln]; ok { 141 panic(fmt.Sprintf("multiple sockopts with level=%d and name=%d", opt.Level, opt.Name)) 142 } 143 sockOptMap[ln] = opt 144 } 145 } 146 147 // GetSockOpt implements socket.Socket.GetSockOpt. 148 func (s *Socket) GetSockOpt(t *kernel.Task, level, name int, optValAddr hostarch.Addr, optLen int) (marshal.Marshallable, *syserr.Error) { 149 sockOptMapOnce.Do(func() { initSockOptMap(t) }) 150 151 if optLen < 0 { 152 return nil, syserr.ErrInvalidArgument 153 } 154 155 // Special case send/recv timeouts since those are handled internally. 156 if level == linux.SOL_SOCKET { 157 switch name { 158 case linux.SO_RCVTIMEO: 159 recvTimeout := linux.NsecToTimeval(s.RecvTimeout()) 160 return &recvTimeout, nil 161 case linux.SO_SNDTIMEO: 162 sndTimeout := linux.NsecToTimeval(s.SendTimeout()) 163 return &sndTimeout, nil 164 } 165 } 166 167 sockOpt, ok := sockOptMap[levelName{uint64(level), uint64(name)}] 168 if !ok { 169 return nil, syserr.ErrProtocolNotAvailable 170 } 171 if !sockOpt.AllowGet { 172 return nil, syserr.ErrInvalidArgument 173 } 174 var opt []byte 175 if sockOpt.Size > 0 { 176 // Validate size of input buffer. 177 if uint64(optLen) < sockOpt.Size { 178 // Special case for options that allow smaller buffers. 179 // 180 // To keep the syscall filters simple and restrictive, 181 // we use the full buffer size when calling the host, 182 // but truncate before returning to the application. 183 switch { 184 case level == linux.SOL_TCP && name == linux.TCP_INFO: 185 // Allow smaller buffer. 186 case level == linux.SOL_ICMPV6 && name == linux.ICMPV6_FILTER: 187 // Allow smaller buffer. 188 case level == linux.SOL_IP && name == linux.IP_TTL: 189 // Allow smaller buffer. 190 case level == linux.SOL_IPV6 && name == linux.IPV6_TCLASS: 191 // Allow smaller buffer. 192 default: 193 return nil, syserr.ErrInvalidArgument 194 } 195 } 196 opt = make([]byte, sockOpt.Size) 197 } else { 198 // No size checking. This is probably a string. Use the size 199 // they gave us. 200 opt = make([]byte, optLen) 201 } 202 if err := preGetSockOpt(t, level, name, optValAddr, opt); err != nil { 203 return nil, syserr.FromError(err) 204 } 205 var err error 206 opt, err = getsockopt(s.fd, level, name, opt) 207 if err != nil { 208 return nil, syserr.FromError(err) 209 } 210 opt = postGetSockOpt(t, level, name, opt) 211 // If option allows a smaller buffer, truncate it to desired size. 212 if uint64(optLen) < sockOpt.Size { 213 opt = opt[:optLen] 214 } 215 optP := primitive.ByteSlice(opt) 216 return &optP, nil 217 } 218 219 // SetSockOpt implements socket.Socket.SetSockOpt. 220 func (s *Socket) SetSockOpt(t *kernel.Task, level, name int, opt []byte) *syserr.Error { 221 sockOptMapOnce.Do(func() { initSockOptMap(t) }) 222 223 // Special case send/recv timeouts since those are handled internally. 224 if level == linux.SOL_SOCKET { 225 switch name { 226 case linux.SO_RCVTIMEO: 227 optLen := linux.SizeOfTimeval 228 var v linux.Timeval 229 v.UnmarshalBytes(opt[:optLen]) 230 if v.Usec < 0 || v.Usec >= int64(time.Second/time.Microsecond) { 231 return syserr.ErrDomain 232 } 233 s.SetRecvTimeout(v.ToNsecCapped()) 234 return nil 235 case linux.SO_SNDTIMEO: 236 optLen := linux.SizeOfTimeval 237 var v linux.Timeval 238 v.UnmarshalBytes(opt[:optLen]) 239 if v.Usec < 0 || v.Usec >= int64(time.Second/time.Microsecond) { 240 return syserr.ErrDomain 241 } 242 s.SetSendTimeout(v.ToNsecCapped()) 243 return nil 244 } 245 } 246 sockOpt, ok := sockOptMap[levelName{uint64(level), uint64(name)}] 247 if !ok { 248 // Pretend to accept socket options we don't understand. This 249 // seems dangerous, but it's what netstack does... 250 return nil 251 } 252 if !sockOpt.AllowSet { 253 return syserr.ErrInvalidArgument 254 } 255 if sockOpt.Size > 0 { 256 if uint64(len(opt)) < sockOpt.Size { 257 return syserr.ErrInvalidArgument 258 } 259 opt = opt[:sockOpt.Size] 260 } 261 if _, _, errno := unix.Syscall6(unix.SYS_SETSOCKOPT, uintptr(s.fd), uintptr(level), uintptr(name), uintptr(firstBytePtr(opt)), uintptr(len(opt)), 0); errno != 0 { 262 return syserr.FromError(errno) 263 } 264 return nil 265 }