github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/socket/hostinet/sockopt.go (about) 1 // Copyright 2023 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package hostinet 16 17 import ( 18 "fmt" 19 "sync" 20 "time" 21 22 "github.com/MerlinKodo/gvisor/pkg/abi/linux" 23 "github.com/MerlinKodo/gvisor/pkg/hostarch" 24 "github.com/MerlinKodo/gvisor/pkg/marshal" 25 "github.com/MerlinKodo/gvisor/pkg/marshal/primitive" 26 "github.com/MerlinKodo/gvisor/pkg/sentry/kernel" 27 "github.com/MerlinKodo/gvisor/pkg/syserr" 28 "golang.org/x/sys/unix" 29 ) 30 31 const ( 32 sizeofInt16 = 2 33 sizeofInt32 = 4 34 ) 35 36 // SockOpt is used to generate get/setsockopt handlers and filters. 37 type SockOpt struct { 38 // Level the socket option applies to. 39 Level uint64 40 // Name of the option. 41 Name uint64 42 // Size of the parameter. A size of 0 indicates that any size is 43 // allowed (used for string or other variable-length types). 44 Size uint64 45 // Support getsockopt on this option. 46 AllowGet bool 47 // Support setsockopt on this option. 48 AllowSet bool 49 } 50 51 // SockOpts are the socket options supported by hostinet by making syscalls to the host. 52 // 53 // Note the following socket options are supported but do not need syscalls to 54 // the host, so do not appear on this list: 55 // - SO_TYPE, SO_PROTOCOL, SO_DOMAIN are handled at the syscall level in 56 // syscalls/sys_socket.go. 57 // - SO_SNDTIMEOU, SO_RCVTIMEO are handled internally by setting the embedded 58 // socket.SendReceiveTimeout. 59 var SockOpts = []SockOpt{ 60 {linux.SOL_IP, linux.IP_ADD_MEMBERSHIP, 0, false, true}, 61 {linux.SOL_IP, linux.IP_DROP_MEMBERSHIP, 0, false, true}, 62 {linux.SOL_IP, linux.IP_HDRINCL, sizeofInt32, true, true}, 63 {linux.SOL_IP, linux.IP_MULTICAST_IF, uint64(linux.SizeOfInetAddr), true, true}, 64 {linux.SOL_IP, linux.IP_MULTICAST_LOOP, 0 /* can be 32-bit int or 8-bit uint */, true, true}, 65 {linux.SOL_IP, linux.IP_MULTICAST_TTL, 0 /* can be 32-bit int or 8-bit uint */, true, true}, 66 {linux.SOL_IP, linux.IP_PKTINFO, sizeofInt32, true, true}, 67 {linux.SOL_IP, linux.IP_RECVERR, sizeofInt32, true, true}, 68 {linux.SOL_IP, linux.IP_RECVORIGDSTADDR, sizeofInt32, true, true}, 69 {linux.SOL_IP, linux.IP_RECVTOS, sizeofInt32, true, true}, 70 {linux.SOL_IP, linux.IP_RECVTTL, sizeofInt32, true, true}, 71 {linux.SOL_IP, linux.IP_TOS, 0 /* Can be 32, 16, or 8 bits */, true, true}, 72 {linux.SOL_IP, linux.IP_TTL, sizeofInt32, true, true}, 73 74 {linux.SOL_IPV6, linux.IPV6_CHECKSUM, sizeofInt32, true, true}, 75 {linux.SOL_IPV6, linux.IPV6_MULTICAST_HOPS, sizeofInt32, true, true}, 76 {linux.SOL_IPV6, linux.IPV6_RECVERR, sizeofInt32, true, true}, 77 {linux.SOL_IPV6, linux.IPV6_RECVHOPLIMIT, sizeofInt32, true, true}, 78 {linux.SOL_IPV6, linux.IPV6_RECVORIGDSTADDR, sizeofInt32, true, true}, 79 {linux.SOL_IPV6, linux.IPV6_RECVPKTINFO, sizeofInt32, true, true}, 80 {linux.SOL_IPV6, linux.IPV6_RECVTCLASS, sizeofInt32, true, true}, 81 {linux.SOL_IPV6, linux.IPV6_TCLASS, sizeofInt32, true, true}, 82 {linux.SOL_IPV6, linux.IPV6_UNICAST_HOPS, sizeofInt32, true, true}, 83 {linux.SOL_IPV6, linux.IPV6_V6ONLY, sizeofInt32, true, true}, 84 85 {linux.SOL_SOCKET, linux.SO_ACCEPTCONN, sizeofInt32, true, true}, 86 {linux.SOL_SOCKET, linux.SO_BINDTODEVICE, 0, true, true}, 87 {linux.SOL_SOCKET, linux.SO_BROADCAST, sizeofInt32, true, true}, 88 {linux.SOL_SOCKET, linux.SO_ERROR, sizeofInt32, true, false}, 89 {linux.SOL_SOCKET, linux.SO_KEEPALIVE, sizeofInt32, true, true}, 90 {linux.SOL_SOCKET, linux.SO_LINGER, linux.SizeOfLinger, true, true}, 91 {linux.SOL_SOCKET, linux.SO_NO_CHECK, sizeofInt32, true, true}, 92 {linux.SOL_SOCKET, linux.SO_OOBINLINE, sizeofInt32, true, true}, 93 {linux.SOL_SOCKET, linux.SO_PASSCRED, sizeofInt32, true, true}, 94 {linux.SOL_SOCKET, linux.SO_RCVBUF, sizeofInt32, true, true}, 95 {linux.SOL_SOCKET, linux.SO_RCVBUFFORCE, sizeofInt32, false, true}, 96 {linux.SOL_SOCKET, linux.SO_RCVLOWAT, sizeofInt32, true, true}, 97 {linux.SOL_SOCKET, linux.SO_REUSEADDR, sizeofInt32, true, true}, 98 {linux.SOL_SOCKET, linux.SO_REUSEPORT, sizeofInt32, true, true}, 99 {linux.SOL_SOCKET, linux.SO_SNDBUF, sizeofInt32, true, true}, 100 {linux.SOL_SOCKET, linux.SO_TIMESTAMP, sizeofInt32, true, true}, 101 102 {linux.SOL_TCP, linux.TCP_CONGESTION, 0 /* string */, true, true}, 103 {linux.SOL_TCP, linux.TCP_CORK, sizeofInt32, true, true}, 104 {linux.SOL_TCP, linux.TCP_DEFER_ACCEPT, sizeofInt32, true, true}, 105 {linux.SOL_TCP, linux.TCP_INFO, uint64(linux.SizeOfTCPInfo), true, false}, 106 {linux.SOL_TCP, linux.TCP_INQ, sizeofInt32, true, true}, 107 {linux.SOL_TCP, linux.TCP_KEEPCNT, sizeofInt32, true, true}, 108 {linux.SOL_TCP, linux.TCP_KEEPIDLE, sizeofInt32, true, true}, 109 {linux.SOL_TCP, linux.TCP_KEEPINTVL, sizeofInt32, true, true}, 110 {linux.SOL_TCP, linux.TCP_LINGER2, sizeofInt32, true, true}, 111 {linux.SOL_TCP, linux.TCP_MAXSEG, sizeofInt32, true, true}, 112 {linux.SOL_TCP, linux.TCP_NODELAY, sizeofInt32, true, true}, 113 {linux.SOL_TCP, linux.TCP_QUICKACK, sizeofInt32, true, true}, 114 {linux.SOL_TCP, linux.TCP_SYNCNT, sizeofInt32, true, true}, 115 {linux.SOL_TCP, linux.TCP_USER_TIMEOUT, sizeofInt32, true, true}, 116 {linux.SOL_TCP, linux.TCP_WINDOW_CLAMP, sizeofInt32, true, true}, 117 118 {linux.SOL_ICMPV6, linux.ICMPV6_FILTER, uint64(linux.SizeOfICMP6Filter), true, true}, 119 } 120 121 // sockOptMap is a map of {level, name} -> SockOpts. It is an optimization for 122 // looking up SockOpts by level and name. The map is initialized in the first 123 // call to Get/SetSockOpt. 124 var ( 125 sockOptMap map[levelName]SockOpt 126 sockOptMapOnce sync.Once 127 ) 128 129 type levelName struct { 130 level uint64 131 name uint64 132 } 133 134 func initSockOptMap(t *kernel.Task) { 135 opts := append(SockOpts, extraSockOpts(t)...) 136 sockOptMap = make(map[levelName]SockOpt, len(opts)) 137 for _, opt := range opts { 138 ln := levelName{opt.Level, opt.Name} 139 if _, ok := sockOptMap[ln]; ok { 140 panic(fmt.Sprintf("multiple sockopts with level=%d and name=%d", opt.Level, opt.Name)) 141 } 142 sockOptMap[ln] = opt 143 } 144 } 145 146 // GetSockOpt implements socket.Socket.GetSockOpt. 147 func (s *Socket) GetSockOpt(t *kernel.Task, level, name int, optValAddr hostarch.Addr, optLen int) (marshal.Marshallable, *syserr.Error) { 148 sockOptMapOnce.Do(func() { initSockOptMap(t) }) 149 150 if optLen < 0 { 151 return nil, syserr.ErrInvalidArgument 152 } 153 154 // Special case send/recv timeouts since those are handled internally. 155 if level == linux.SOL_SOCKET { 156 switch name { 157 case linux.SO_RCVTIMEO: 158 recvTimeout := linux.NsecToTimeval(s.RecvTimeout()) 159 return &recvTimeout, nil 160 case linux.SO_SNDTIMEO: 161 sndTimeout := linux.NsecToTimeval(s.SendTimeout()) 162 return &sndTimeout, nil 163 } 164 } 165 166 sockOpt, ok := sockOptMap[levelName{uint64(level), uint64(name)}] 167 if !ok { 168 return nil, syserr.ErrProtocolNotAvailable 169 } 170 if !sockOpt.AllowGet { 171 return nil, syserr.ErrInvalidArgument 172 } 173 var opt []byte 174 if sockOpt.Size > 0 { 175 // Validate size of input buffer. 176 if uint64(optLen) < sockOpt.Size { 177 // Special case for options that allow smaller buffers. 178 // 179 // To keep the syscall filters simple and restrictive, 180 // we use the full buffer size when calling the host, 181 // but truncate before returning to the application. 182 switch { 183 case level == linux.SOL_TCP && name == linux.TCP_INFO: 184 // Allow smaller buffer. 185 case level == linux.SOL_ICMPV6 && name == linux.ICMPV6_FILTER: 186 // Allow smaller buffer. 187 case level == linux.SOL_IP && name == linux.IP_TTL: 188 // Allow smaller buffer. 189 case level == linux.SOL_IPV6 && name == linux.IPV6_TCLASS: 190 // Allow smaller buffer. 191 default: 192 return nil, syserr.ErrInvalidArgument 193 } 194 } 195 opt = make([]byte, sockOpt.Size) 196 } else { 197 // No size checking. This is probably a string. Use the size 198 // they gave us. 199 opt = make([]byte, optLen) 200 } 201 if err := preGetSockOpt(t, level, name, optValAddr, opt); err != nil { 202 return nil, syserr.FromError(err) 203 } 204 var err error 205 opt, err = getsockopt(s.fd, level, name, opt) 206 if err != nil { 207 return nil, syserr.FromError(err) 208 } 209 opt = postGetSockOpt(t, level, name, opt) 210 // If option allows a smaller buffer, truncate it to desired size. 211 if uint64(optLen) < sockOpt.Size { 212 opt = opt[:optLen] 213 } 214 optP := primitive.ByteSlice(opt) 215 return &optP, nil 216 } 217 218 // SetSockOpt implements socket.Socket.SetSockOpt. 219 func (s *Socket) SetSockOpt(t *kernel.Task, level, name int, opt []byte) *syserr.Error { 220 sockOptMapOnce.Do(func() { initSockOptMap(t) }) 221 222 // Special case send/recv timeouts since those are handled internally. 223 if level == linux.SOL_SOCKET { 224 switch name { 225 case linux.SO_RCVTIMEO: 226 optLen := linux.SizeOfTimeval 227 var v linux.Timeval 228 v.UnmarshalBytes(opt[:optLen]) 229 if v.Usec < 0 || v.Usec >= int64(time.Second/time.Microsecond) { 230 return syserr.ErrDomain 231 } 232 s.SetRecvTimeout(v.ToNsecCapped()) 233 return nil 234 case linux.SO_SNDTIMEO: 235 optLen := linux.SizeOfTimeval 236 var v linux.Timeval 237 v.UnmarshalBytes(opt[:optLen]) 238 if v.Usec < 0 || v.Usec >= int64(time.Second/time.Microsecond) { 239 return syserr.ErrDomain 240 } 241 s.SetSendTimeout(v.ToNsecCapped()) 242 return nil 243 } 244 } 245 sockOpt, ok := sockOptMap[levelName{uint64(level), uint64(name)}] 246 if !ok { 247 // Pretend to accept socket options we don't understand. This 248 // seems dangerous, but it's what netstack does... 249 return nil 250 } 251 if !sockOpt.AllowSet { 252 return syserr.ErrInvalidArgument 253 } 254 if sockOpt.Size > 0 { 255 if uint64(len(opt)) < sockOpt.Size { 256 return syserr.ErrInvalidArgument 257 } 258 opt = opt[:sockOpt.Size] 259 } 260 if _, _, errno := unix.Syscall6(unix.SYS_SETSOCKOPT, uintptr(s.fd), uintptr(level), uintptr(name), uintptr(firstBytePtr(opt)), uintptr(len(opt)), 0); errno != 0 { 261 return syserr.FromError(errno) 262 } 263 return nil 264 }