gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/socket/hostinet/stack.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package hostinet 16 17 import ( 18 "fmt" 19 "io" 20 "io/ioutil" 21 "os" 22 "reflect" 23 "strconv" 24 "strings" 25 26 "gvisor.dev/gvisor/pkg/abi/linux" 27 "gvisor.dev/gvisor/pkg/context" 28 "gvisor.dev/gvisor/pkg/errors/linuxerr" 29 "gvisor.dev/gvisor/pkg/log" 30 "gvisor.dev/gvisor/pkg/sentry/inet" 31 "gvisor.dev/gvisor/pkg/sentry/socket/netlink/nlmsg" 32 "gvisor.dev/gvisor/pkg/syserr" 33 "gvisor.dev/gvisor/pkg/tcpip" 34 "gvisor.dev/gvisor/pkg/tcpip/stack" 35 "gvisor.dev/gvisor/pkg/usermem" 36 ) 37 38 var defaultRecvBufSize = inet.TCPBufferSize{ 39 Min: 4096, 40 Default: 87380, 41 Max: 6291456, 42 } 43 44 var defaultSendBufSize = inet.TCPBufferSize{ 45 Min: 4096, 46 Default: 16384, 47 Max: 4194304, 48 } 49 50 // Stack implements inet.Stack for host sockets. 51 type Stack struct { 52 // Stack is immutable. 53 supportsIPv6 bool 54 tcpRecovery inet.TCPLossRecovery 55 tcpRecvBufSize inet.TCPBufferSize 56 tcpSendBufSize inet.TCPBufferSize 57 tcpSACKEnabled bool 58 netDevFile *os.File 59 netSNMPFile *os.File 60 // allowedSocketTypes is the list of allowed socket types 61 allowedSocketTypes []AllowedSocketType 62 } 63 64 // Destroy implements inet.Stack.Destroy. 65 func (*Stack) Destroy() { 66 } 67 68 // NewStack returns an empty Stack containing no configuration. 69 func NewStack() *Stack { 70 return &Stack{} 71 } 72 73 // Configure sets up the stack using the current state of the host network. 74 func (s *Stack) Configure(allowRawSockets bool) error { 75 if _, err := os.Stat("/proc/net/if_inet6"); err == nil { 76 s.supportsIPv6 = true 77 } 78 79 s.tcpRecvBufSize = defaultRecvBufSize 80 if tcpRMem, err := readTCPBufferSizeFile("/proc/sys/net/ipv4/tcp_rmem"); err == nil { 81 s.tcpRecvBufSize = tcpRMem 82 } else { 83 log.Warningf("Failed to read TCP receive buffer size, using default values") 84 } 85 86 s.tcpSendBufSize = defaultSendBufSize 87 if tcpWMem, err := readTCPBufferSizeFile("/proc/sys/net/ipv4/tcp_wmem"); err == nil { 88 s.tcpSendBufSize = tcpWMem 89 } else { 90 log.Warningf("Failed to read TCP send buffer size, using default values") 91 } 92 93 // SACK is important for performance and even compatibility, assume it's 94 // enabled if we can't find the actual value. 95 s.tcpSACKEnabled = true 96 if sack, err := ioutil.ReadFile("/proc/sys/net/ipv4/tcp_sack"); err == nil { 97 s.tcpSACKEnabled = strings.TrimSpace(string(sack)) != "0" 98 } else { 99 log.Warningf("Failed to read if TCP SACK if enabled, setting to true") 100 } 101 102 if f, err := os.Open("/proc/net/dev"); err != nil { 103 log.Warningf("Failed to open /proc/net/dev: %v", err) 104 } else { 105 s.netDevFile = f 106 } 107 108 if f, err := os.Open("/proc/net/snmp"); err != nil { 109 log.Warningf("Failed to open /proc/net/snmp: %v", err) 110 } else { 111 s.netSNMPFile = f 112 } 113 114 s.allowedSocketTypes = AllowedSocketTypes 115 if allowRawSockets { 116 s.allowedSocketTypes = append(s.allowedSocketTypes, AllowedRawSocketTypes...) 117 } 118 119 return nil 120 } 121 122 func readTCPBufferSizeFile(filename string) (inet.TCPBufferSize, error) { 123 contents, err := ioutil.ReadFile(filename) 124 if err != nil { 125 return inet.TCPBufferSize{}, fmt.Errorf("failed to read %s: %v", filename, err) 126 } 127 ioseq := usermem.BytesIOSequence(contents) 128 fields := make([]int32, 3) 129 if n, err := usermem.CopyInt32StringsInVec(context.Background(), ioseq.IO, ioseq.Addrs, fields, ioseq.Opts); n != ioseq.NumBytes() || err != nil { 130 return inet.TCPBufferSize{}, fmt.Errorf("failed to parse %s (%q): got %v after %d/%d bytes", filename, contents, err, n, ioseq.NumBytes()) 131 } 132 return inet.TCPBufferSize{ 133 Min: int(fields[0]), 134 Default: int(fields[1]), 135 Max: int(fields[2]), 136 }, nil 137 } 138 139 // Interfaces implements inet.Stack.Interfaces. 140 func (s *Stack) Interfaces() map[int32]inet.Interface { 141 ifs, err := getInterfaces() 142 if err != nil { 143 log.Warningf("could not get host interface: %v", err) 144 return nil 145 } 146 147 // query interface features for each of the host interfaces. 148 if err := queryInterfaceFeatures(ifs); err != nil { 149 log.Warningf("could not query host interfaces: %v", err) 150 return nil 151 } 152 return ifs 153 } 154 155 // RemoveInterface implements inet.Stack.RemoveInterface. 156 func (*Stack) RemoveInterface(idx int32) error { 157 return removeInterface(idx) 158 } 159 160 // InterfaceAddrs implements inet.Stack.InterfaceAddrs. 161 func (s *Stack) InterfaceAddrs() map[int32][]inet.InterfaceAddr { 162 addrs, err := getInterfaceAddrs() 163 if err != nil { 164 log.Warningf("failed to get host interface addresses: %v", err) 165 return nil 166 } 167 return addrs 168 } 169 170 // SetInterface implements inet.Stack.SetInterface. 171 func (s *Stack) SetInterface(ctx context.Context, msg *nlmsg.Message) *syserr.Error { 172 var ifinfomsg linux.InterfaceInfoMessage 173 attrs, ok := msg.GetData(&ifinfomsg) 174 if !ok { 175 return syserr.ErrInvalidArgument 176 } 177 for !attrs.Empty() { 178 // The index is unspecified, search by the interface name. 179 ahdr, value, rest, ok := attrs.ParseFirst() 180 if !ok { 181 return syserr.ErrInvalidArgument 182 } 183 attrs = rest 184 switch ahdr.Type { 185 case linux.IFLA_IFNAME: 186 if len(value) < 1 { 187 return syserr.ErrInvalidArgument 188 } 189 if ifinfomsg.Index != 0 { 190 // Device name changing isn't supported yet. 191 return syserr.ErrNotSupported 192 } 193 ifname := string(value[:len(value)-1]) 194 for idx, ifa := range s.Interfaces() { 195 if ifname == ifa.Name { 196 ifinfomsg.Index = idx 197 break 198 } 199 } 200 default: 201 ctx.Warningf("unexpected attribute: %x", ahdr.Type) 202 return syserr.ErrNotSupported 203 } 204 } 205 if ifinfomsg.Index == 0 { 206 return syserr.ErrNoDevice 207 } 208 209 flags := msg.Header().Flags 210 if flags&(linux.NLM_F_EXCL|linux.NLM_F_REPLACE) != 0 { 211 return syserr.ErrExists 212 } 213 214 if ifinfomsg.Flags != 0 || ifinfomsg.Change != 0 { 215 if ifinfomsg.Change & ^uint32(linux.IFF_UP) != 0 { 216 ctx.Warningf("Unsupported ifi_change flags: %x", ifinfomsg.Change) 217 return syserr.ErrInvalidArgument 218 } 219 if ifinfomsg.Flags & ^uint32(linux.IFF_UP) != 0 { 220 ctx.Warningf("Unsupported ifi_flags: %x", ifinfomsg.Change) 221 return syserr.ErrInvalidArgument 222 } 223 // Netstack interfaces are always up. 224 } 225 return nil 226 } 227 228 // AddInterfaceAddr implements inet.Stack.AddInterfaceAddr. 229 func (*Stack) AddInterfaceAddr(idx int32, addr inet.InterfaceAddr) error { 230 return addInterfaceAddr(idx, addr) 231 } 232 233 // RemoveInterfaceAddr implements inet.Stack.RemoveInterfaceAddr. 234 func (*Stack) RemoveInterfaceAddr(idx int32, addr inet.InterfaceAddr) error { 235 return removeInterfaceAddr(idx, addr) 236 } 237 238 // SupportsIPv6 implements inet.Stack.SupportsIPv6. 239 func (s *Stack) SupportsIPv6() bool { 240 return s.supportsIPv6 241 } 242 243 // TCPReceiveBufferSize implements inet.Stack.TCPReceiveBufferSize. 244 func (s *Stack) TCPReceiveBufferSize() (inet.TCPBufferSize, error) { 245 return s.tcpRecvBufSize, nil 246 } 247 248 // SetTCPReceiveBufferSize implements inet.Stack.SetTCPReceiveBufferSize. 249 func (*Stack) SetTCPReceiveBufferSize(inet.TCPBufferSize) error { 250 return linuxerr.EACCES 251 } 252 253 // TCPSendBufferSize implements inet.Stack.TCPSendBufferSize. 254 func (s *Stack) TCPSendBufferSize() (inet.TCPBufferSize, error) { 255 return s.tcpSendBufSize, nil 256 } 257 258 // SetTCPSendBufferSize implements inet.Stack.SetTCPSendBufferSize. 259 func (*Stack) SetTCPSendBufferSize(inet.TCPBufferSize) error { 260 return linuxerr.EACCES 261 } 262 263 // TCPSACKEnabled implements inet.Stack.TCPSACKEnabled. 264 func (s *Stack) TCPSACKEnabled() (bool, error) { 265 return s.tcpSACKEnabled, nil 266 } 267 268 // SetTCPSACKEnabled implements inet.Stack.SetTCPSACKEnabled. 269 func (*Stack) SetTCPSACKEnabled(bool) error { 270 return linuxerr.EACCES 271 } 272 273 // TCPRecovery implements inet.Stack.TCPRecovery. 274 func (s *Stack) TCPRecovery() (inet.TCPLossRecovery, error) { 275 return s.tcpRecovery, nil 276 } 277 278 // SetTCPRecovery implements inet.Stack.SetTCPRecovery. 279 func (*Stack) SetTCPRecovery(inet.TCPLossRecovery) error { 280 return linuxerr.EACCES 281 } 282 283 // getLine reads one line from proc file, with specified prefix. 284 // The last argument, withHeader, specifies if it contains line header. 285 func getLine(f *os.File, prefix string, withHeader bool) string { 286 data := make([]byte, 4096) 287 288 if _, err := f.Seek(0, 0); err != nil { 289 return "" 290 } 291 292 if _, err := io.ReadFull(f, data); err != io.ErrUnexpectedEOF { 293 return "" 294 } 295 296 prefix = prefix + ":" 297 lines := strings.Split(string(data), "\n") 298 for _, l := range lines { 299 l = strings.TrimSpace(l) 300 if strings.HasPrefix(l, prefix) { 301 if withHeader { 302 withHeader = false 303 continue 304 } 305 return l 306 } 307 } 308 return "" 309 } 310 311 func toSlice(i any) []uint64 { 312 v := reflect.Indirect(reflect.ValueOf(i)) 313 return v.Slice(0, v.Len()).Interface().([]uint64) 314 } 315 316 // Statistics implements inet.Stack.Statistics. 317 func (s *Stack) Statistics(stat any, arg string) error { 318 var ( 319 snmpTCP bool 320 rawLine string 321 sliceStat []uint64 322 ) 323 324 switch stat.(type) { 325 case *inet.StatDev: 326 if s.netDevFile == nil { 327 return fmt.Errorf("/proc/net/dev is not opened for hostinet") 328 } 329 rawLine = getLine(s.netDevFile, arg, false /* with no header */) 330 case *inet.StatSNMPIP, *inet.StatSNMPICMP, *inet.StatSNMPICMPMSG, *inet.StatSNMPTCP, *inet.StatSNMPUDP, *inet.StatSNMPUDPLite: 331 if s.netSNMPFile == nil { 332 return fmt.Errorf("/proc/net/snmp is not opened for hostinet") 333 } 334 rawLine = getLine(s.netSNMPFile, arg, true) 335 default: 336 return syserr.ErrEndpointOperation.ToError() 337 } 338 339 if rawLine == "" { 340 return fmt.Errorf("failed to get raw line") 341 } 342 343 parts := strings.SplitN(rawLine, ":", 2) 344 if len(parts) != 2 { 345 return fmt.Errorf("failed to get prefix from: %q", rawLine) 346 } 347 348 sliceStat = toSlice(stat) 349 fields := strings.Fields(strings.TrimSpace(parts[1])) 350 if len(fields) != len(sliceStat) { 351 return fmt.Errorf("failed to parse fields: %q", rawLine) 352 } 353 if _, ok := stat.(*inet.StatSNMPTCP); ok { 354 snmpTCP = true 355 } 356 for i := 0; i < len(sliceStat); i++ { 357 var err error 358 if snmpTCP && i == 3 { 359 var tmp int64 360 // MaxConn field is signed, RFC 2012. 361 tmp, err = strconv.ParseInt(fields[i], 10, 64) 362 sliceStat[i] = uint64(tmp) // Convert back to int before use. 363 } else { 364 sliceStat[i], err = strconv.ParseUint(fields[i], 10, 64) 365 } 366 if err != nil { 367 return fmt.Errorf("failed to parse field %d from: %q, %v", i, rawLine, err) 368 } 369 } 370 371 return nil 372 } 373 374 // RouteTable implements inet.Stack.RouteTable. 375 func (s *Stack) RouteTable() []inet.Route { 376 routes, err := getRoutes() 377 if err != nil { 378 log.Warningf("failed to get routes: %v", err) 379 return nil 380 } 381 // Prepend empty route. 382 return append([]inet.Route(nil), routes...) 383 } 384 385 // Pause implements inet.Stack.Pause. 386 func (*Stack) Pause() {} 387 388 // Restore implements inet.Stack.Restore. 389 func (*Stack) Restore() {} 390 391 // Resume implements inet.Stack.Resume. 392 func (*Stack) Resume() {} 393 394 // RegisteredEndpoints implements inet.Stack.RegisteredEndpoints. 395 func (*Stack) RegisteredEndpoints() []stack.TransportEndpoint { return nil } 396 397 // CleanupEndpoints implements inet.Stack.CleanupEndpoints. 398 func (*Stack) CleanupEndpoints() []stack.TransportEndpoint { return nil } 399 400 // RestoreCleanupEndpoints implements inet.Stack.RestoreCleanupEndpoints. 401 func (*Stack) RestoreCleanupEndpoints([]stack.TransportEndpoint) {} 402 403 // SetForwarding implements inet.Stack.SetForwarding. 404 func (*Stack) SetForwarding(tcpip.NetworkProtocolNumber, bool) error { 405 return linuxerr.EACCES 406 } 407 408 // PortRange implements inet.Stack.PortRange. 409 func (*Stack) PortRange() (uint16, uint16) { 410 // Use the default Linux values per net/ipv4/af_inet.c:inet_init_net(). 411 return 32768, 60999 412 } 413 414 // SetPortRange implements inet.Stack.SetPortRange. 415 func (*Stack) SetPortRange(uint16, uint16) error { 416 return linuxerr.EACCES 417 }