github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/socket/hostinet/stack.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package hostinet 16 17 import ( 18 "encoding/binary" 19 "fmt" 20 "io" 21 "io/ioutil" 22 "os" 23 "reflect" 24 "strconv" 25 "strings" 26 27 "syscall" 28 29 "golang.org/x/sys/unix" 30 "github.com/SagerNet/gvisor/pkg/abi/linux" 31 "github.com/SagerNet/gvisor/pkg/context" 32 "github.com/SagerNet/gvisor/pkg/errors/linuxerr" 33 "github.com/SagerNet/gvisor/pkg/log" 34 "github.com/SagerNet/gvisor/pkg/marshal/primitive" 35 "github.com/SagerNet/gvisor/pkg/sentry/inet" 36 "github.com/SagerNet/gvisor/pkg/syserr" 37 "github.com/SagerNet/gvisor/pkg/tcpip" 38 "github.com/SagerNet/gvisor/pkg/tcpip/stack" 39 "github.com/SagerNet/gvisor/pkg/usermem" 40 ) 41 42 var defaultRecvBufSize = inet.TCPBufferSize{ 43 Min: 4096, 44 Default: 87380, 45 Max: 6291456, 46 } 47 48 var defaultSendBufSize = inet.TCPBufferSize{ 49 Min: 4096, 50 Default: 16384, 51 Max: 4194304, 52 } 53 54 // Stack implements inet.Stack for host sockets. 55 type Stack struct { 56 // Stack is immutable. 57 interfaces map[int32]inet.Interface 58 interfaceAddrs map[int32][]inet.InterfaceAddr 59 routes []inet.Route 60 supportsIPv6 bool 61 tcpRecovery inet.TCPLossRecovery 62 tcpRecvBufSize inet.TCPBufferSize 63 tcpSendBufSize inet.TCPBufferSize 64 tcpSACKEnabled bool 65 netDevFile *os.File 66 netSNMPFile *os.File 67 } 68 69 // NewStack returns an empty Stack containing no configuration. 70 func NewStack() *Stack { 71 return &Stack{ 72 interfaces: make(map[int32]inet.Interface), 73 interfaceAddrs: make(map[int32][]inet.InterfaceAddr), 74 } 75 } 76 77 // Configure sets up the stack using the current state of the host network. 78 func (s *Stack) Configure() error { 79 if err := addHostInterfaces(s); err != nil { 80 return err 81 } 82 83 if err := addHostRoutes(s); err != nil { 84 return err 85 } 86 87 if _, err := os.Stat("/proc/net/if_inet6"); err == nil { 88 s.supportsIPv6 = true 89 } 90 91 s.tcpRecvBufSize = defaultRecvBufSize 92 if tcpRMem, err := readTCPBufferSizeFile("/proc/sys/net/ipv4/tcp_rmem"); err == nil { 93 s.tcpRecvBufSize = tcpRMem 94 } else { 95 log.Warningf("Failed to read TCP receive buffer size, using default values") 96 } 97 98 s.tcpSendBufSize = defaultSendBufSize 99 if tcpWMem, err := readTCPBufferSizeFile("/proc/sys/net/ipv4/tcp_wmem"); err == nil { 100 s.tcpSendBufSize = tcpWMem 101 } else { 102 log.Warningf("Failed to read TCP send buffer size, using default values") 103 } 104 105 // SACK is important for performance and even compatibility, assume it's 106 // enabled if we can't find the actual value. 107 s.tcpSACKEnabled = true 108 if sack, err := ioutil.ReadFile("/proc/sys/net/ipv4/tcp_sack"); err == nil { 109 s.tcpSACKEnabled = strings.TrimSpace(string(sack)) != "0" 110 } else { 111 log.Warningf("Failed to read if TCP SACK if enabled, setting to true") 112 } 113 114 if f, err := os.Open("/proc/net/dev"); err != nil { 115 log.Warningf("Failed to open /proc/net/dev: %v", err) 116 } else { 117 s.netDevFile = f 118 } 119 120 if f, err := os.Open("/proc/net/snmp"); err != nil { 121 log.Warningf("Failed to open /proc/net/snmp: %v", err) 122 } else { 123 s.netSNMPFile = f 124 } 125 126 return nil 127 } 128 129 // ExtractHostInterfaces will populate an interface map and 130 // interfaceAddrs map with the results of the equivalent 131 // netlink messages. 132 func ExtractHostInterfaces(links []syscall.NetlinkMessage, addrs []syscall.NetlinkMessage, interfaces map[int32]inet.Interface, interfaceAddrs map[int32][]inet.InterfaceAddr) error { 133 for _, link := range links { 134 if link.Header.Type != unix.RTM_NEWLINK { 135 continue 136 } 137 if len(link.Data) < unix.SizeofIfInfomsg { 138 return fmt.Errorf("RTM_GETLINK returned RTM_NEWLINK message with invalid data length (%d bytes, expected at least %d bytes)", len(link.Data), unix.SizeofIfInfomsg) 139 } 140 var ifinfo linux.InterfaceInfoMessage 141 ifinfo.UnmarshalUnsafe(link.Data[:ifinfo.SizeBytes()]) 142 inetIF := inet.Interface{ 143 DeviceType: ifinfo.Type, 144 Flags: ifinfo.Flags, 145 } 146 // Not clearly documented: syscall.ParseNetlinkRouteAttr will check the 147 // syscall.NetlinkMessage.Header.Type and skip the struct ifinfomsg 148 // accordingly. 149 attrs, err := syscall.ParseNetlinkRouteAttr(&link) 150 if err != nil { 151 return fmt.Errorf("RTM_GETLINK returned RTM_NEWLINK message with invalid rtattrs: %v", err) 152 } 153 for _, attr := range attrs { 154 switch attr.Attr.Type { 155 case unix.IFLA_ADDRESS: 156 inetIF.Addr = attr.Value 157 case unix.IFLA_IFNAME: 158 inetIF.Name = string(attr.Value[:len(attr.Value)-1]) 159 } 160 } 161 interfaces[ifinfo.Index] = inetIF 162 } 163 164 for _, addr := range addrs { 165 if addr.Header.Type != unix.RTM_NEWADDR { 166 continue 167 } 168 if len(addr.Data) < unix.SizeofIfAddrmsg { 169 return fmt.Errorf("RTM_GETADDR returned RTM_NEWADDR message with invalid data length (%d bytes, expected at least %d bytes)", len(addr.Data), unix.SizeofIfAddrmsg) 170 } 171 var ifaddr linux.InterfaceAddrMessage 172 ifaddr.UnmarshalUnsafe(addr.Data[:ifaddr.SizeBytes()]) 173 inetAddr := inet.InterfaceAddr{ 174 Family: ifaddr.Family, 175 PrefixLen: ifaddr.PrefixLen, 176 Flags: ifaddr.Flags, 177 } 178 attrs, err := syscall.ParseNetlinkRouteAttr(&addr) 179 if err != nil { 180 return fmt.Errorf("RTM_GETADDR returned RTM_NEWADDR message with invalid rtattrs: %v", err) 181 } 182 for _, attr := range attrs { 183 switch attr.Attr.Type { 184 case unix.IFA_ADDRESS: 185 inetAddr.Addr = attr.Value 186 } 187 } 188 interfaceAddrs[int32(ifaddr.Index)] = append(interfaceAddrs[int32(ifaddr.Index)], inetAddr) 189 } 190 191 return nil 192 } 193 194 // ExtractHostRoutes populates the given routes slice with the data from the 195 // host route table. 196 func ExtractHostRoutes(routeMsgs []syscall.NetlinkMessage) ([]inet.Route, error) { 197 var routes []inet.Route 198 for _, routeMsg := range routeMsgs { 199 if routeMsg.Header.Type != unix.RTM_NEWROUTE { 200 continue 201 } 202 203 var ifRoute linux.RouteMessage 204 ifRoute.UnmarshalUnsafe(routeMsg.Data[:ifRoute.SizeBytes()]) 205 inetRoute := inet.Route{ 206 Family: ifRoute.Family, 207 DstLen: ifRoute.DstLen, 208 SrcLen: ifRoute.SrcLen, 209 TOS: ifRoute.TOS, 210 Table: ifRoute.Table, 211 Protocol: ifRoute.Protocol, 212 Scope: ifRoute.Scope, 213 Type: ifRoute.Type, 214 Flags: ifRoute.Flags, 215 } 216 217 // Not clearly documented: syscall.ParseNetlinkRouteAttr will check the 218 // syscall.NetlinkMessage.Header.Type and skip the struct rtmsg 219 // accordingly. 220 attrs, err := syscall.ParseNetlinkRouteAttr(&routeMsg) 221 if err != nil { 222 return nil, fmt.Errorf("RTM_GETROUTE returned RTM_NEWROUTE message with invalid rtattrs: %v", err) 223 } 224 225 for _, attr := range attrs { 226 switch attr.Attr.Type { 227 case unix.RTA_DST: 228 inetRoute.DstAddr = attr.Value 229 case unix.RTA_SRC: 230 inetRoute.SrcAddr = attr.Value 231 case unix.RTA_GATEWAY: 232 inetRoute.GatewayAddr = attr.Value 233 case unix.RTA_OIF: 234 expected := int(binary.Size(inetRoute.OutputInterface)) 235 if len(attr.Value) != expected { 236 return nil, fmt.Errorf("RTM_GETROUTE returned RTM_NEWROUTE message with invalid attribute data length (%d bytes, expected %d bytes)", len(attr.Value), expected) 237 } 238 var outputIF primitive.Int32 239 outputIF.UnmarshalUnsafe(attr.Value) 240 inetRoute.OutputInterface = int32(outputIF) 241 } 242 } 243 244 routes = append(routes, inetRoute) 245 } 246 247 return routes, nil 248 } 249 250 func addHostInterfaces(s *Stack) error { 251 links, err := doNetlinkRouteRequest(unix.RTM_GETLINK) 252 if err != nil { 253 return fmt.Errorf("RTM_GETLINK failed: %v", err) 254 } 255 256 addrs, err := doNetlinkRouteRequest(unix.RTM_GETADDR) 257 if err != nil { 258 return fmt.Errorf("RTM_GETADDR failed: %v", err) 259 } 260 261 return ExtractHostInterfaces(links, addrs, s.interfaces, s.interfaceAddrs) 262 } 263 264 func addHostRoutes(s *Stack) error { 265 routes, err := doNetlinkRouteRequest(unix.RTM_GETROUTE) 266 if err != nil { 267 return fmt.Errorf("RTM_GETROUTE failed: %v", err) 268 } 269 270 s.routes, err = ExtractHostRoutes(routes) 271 if err != nil { 272 return err 273 } 274 275 return nil 276 } 277 278 func doNetlinkRouteRequest(req int) ([]syscall.NetlinkMessage, error) { 279 data, err := syscall.NetlinkRIB(req, syscall.AF_UNSPEC) 280 if err != nil { 281 return nil, err 282 } 283 return syscall.ParseNetlinkMessage(data) 284 } 285 286 func readTCPBufferSizeFile(filename string) (inet.TCPBufferSize, error) { 287 contents, err := ioutil.ReadFile(filename) 288 if err != nil { 289 return inet.TCPBufferSize{}, fmt.Errorf("failed to read %s: %v", filename, err) 290 } 291 ioseq := usermem.BytesIOSequence(contents) 292 fields := make([]int32, 3) 293 if n, err := usermem.CopyInt32StringsInVec(context.Background(), ioseq.IO, ioseq.Addrs, fields, ioseq.Opts); n != ioseq.NumBytes() || err != nil { 294 return inet.TCPBufferSize{}, fmt.Errorf("failed to parse %s (%q): got %v after %d/%d bytes", filename, contents, err, n, ioseq.NumBytes()) 295 } 296 return inet.TCPBufferSize{ 297 Min: int(fields[0]), 298 Default: int(fields[1]), 299 Max: int(fields[2]), 300 }, nil 301 } 302 303 // Interfaces implements inet.Stack.Interfaces. 304 func (s *Stack) Interfaces() map[int32]inet.Interface { 305 interfaces := make(map[int32]inet.Interface) 306 for k, v := range s.interfaces { 307 interfaces[k] = v 308 } 309 return interfaces 310 } 311 312 // InterfaceAddrs implements inet.Stack.InterfaceAddrs. 313 func (s *Stack) InterfaceAddrs() map[int32][]inet.InterfaceAddr { 314 addrs := make(map[int32][]inet.InterfaceAddr) 315 for k, v := range s.interfaceAddrs { 316 addrs[k] = append([]inet.InterfaceAddr(nil), v...) 317 } 318 return addrs 319 } 320 321 // AddInterfaceAddr implements inet.Stack.AddInterfaceAddr. 322 func (s *Stack) AddInterfaceAddr(int32, inet.InterfaceAddr) error { 323 return linuxerr.EACCES 324 } 325 326 // RemoveInterfaceAddr implements inet.Stack.RemoveInterfaceAddr. 327 func (s *Stack) RemoveInterfaceAddr(int32, inet.InterfaceAddr) error { 328 return linuxerr.EACCES 329 } 330 331 // SupportsIPv6 implements inet.Stack.SupportsIPv6. 332 func (s *Stack) SupportsIPv6() bool { 333 return s.supportsIPv6 334 } 335 336 // TCPReceiveBufferSize implements inet.Stack.TCPReceiveBufferSize. 337 func (s *Stack) TCPReceiveBufferSize() (inet.TCPBufferSize, error) { 338 return s.tcpRecvBufSize, nil 339 } 340 341 // SetTCPReceiveBufferSize implements inet.Stack.SetTCPReceiveBufferSize. 342 func (s *Stack) SetTCPReceiveBufferSize(size inet.TCPBufferSize) error { 343 return linuxerr.EACCES 344 } 345 346 // TCPSendBufferSize implements inet.Stack.TCPSendBufferSize. 347 func (s *Stack) TCPSendBufferSize() (inet.TCPBufferSize, error) { 348 return s.tcpSendBufSize, nil 349 } 350 351 // SetTCPSendBufferSize implements inet.Stack.SetTCPSendBufferSize. 352 func (s *Stack) SetTCPSendBufferSize(size inet.TCPBufferSize) error { 353 return linuxerr.EACCES 354 } 355 356 // TCPSACKEnabled implements inet.Stack.TCPSACKEnabled. 357 func (s *Stack) TCPSACKEnabled() (bool, error) { 358 return s.tcpSACKEnabled, nil 359 } 360 361 // SetTCPSACKEnabled implements inet.Stack.SetTCPSACKEnabled. 362 func (s *Stack) SetTCPSACKEnabled(bool) error { 363 return linuxerr.EACCES 364 } 365 366 // TCPRecovery implements inet.Stack.TCPRecovery. 367 func (s *Stack) TCPRecovery() (inet.TCPLossRecovery, error) { 368 return s.tcpRecovery, nil 369 } 370 371 // SetTCPRecovery implements inet.Stack.SetTCPRecovery. 372 func (s *Stack) SetTCPRecovery(inet.TCPLossRecovery) error { 373 return linuxerr.EACCES 374 } 375 376 // getLine reads one line from proc file, with specified prefix. 377 // The last argument, withHeader, specifies if it contains line header. 378 func getLine(f *os.File, prefix string, withHeader bool) string { 379 data := make([]byte, 4096) 380 381 if _, err := f.Seek(0, 0); err != nil { 382 return "" 383 } 384 385 if _, err := io.ReadFull(f, data); err != io.ErrUnexpectedEOF { 386 return "" 387 } 388 389 prefix = prefix + ":" 390 lines := strings.Split(string(data), "\n") 391 for _, l := range lines { 392 l = strings.TrimSpace(l) 393 if strings.HasPrefix(l, prefix) { 394 if withHeader { 395 withHeader = false 396 continue 397 } 398 return l 399 } 400 } 401 return "" 402 } 403 404 func toSlice(i interface{}) []uint64 { 405 v := reflect.Indirect(reflect.ValueOf(i)) 406 return v.Slice(0, v.Len()).Interface().([]uint64) 407 } 408 409 // Statistics implements inet.Stack.Statistics. 410 func (s *Stack) Statistics(stat interface{}, arg string) error { 411 var ( 412 snmpTCP bool 413 rawLine string 414 sliceStat []uint64 415 ) 416 417 switch stat.(type) { 418 case *inet.StatDev: 419 if s.netDevFile == nil { 420 return fmt.Errorf("/proc/net/dev is not opened for hostinet") 421 } 422 rawLine = getLine(s.netDevFile, arg, false /* with no header */) 423 case *inet.StatSNMPIP, *inet.StatSNMPICMP, *inet.StatSNMPICMPMSG, *inet.StatSNMPTCP, *inet.StatSNMPUDP, *inet.StatSNMPUDPLite: 424 if s.netSNMPFile == nil { 425 return fmt.Errorf("/proc/net/snmp is not opened for hostinet") 426 } 427 rawLine = getLine(s.netSNMPFile, arg, true) 428 default: 429 return syserr.ErrEndpointOperation.ToError() 430 } 431 432 if rawLine == "" { 433 return fmt.Errorf("failed to get raw line") 434 } 435 436 parts := strings.SplitN(rawLine, ":", 2) 437 if len(parts) != 2 { 438 return fmt.Errorf("failed to get prefix from: %q", rawLine) 439 } 440 441 sliceStat = toSlice(stat) 442 fields := strings.Fields(strings.TrimSpace(parts[1])) 443 if len(fields) != len(sliceStat) { 444 return fmt.Errorf("failed to parse fields: %q", rawLine) 445 } 446 if _, ok := stat.(*inet.StatSNMPTCP); ok { 447 snmpTCP = true 448 } 449 for i := 0; i < len(sliceStat); i++ { 450 var err error 451 if snmpTCP && i == 3 { 452 var tmp int64 453 // MaxConn field is signed, RFC 2012. 454 tmp, err = strconv.ParseInt(fields[i], 10, 64) 455 sliceStat[i] = uint64(tmp) // Convert back to int before use. 456 } else { 457 sliceStat[i], err = strconv.ParseUint(fields[i], 10, 64) 458 } 459 if err != nil { 460 return fmt.Errorf("failed to parse field %d from: %q, %v", i, rawLine, err) 461 } 462 } 463 464 return nil 465 } 466 467 // RouteTable implements inet.Stack.RouteTable. 468 func (s *Stack) RouteTable() []inet.Route { 469 return append([]inet.Route(nil), s.routes...) 470 } 471 472 // Resume implements inet.Stack.Resume. 473 func (s *Stack) Resume() {} 474 475 // RegisteredEndpoints implements inet.Stack.RegisteredEndpoints. 476 func (s *Stack) RegisteredEndpoints() []stack.TransportEndpoint { return nil } 477 478 // CleanupEndpoints implements inet.Stack.CleanupEndpoints. 479 func (s *Stack) CleanupEndpoints() []stack.TransportEndpoint { return nil } 480 481 // RestoreCleanupEndpoints implements inet.Stack.RestoreCleanupEndpoints. 482 func (s *Stack) RestoreCleanupEndpoints([]stack.TransportEndpoint) {} 483 484 // SetForwarding implements inet.Stack.SetForwarding. 485 func (s *Stack) SetForwarding(tcpip.NetworkProtocolNumber, bool) error { 486 return linuxerr.EACCES 487 } 488 489 // PortRange implements inet.Stack.PortRange. 490 func (*Stack) PortRange() (uint16, uint16) { 491 // Use the default Linux values per net/ipv4/af_inet.c:inet_init_net(). 492 return 32768, 28232 493 } 494 495 // SetPortRange implements inet.Stack.SetPortRange. 496 func (*Stack) SetPortRange(start uint16, end uint16) error { 497 return linuxerr.EACCES 498 }