github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/runsc/boot/network.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package boot 16 17 import ( 18 "fmt" 19 "net" 20 "os" 21 "runtime" 22 "strings" 23 "time" 24 25 "github.com/MerlinKodo/gvisor/pkg/hostos" 26 "github.com/MerlinKodo/gvisor/pkg/log" 27 "github.com/MerlinKodo/gvisor/pkg/tcpip" 28 "github.com/MerlinKodo/gvisor/pkg/tcpip/link/ethernet" 29 "github.com/MerlinKodo/gvisor/pkg/tcpip/link/fdbased" 30 "github.com/MerlinKodo/gvisor/pkg/tcpip/link/loopback" 31 "github.com/MerlinKodo/gvisor/pkg/tcpip/link/packetsocket" 32 "github.com/MerlinKodo/gvisor/pkg/tcpip/link/qdisc/fifo" 33 "github.com/MerlinKodo/gvisor/pkg/tcpip/link/sniffer" 34 "github.com/MerlinKodo/gvisor/pkg/tcpip/link/xdp" 35 "github.com/MerlinKodo/gvisor/pkg/tcpip/network/ipv4" 36 "github.com/MerlinKodo/gvisor/pkg/tcpip/network/ipv6" 37 "github.com/MerlinKodo/gvisor/pkg/tcpip/stack" 38 "github.com/MerlinKodo/gvisor/pkg/urpc" 39 "github.com/MerlinKodo/gvisor/runsc/config" 40 "golang.org/x/sys/unix" 41 ) 42 43 var ( 44 // DefaultLoopbackLink contains IP addresses and routes of "127.0.0.1/8" and 45 // "::1/8" on "lo" interface. 46 DefaultLoopbackLink = LoopbackLink{ 47 Name: "lo", 48 Addresses: []IPWithPrefix{ 49 {Address: net.IP("\x7f\x00\x00\x01"), PrefixLen: 8}, 50 {Address: net.IPv6loopback, PrefixLen: 128}, 51 }, 52 Routes: []Route{ 53 { 54 Destination: net.IPNet{ 55 IP: net.IPv4(0x7f, 0, 0, 0), 56 Mask: net.IPv4Mask(0xff, 0, 0, 0), 57 }, 58 }, 59 { 60 Destination: net.IPNet{ 61 IP: net.IPv6loopback, 62 Mask: net.IPMask(strings.Repeat("\xff", net.IPv6len)), 63 }, 64 }, 65 }, 66 } 67 ) 68 69 // Network exposes methods that can be used to configure a network stack. 70 type Network struct { 71 Stack *stack.Stack 72 } 73 74 // Route represents a route in the network stack. 75 type Route struct { 76 Destination net.IPNet 77 Gateway net.IP 78 } 79 80 // DefaultRoute represents a catch all route to the default gateway. 81 type DefaultRoute struct { 82 Route Route 83 Name string 84 } 85 86 type Neighbor struct { 87 IP net.IP 88 HardwareAddr net.HardwareAddr 89 } 90 91 // FDBasedLink configures an fd-based link. 92 type FDBasedLink struct { 93 Name string 94 InterfaceIndex int 95 MTU int 96 Addresses []IPWithPrefix 97 Routes []Route 98 GSOMaxSize uint32 99 GvisorGSOEnabled bool 100 GvisorGROTimeout time.Duration 101 TXChecksumOffload bool 102 RXChecksumOffload bool 103 LinkAddress net.HardwareAddr 104 QDisc config.QueueingDiscipline 105 Neighbors []Neighbor 106 107 // NumChannels controls how many underlying FDs are to be used to 108 // create this endpoint. 109 NumChannels int 110 } 111 112 // XDPLink configures an XDP link. 113 type XDPLink struct { 114 Name string 115 InterfaceIndex int 116 MTU int 117 Addresses []IPWithPrefix 118 Routes []Route 119 TXChecksumOffload bool 120 RXChecksumOffload bool 121 LinkAddress net.HardwareAddr 122 QDisc config.QueueingDiscipline 123 Neighbors []Neighbor 124 GvisorGROTimeout time.Duration 125 126 // NumChannels controls how many underlying FDs are to be used to 127 // create this endpoint. 128 NumChannels int 129 } 130 131 // LoopbackLink configures a loopback link. 132 type LoopbackLink struct { 133 Name string 134 Addresses []IPWithPrefix 135 Routes []Route 136 GvisorGROTimeout time.Duration 137 } 138 139 // CreateLinksAndRoutesArgs are arguments to CreateLinkAndRoutes. 140 type CreateLinksAndRoutesArgs struct { 141 // FilePayload contains the fds associated with the FDBasedLinks. The 142 // number of fd's should match the sum of the NumChannels field of the 143 // FDBasedLink entries below. 144 urpc.FilePayload 145 146 LoopbackLinks []LoopbackLink 147 FDBasedLinks []FDBasedLink 148 XDPLinks []XDPLink 149 150 Defaultv4Gateway DefaultRoute 151 Defaultv6Gateway DefaultRoute 152 153 // PCAP indicates that FilePayload also contains a PCAP log file. 154 PCAP bool 155 } 156 157 // IPWithPrefix is an address with its subnet prefix length. 158 type IPWithPrefix struct { 159 // Address is a network address. 160 Address net.IP 161 162 // PrefixLen is the subnet prefix length. 163 PrefixLen int 164 } 165 166 func (ip IPWithPrefix) String() string { 167 return fmt.Sprintf("%s/%d", ip.Address, ip.PrefixLen) 168 } 169 170 // Empty returns true if route hasn't been set. 171 func (r *Route) Empty() bool { 172 return r.Destination.IP == nil && r.Destination.Mask == nil && r.Gateway == nil 173 } 174 175 func (r *Route) toTcpipRoute(id tcpip.NICID) (tcpip.Route, error) { 176 subnet, err := tcpip.NewSubnet(ipToAddress(r.Destination.IP), ipMaskToAddressMask(r.Destination.Mask)) 177 if err != nil { 178 return tcpip.Route{}, err 179 } 180 return tcpip.Route{ 181 Destination: subnet, 182 Gateway: ipToAddress(r.Gateway), 183 NIC: id, 184 }, nil 185 } 186 187 // CreateLinksAndRoutes creates links and routes in a network stack. It should 188 // only be called once. 189 func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct{}) error { 190 if len(args.FDBasedLinks) > 0 && len(args.XDPLinks) > 0 { 191 return fmt.Errorf("received both fdbased and XDP links, but only one can be used at a time") 192 } 193 wantFDs := 0 194 for _, l := range args.FDBasedLinks { 195 wantFDs += l.NumChannels 196 } 197 if len(args.XDPLinks) > 0 { 198 wantFDs += 4 199 } 200 if args.PCAP { 201 wantFDs++ 202 } 203 if got := len(args.FilePayload.Files); got != wantFDs { 204 return fmt.Errorf("args.FilePayload.Files has %d FDs but we need %d entries based on FDBasedLinks, XDPLinks, and PCAP", got, wantFDs) 205 } 206 207 var nicID tcpip.NICID 208 nicids := make(map[string]tcpip.NICID) 209 210 // Collect routes from all links. 211 var routes []tcpip.Route 212 213 // Loopback normally appear before other interfaces. 214 for _, link := range args.LoopbackLinks { 215 nicID++ 216 nicids[link.Name] = nicID 217 218 linkEP := packetsocket.New(ethernet.New(loopback.New())) 219 220 log.Infof("Enabling loopback interface %q with id %d on addresses %+v", link.Name, nicID, link.Addresses) 221 opts := stack.NICOptions{ 222 Name: link.Name, 223 GROTimeout: link.GvisorGROTimeout, 224 } 225 if err := n.createNICWithAddrs(nicID, linkEP, opts, link.Addresses); err != nil { 226 return err 227 } 228 229 // Collect the routes from this link. 230 for _, r := range link.Routes { 231 route, err := r.toTcpipRoute(nicID) 232 if err != nil { 233 return err 234 } 235 routes = append(routes, route) 236 } 237 } 238 239 // Setup fdbased or XDP links. 240 if len(args.FDBasedLinks) > 0 { 241 // Choose a dispatch mode. 242 dispatchMode := fdbased.RecvMMsg 243 version, err := hostos.KernelVersion() 244 if err != nil { 245 return err 246 } 247 if version.AtLeast(5, 6) { 248 dispatchMode = fdbased.PacketMMap 249 } else { 250 log.Infof("Host kernel version < 5.6, falling back to RecvMMsg dispatch") 251 } 252 253 fdOffset := 0 254 for _, link := range args.FDBasedLinks { 255 nicID++ 256 nicids[link.Name] = nicID 257 258 FDs := make([]int, 0, link.NumChannels) 259 for j := 0; j < link.NumChannels; j++ { 260 // Copy the underlying FD. 261 oldFD := args.FilePayload.Files[fdOffset].Fd() 262 newFD, err := unix.Dup(int(oldFD)) 263 if err != nil { 264 return fmt.Errorf("failed to dup FD %v: %v", oldFD, err) 265 } 266 FDs = append(FDs, newFD) 267 fdOffset++ 268 } 269 270 mac := tcpip.LinkAddress(link.LinkAddress) 271 log.Infof("gso max size is: %d", link.GSOMaxSize) 272 273 linkEP, err := fdbased.New(&fdbased.Options{ 274 FDs: FDs, 275 MTU: uint32(link.MTU), 276 EthernetHeader: mac != "", 277 Address: mac, 278 PacketDispatchMode: dispatchMode, 279 GSOMaxSize: link.GSOMaxSize, 280 GvisorGSOEnabled: link.GvisorGSOEnabled, 281 TXChecksumOffload: link.TXChecksumOffload, 282 RXChecksumOffload: link.RXChecksumOffload, 283 }) 284 if err != nil { 285 return err 286 } 287 288 // Wrap linkEP in a sniffer to enable packet logging. 289 sniffEP := sniffer.New(packetsocket.New(linkEP)) 290 291 var qDisc stack.QueueingDiscipline 292 switch link.QDisc { 293 case config.QDiscNone: 294 case config.QDiscFIFO: 295 log.Infof("Enabling FIFO QDisc on %q", link.Name) 296 qDisc = fifo.New(sniffEP, runtime.GOMAXPROCS(0), 1000) 297 } 298 299 log.Infof("Enabling interface %q with id %d on addresses %+v (%v) w/ %d channels", link.Name, nicID, link.Addresses, mac, link.NumChannels) 300 opts := stack.NICOptions{ 301 Name: link.Name, 302 QDisc: qDisc, 303 GROTimeout: link.GvisorGROTimeout, 304 } 305 if err := n.createNICWithAddrs(nicID, sniffEP, opts, link.Addresses); err != nil { 306 return err 307 } 308 309 // Collect the routes from this link. 310 for _, r := range link.Routes { 311 route, err := r.toTcpipRoute(nicID) 312 if err != nil { 313 return err 314 } 315 routes = append(routes, route) 316 } 317 318 for _, neigh := range link.Neighbors { 319 proto, tcpipAddr := ipToAddressAndProto(neigh.IP) 320 n.Stack.AddStaticNeighbor(nicID, proto, tcpipAddr, tcpip.LinkAddress(neigh.HardwareAddr)) 321 } 322 } 323 } else if len(args.XDPLinks) > 0 { 324 if nlinks := len(args.XDPLinks); nlinks > 1 { 325 return fmt.Errorf("XDP only supports one link device, but got %d", nlinks) 326 } 327 link := args.XDPLinks[0] 328 nicID++ 329 nicids[link.Name] = nicID 330 331 // Get the AF_XDP socket. 332 fdOffset := 0 333 oldFD := args.FilePayload.Files[fdOffset].Fd() 334 fd, err := unix.Dup(int(oldFD)) 335 if err != nil { 336 return fmt.Errorf("failed to dup AF_XDP fd %v: %v", oldFD, err) 337 } 338 fdOffset++ 339 340 // The parent process sends several other FDs in order 341 // to keep them open and alive. These are for BPF 342 // programs and maps that, if closed, will break the 343 // dispatcher. 344 for _, fdName := range []string{"program-fd", "sockmap-fd", "link-fd"} { 345 oldFD := args.FilePayload.Files[fdOffset].Fd() 346 if _, err := unix.Dup(int(oldFD)); err != nil { 347 return fmt.Errorf("failed to dup %s with FD %d: %v", fdName, oldFD, err) 348 } 349 fdOffset++ 350 } 351 352 mac := tcpip.LinkAddress(link.LinkAddress) 353 linkEP, err := xdp.New(&xdp.Options{ 354 FD: fd, 355 Address: mac, 356 TXChecksumOffload: link.TXChecksumOffload, 357 RXChecksumOffload: link.RXChecksumOffload, 358 InterfaceIndex: link.InterfaceIndex, 359 }) 360 if err != nil { 361 return err 362 } 363 364 // Wrap linkEP in a sniffer to enable packet logging. 365 var sniffEP stack.LinkEndpoint 366 if args.PCAP { 367 newFD, err := unix.Dup(int(args.FilePayload.Files[fdOffset].Fd())) 368 if err != nil { 369 return fmt.Errorf("failed to dup pcap FD: %v", err) 370 } 371 const packetTruncateSize = 4096 372 sniffEP, err = sniffer.NewWithWriter(packetsocket.New(linkEP), os.NewFile(uintptr(newFD), "pcap-file"), packetTruncateSize) 373 if err != nil { 374 return fmt.Errorf("failed to create PCAP logger: %v", err) 375 } 376 fdOffset++ 377 } else { 378 sniffEP = sniffer.New(packetsocket.New(linkEP)) 379 } 380 381 var qDisc stack.QueueingDiscipline 382 switch link.QDisc { 383 case config.QDiscNone: 384 case config.QDiscFIFO: 385 log.Infof("Enabling FIFO QDisc on %q", link.Name) 386 qDisc = fifo.New(sniffEP, runtime.GOMAXPROCS(0), 1000) 387 } 388 389 log.Infof("Enabling interface %q with id %d on addresses %+v (%v) w/ %d channels", link.Name, nicID, link.Addresses, mac, link.NumChannels) 390 opts := stack.NICOptions{ 391 Name: link.Name, 392 QDisc: qDisc, 393 GROTimeout: link.GvisorGROTimeout, 394 } 395 if err := n.createNICWithAddrs(nicID, sniffEP, opts, link.Addresses); err != nil { 396 return err 397 } 398 399 // Collect the routes from this link. 400 for _, r := range link.Routes { 401 route, err := r.toTcpipRoute(nicID) 402 if err != nil { 403 return err 404 } 405 routes = append(routes, route) 406 } 407 408 for _, neigh := range link.Neighbors { 409 proto, tcpipAddr := ipToAddressAndProto(neigh.IP) 410 n.Stack.AddStaticNeighbor(nicID, proto, tcpipAddr, tcpip.LinkAddress(neigh.HardwareAddr)) 411 } 412 } 413 414 if !args.Defaultv4Gateway.Route.Empty() { 415 nicID, ok := nicids[args.Defaultv4Gateway.Name] 416 if !ok { 417 return fmt.Errorf("invalid interface name %q for default route", args.Defaultv4Gateway.Name) 418 } 419 route, err := args.Defaultv4Gateway.Route.toTcpipRoute(nicID) 420 if err != nil { 421 return err 422 } 423 routes = append(routes, route) 424 } 425 426 if !args.Defaultv6Gateway.Route.Empty() { 427 nicID, ok := nicids[args.Defaultv6Gateway.Name] 428 if !ok { 429 return fmt.Errorf("invalid interface name %q for default route", args.Defaultv6Gateway.Name) 430 } 431 route, err := args.Defaultv6Gateway.Route.toTcpipRoute(nicID) 432 if err != nil { 433 return err 434 } 435 routes = append(routes, route) 436 } 437 438 log.Infof("Setting routes %+v", routes) 439 n.Stack.SetRouteTable(routes) 440 return nil 441 } 442 443 // createNICWithAddrs creates a NIC in the network stack and adds the given 444 // addresses. 445 func (n *Network) createNICWithAddrs(id tcpip.NICID, ep stack.LinkEndpoint, opts stack.NICOptions, addrs []IPWithPrefix) error { 446 if err := n.Stack.CreateNICWithOptions(id, ep, opts); err != nil { 447 return fmt.Errorf("CreateNICWithOptions(%d, _, %+v) failed: %v", id, opts, err) 448 } 449 450 for _, addr := range addrs { 451 proto, tcpipAddr := ipToAddressAndProto(addr.Address) 452 protocolAddr := tcpip.ProtocolAddress{ 453 Protocol: proto, 454 AddressWithPrefix: tcpip.AddressWithPrefix{ 455 Address: tcpipAddr, 456 PrefixLen: addr.PrefixLen, 457 }, 458 } 459 if err := n.Stack.AddProtocolAddress(id, protocolAddr, stack.AddressProperties{}); err != nil { 460 return fmt.Errorf("AddProtocolAddress(%d, %+v, {}) failed: %s", id, protocolAddr, err) 461 } 462 } 463 return nil 464 } 465 466 // ipToAddressAndProto converts IP to tcpip.Address and a protocol number. 467 // 468 // Note: don't use 'len(ip)' to determine IP version because length is always 16. 469 func ipToAddressAndProto(ip net.IP) (tcpip.NetworkProtocolNumber, tcpip.Address) { 470 if i4 := ip.To4(); i4 != nil { 471 return ipv4.ProtocolNumber, tcpip.AddrFromSlice(i4) 472 } 473 return ipv6.ProtocolNumber, tcpip.AddrFromSlice(ip) 474 } 475 476 // ipToAddress converts IP to tcpip.Address, ignoring the protocol. 477 func ipToAddress(ip net.IP) tcpip.Address { 478 _, addr := ipToAddressAndProto(ip) 479 return addr 480 } 481 482 // ipMaskToAddressMask converts IPMask to tcpip.AddressMask, ignoring the 483 // protocol. 484 func ipMaskToAddressMask(ipMask net.IPMask) tcpip.AddressMask { 485 addr := ipToAddress(net.IP(ipMask)) 486 return tcpip.MaskFromBytes(addr.AsSlice()) 487 }