github.com/mirantis/virtlet@v1.5.2-0.20191204181327-1659b8a48e9b/pkg/nettools/nettools.go (about) 1 /* 2 Copyright 2016 Mirantis 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 // Some of the code is based on CNI's plugins/main/bridge/bridge.go, pkg/ip/link.go 18 // Original copyright notice: 19 // 20 // Copyright 2014 CNI authors 21 // 22 // Licensed under the Apache License, Version 2.0 (the "License"); 23 // you may not use this file except in compliance with the License. 24 // You may obtain a copy of the License at 25 // 26 // http://www.apache.org/licenses/LICENSE-2.0 27 // 28 // Unless required by applicable law or agreed to in writing, software 29 // distributed under the License is distributed on an "AS IS" BASIS, 30 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 31 // See the License for the specific language governing permissions and 32 // limitations under the License. 33 34 package nettools 35 36 import ( 37 "crypto/rand" 38 "errors" 39 "fmt" 40 "log" 41 "net" 42 "os" 43 "os/exec" 44 "sort" 45 46 "github.com/containernetworking/cni/pkg/ns" 47 cnitypes "github.com/containernetworking/cni/pkg/types" 48 cnicurrent "github.com/containernetworking/cni/pkg/types/current" 49 "github.com/davecgh/go-spew/spew" 50 "github.com/golang/glog" 51 "github.com/vishvananda/netlink" 52 53 "github.com/Mirantis/virtlet/pkg/cni" 54 "github.com/Mirantis/virtlet/pkg/network" 55 ) 56 57 const ( 58 defaultMTU = 1500 59 tapInterfaceNameTemplate = "tap%d" 60 containerBridgeNameTemplate = "br%d" 61 loopbackInterfaceName = "lo" 62 // Address for dhcp server internal interface 63 internalDhcpAddr = "169.254.254.2/24" 64 ) 65 66 func makeVethPair(name, peer string, mtu int) (netlink.Link, error) { 67 veth := &netlink.Veth{ 68 LinkAttrs: netlink.LinkAttrs{ 69 Name: name, 70 Flags: net.FlagUp, 71 MTU: mtu, 72 }, 73 PeerName: peer, 74 } 75 if err := netlink.LinkAdd(veth); err != nil { 76 return nil, err 77 } 78 79 return veth, nil 80 } 81 82 func peerExists(name string) bool { 83 if _, err := netlink.LinkByName(name); err != nil { 84 return false 85 } 86 return true 87 } 88 89 func makeVeth(name string, mtu int) (peerName string, veth netlink.Link, err error) { 90 for i := 0; i < 10; i++ { 91 peerName, err = RandomVethName() 92 if err != nil { 93 return 94 } 95 96 veth, err = makeVethPair(name, peerName, mtu) 97 switch { 98 case err == nil: 99 return 100 101 case os.IsExist(err): 102 if peerExists(peerName) { 103 continue 104 } 105 err = fmt.Errorf("container veth name provided (%v) already exists", name) 106 return 107 108 default: 109 err = fmt.Errorf("failed to make veth pair: %v", err) 110 return 111 } 112 } 113 114 // should really never be hit 115 err = fmt.Errorf("failed to find a unique veth name") 116 return 117 } 118 119 // RandomVethName returns string "veth" with random prefix (hashed from entropy) 120 func RandomVethName() (string, error) { 121 entropy := make([]byte, 4) 122 _, err := rand.Reader.Read(entropy) 123 if err != nil { 124 return "", fmt.Errorf("failed to generate random veth name: %v", err) 125 } 126 127 // NetworkManager (recent versions) will ignore veth devices that start with "veth" 128 return fmt.Sprintf("veth%x", entropy), nil 129 } 130 131 // SetupVeth sets up a pair of virtual ethernet devices. 132 // Call SetupVeth from inside the container netns. It will create both veth 133 // devices and move the host-side veth into the provided hostNS namespace. 134 // On success, SetupVeth returns (hostVeth, containerVeth, nil) 135 func SetupVeth(contVethName string, mtu int, hostNS ns.NetNS) (netlink.Link, netlink.Link, error) { 136 hostVethName, contVeth, err := makeVeth(contVethName, mtu) 137 if err != nil { 138 return nil, nil, err 139 } 140 141 if err = netlink.LinkSetUp(contVeth); err != nil { 142 return nil, nil, fmt.Errorf("failed to set %q up: %v", contVethName, err) 143 } 144 145 hostVeth, err := netlink.LinkByName(hostVethName) 146 if err != nil { 147 return nil, nil, fmt.Errorf("failed to lookup %q: %v", hostVethName, err) 148 } 149 150 if err = netlink.LinkSetNsFd(hostVeth, int(hostNS.Fd())); err != nil { 151 return nil, nil, fmt.Errorf("failed to move veth to host netns: %v", err) 152 } 153 154 err = hostNS.Do(func(_ ns.NetNS) error { 155 hostVeth, err = netlink.LinkByName(hostVethName) 156 if err != nil { 157 return fmt.Errorf("failed to lookup %q in %q: %v", hostVethName, hostNS.Path(), err) 158 } 159 160 if err = netlink.LinkSetUp(hostVeth); err != nil { 161 return fmt.Errorf("failed to set %q up: %v", hostVethName, err) 162 } 163 return nil 164 }) 165 if err != nil { 166 return nil, nil, err 167 } 168 return hostVeth, contVeth, nil 169 } 170 171 // CreateEscapeVethPair creates a veth pair with innerVeth residing in 172 // the specified network namespace innerNS and outerVeth residing in 173 // the 'outer' (current) namespace. 174 // TBD: move this to test tools 175 func CreateEscapeVethPair(innerNS ns.NetNS, ifName string, mtu int) (outerVeth, innerVeth netlink.Link, err error) { 176 var outerVethName string 177 178 err = innerNS.Do(func(outerNS ns.NetNS) error { 179 // create the veth pair in the inner ns and move outer end into the outer netns 180 outerVeth, innerVeth, err = SetupVeth(ifName, mtu, outerNS) 181 if err != nil { 182 return err 183 } 184 185 // need to lookup innerVeth again to get its attrs 186 innerVeth, err = netlink.LinkByName(innerVeth.Attrs().Name) 187 if err != nil { 188 return err 189 } 190 191 outerVethName = outerVeth.Attrs().Name 192 return nil 193 }) 194 if err != nil { 195 return 196 } 197 198 // need to lookup outerVeth again as its index has changed during ns move 199 outerVeth, err = netlink.LinkByName(outerVethName) 200 if err != nil { 201 return nil, nil, fmt.Errorf("failed to lookup %q: %v", outerVethName, err) 202 } 203 204 return 205 } 206 207 func createBridge(brName string, mtu int) (*netlink.Bridge, error) { 208 br := &netlink.Bridge{ 209 LinkAttrs: netlink.LinkAttrs{ 210 Name: brName, 211 MTU: mtu, 212 // Let kernel use default txqueuelen; leaving it unset 213 // means 0, and a zero-length TX queue messes up FIFO 214 // traffic shapers which use TX queue length as the 215 // default packet limit 216 TxQLen: -1, 217 }, 218 } 219 220 if err := netlink.LinkAdd(br); err != nil { 221 return nil, fmt.Errorf("could not add %q: %v", brName, err) 222 } 223 224 if err := netlink.LinkSetUp(br); err != nil { 225 return nil, err 226 } 227 228 return br, nil 229 } 230 231 // SetupBridge creates a bridge and adds specified links to it. 232 // It sets bridge's MTU to MTU value of the first link. 233 func SetupBridge(bridgeName string, links []netlink.Link) (*netlink.Bridge, error) { 234 if len(links) == 0 { 235 return nil, errors.New("no links provided") 236 } 237 238 br, err := createBridge(bridgeName, links[0].Attrs().MTU) 239 if err != nil { 240 return nil, fmt.Errorf("failed to create bridge %q: %v", bridgeName, err) 241 } 242 243 for _, link := range links { 244 if err = linkSetMaster(link, br); err != nil { 245 delMessage := "" 246 if delErr := netlink.LinkDel(br); delErr != nil { 247 delMessage = fmt.Sprintf(" (and failed to delete the bridge: %v", err) 248 } 249 return nil, fmt.Errorf("failed to connect %q to bridge %v: %v%s", link.Attrs().Name, br.Attrs().Name, err, delMessage) 250 } 251 } 252 253 return br, nil 254 } 255 256 // FindVeth locates single veth link in the list of provided links. 257 // There must be exactly one veth interface in the list. 258 func FindVeth(links []netlink.Link) (netlink.Link, error) { 259 var veth netlink.Link 260 for _, link := range links { 261 if link.Type() != "veth" { 262 continue 263 } 264 if veth != nil { 265 return nil, errors.New("multiple veth links detected in container namespace") 266 } 267 veth = link 268 } 269 if veth == nil { 270 return nil, errors.New("no veth interface found") 271 } 272 return veth, nil 273 } 274 275 func findLinkByAddress(links []netlink.Link, address net.IPNet) (netlink.Link, error) { 276 for _, link := range links { 277 addresses, err := netlink.AddrList(link, FAMILY_ALL) 278 if err != nil { 279 return nil, err 280 } 281 for _, addr := range addresses { 282 if addr.IPNet.String() == address.String() { 283 return link, nil 284 } 285 } 286 } 287 return nil, fmt.Errorf("interface with address %q not found in the container namespace", address.String()) 288 } 289 290 // ValidateAndFixCNIResult verifies that netConfig contains proper list of 291 // ips, routes, interfaces and if something is missing it tries to complement 292 // that using patch for Weave or for plugins which return their netConfig 293 // in v0.2.0 version of CNI SPEC 294 func ValidateAndFixCNIResult(netConfig *cnicurrent.Result, nsPath string, allLinks []netlink.Link) (*cnicurrent.Result, error) { 295 // If there are no routes provided, we consider it a broken 296 // config and extract interface config instead. That's the 297 // case with Weave CNI plugin. We don't do this for multiple CNI 298 // at this point. 299 if len(netConfig.IPs) == 1 && (cni.GetPodIP(netConfig) == "" || len(netConfig.Routes) == 0) { 300 dnsInfo := netConfig.DNS 301 302 veth, err := FindVeth(allLinks) 303 if err != nil { 304 return nil, err 305 } 306 if netConfig, err = ExtractLinkInfo(veth, nsPath); err != nil { 307 return nil, err 308 } 309 310 // extracted netConfig doesn't have DNS information, so 311 // still try to extract it from CNI-provided data 312 netConfig.DNS = dnsInfo 313 314 return netConfig, nil 315 } 316 317 if len(netConfig.IPs) == 0 { 318 return nil, fmt.Errorf("cni result does not have any IP addresses") 319 } 320 321 // Interfaces contain broken info more often than not, so we 322 // replace them here with what we can deduce from the network 323 // links in the container netns 324 for _, ipConfig := range netConfig.IPs { 325 link, err := findLinkByAddress(allLinks, ipConfig.Address) 326 if err != nil { 327 return nil, err 328 } 329 330 found := false 331 for i, iface := range netConfig.Interfaces { 332 if iface.Name == link.Attrs().Name { 333 ipConfig.Interface = i 334 found = true 335 break 336 } 337 } 338 if !found { 339 ipConfig.Interface = len(netConfig.Interfaces) 340 netConfig.Interfaces = append(netConfig.Interfaces, &cnicurrent.Interface{ 341 Name: link.Attrs().Name, 342 Mac: link.Attrs().HardwareAddr.String(), 343 Sandbox: nsPath, 344 }) 345 } 346 } 347 348 return netConfig, nil 349 } 350 351 // getContainerLinks finds links that correspond to interfaces in the current 352 // network namespace 353 func getContainerLinks(info *cnicurrent.Result) ([]netlink.Link, error) { 354 // info.Interfaces is omitted by some CNI implementations and 355 // the order may not be correct there after Virtlet adds the 356 // missing ones, so we use interface indexes from info.IPs for 357 // ordering. 358 var links []netlink.Link 359 order := make([]int, len(info.Interfaces)) 360 for n, ip := range info.IPs { 361 if ip.Interface >= 0 && ip.Interface < len(order) { 362 order[ip.Interface] = n + 1 363 } 364 } 365 ifaces := make([]*cnicurrent.Interface, len(info.Interfaces)) 366 copy(ifaces, info.Interfaces) 367 sort.SliceStable(ifaces, func(i, j int) bool { return order[i] < order[j] }) 368 for _, iface := range ifaces { 369 // empty Sandbox means this interface belongs to the host 370 // network namespace, so we skip it 371 if iface.Sandbox == "" { 372 continue 373 } 374 // If link is unavailable - simply add nil to slice 375 link, err := netlink.LinkByName(iface.Name) 376 if err != nil { 377 if _, ok := err.(netlink.LinkNotFoundError); !ok { 378 return nil, err 379 } 380 } 381 links = append(links, link) 382 } 383 return links, nil 384 } 385 386 // StripLink removes addresses from the link 387 // along with any routes related to the link, except 388 // those created by the kernel 389 func StripLink(link netlink.Link) error { 390 routes, err := netlink.RouteList(link, FAMILY_V4) 391 if err != nil { 392 return fmt.Errorf("failed to list routes: %v", err) 393 } 394 395 addrs, err := netlink.AddrList(link, FAMILY_V4) 396 if err != nil { 397 return fmt.Errorf("failed to get addresses for link: %v", err) 398 } 399 400 for _, route := range routes { 401 if route.Protocol == RTPROT_KERNEL { 402 // route created by the kernel 403 continue 404 } 405 if err = netlink.RouteDel(&route); err != nil { 406 return fmt.Errorf("error deleting route: %v", err) 407 } 408 } 409 410 for _, addr := range addrs { 411 if err = netlink.AddrDel(link, &addr); err != nil { 412 return fmt.Errorf("error deleting address from the route: %v", err) 413 } 414 } 415 416 return nil 417 } 418 419 // ExtractLinkInfo extracts ip address and netmask from veth 420 // interface in the current namespace, together with routes for this 421 // interface. 422 // There must be exactly one veth interface in the namespace 423 // and exactly one address associated with veth. 424 // Returns interface info struct and error, if any. 425 func ExtractLinkInfo(link netlink.Link, nsPath string) (*cnicurrent.Result, error) { 426 addrs, err := netlink.AddrList(link, FAMILY_V4) 427 if err != nil { 428 return nil, fmt.Errorf("failed to get addresses for link: %v", err) 429 } 430 if len(addrs) != 1 { 431 return nil, fmt.Errorf("expected exactly one address for link, but got %v", addrs) 432 } 433 434 result := &cnicurrent.Result{ 435 Interfaces: []*cnicurrent.Interface{ 436 { 437 Name: link.Attrs().Name, 438 Mac: link.Attrs().HardwareAddr.String(), 439 Sandbox: nsPath, 440 }, 441 }, 442 IPs: []*cnicurrent.IPConfig{ 443 { 444 Version: "4", 445 Interface: 0, 446 Address: *addrs[0].IPNet, 447 }, 448 }, 449 } 450 451 routes, err := netlink.RouteList(link, FAMILY_V4) 452 if err != nil { 453 return nil, fmt.Errorf("failed to list routes: %v", err) 454 } 455 for _, route := range routes { 456 switch { 457 case route.Protocol == RTPROT_KERNEL: 458 // route created by kernel 459 case route.Gw == nil: 460 // these routes can't be represented properly 461 // by CNI result because CNI will consider 462 // them having IP's default Gateway value as 463 // Gw 464 case (route.Dst == nil || route.Dst.IP == nil) && route.Gw == nil: 465 // route has only Src 466 case (route.Dst == nil || route.Dst.IP == nil): 467 result.IPs[0].Gateway = route.Gw 468 result.Routes = append(result.Routes, &cnitypes.Route{ 469 Dst: net.IPNet{ 470 IP: net.IP{0, 0, 0, 0}, 471 Mask: net.IPMask{0, 0, 0, 0}, 472 }, 473 GW: route.Gw, 474 }) 475 default: 476 result.Routes = append(result.Routes, &cnitypes.Route{ 477 Dst: *route.Dst, 478 GW: route.Gw, 479 }) 480 } 481 } 482 483 return result, nil 484 } 485 486 func mustParseAddr(addr string) *netlink.Addr { 487 r, err := netlink.ParseAddr(addr) 488 if err != nil { 489 log.Panicf("Failed to parse address %q: %v", addr, err) 490 } 491 return r 492 } 493 494 func bringUpLoopback() error { 495 // lo interface is already there in the new ns but it's down 496 lo, err := netlink.LinkByName(loopbackInterfaceName) 497 if err != nil { 498 return fmt.Errorf("failed to find link %q: %v", loopbackInterfaceName, err) 499 } 500 if err := netlink.LinkSetUp(lo); err != nil { 501 return fmt.Errorf("failed to bring up link %q: %v", loopbackInterfaceName, err) 502 } 503 return nil 504 } 505 506 func updateEbTables(nsPath, interfaceName, command string) error { 507 // block/unblock DHCP traffic from/to CNI-provided link 508 for _, item := range []struct{ chain, opt string }{ 509 // dhcp responses originate from bridge itself 510 {"OUTPUT", "--ip-source-port"}, 511 // dhcp requests originate from the VM 512 {"FORWARD", "--ip-destination-port"}, 513 } { 514 if out, err := exec.Command( 515 "nsenter", "--net="+nsPath, 516 "ebtables", command, item.chain, "-p", "IPV4", "--ip-protocol", "UDP", 517 item.opt, "67", "--out-if", interfaceName, "-j", "DROP").CombinedOutput(); err != nil { 518 return fmt.Errorf("[netns %q] ebtables failed: %v\nOut:\n%s", nsPath, err, out) 519 } 520 } 521 522 return nil 523 } 524 525 func disableMacLearning(nsPath string, bridgeName string) error { 526 if out, err := exec.Command("nsenter", "--net="+nsPath, "brctl", "setageing", bridgeName, "0").CombinedOutput(); err != nil { 527 return fmt.Errorf("[netns %q] brctl failed: %v\nOut:\n%s", nsPath, err, out) 528 } 529 530 return nil 531 } 532 533 // SetHardwareAddr sets hardware address on provided link. 534 func SetHardwareAddr(link netlink.Link, hwAddr net.HardwareAddr) error { 535 if err := netlink.LinkSetDown(link); err != nil { 536 return fmt.Errorf("can't bring down the link: %v", err) 537 } 538 if err := netlink.LinkSetHardwareAddr(link, hwAddr); err != nil { 539 return fmt.Errorf("can't set hardware address for the link: %v", err) 540 } 541 if err := netlink.LinkSetUp(link); err != nil { 542 return fmt.Errorf("can't bring up the link: %v", err) 543 } 544 545 return nil 546 } 547 548 func setupTapAndGetInterfaceDescription(link netlink.Link, nsPath string, ifaceNo int) (*network.InterfaceDescription, error) { 549 hwAddr := link.Attrs().HardwareAddr 550 ifaceName := link.Attrs().Name 551 552 mtu := link.Attrs().MTU 553 554 newHwAddr, err := GenerateMacAddress() 555 if err == nil { 556 err = SetHardwareAddr(link, newHwAddr) 557 } 558 if err != nil { 559 return nil, err 560 } 561 562 tapInterfaceName := fmt.Sprintf(tapInterfaceNameTemplate, ifaceNo) 563 tap, err := CreateTAP(tapInterfaceName, mtu) 564 if err != nil { 565 return nil, err 566 } 567 568 containerBridgeName := fmt.Sprintf(containerBridgeNameTemplate, ifaceNo) 569 br, err := SetupBridge(containerBridgeName, []netlink.Link{link, tap}) 570 if err != nil { 571 return nil, fmt.Errorf("failed to create bridge: %v", err) 572 } 573 574 if err := netlink.AddrAdd(br, mustParseAddr(internalDhcpAddr)); err != nil { 575 return nil, fmt.Errorf("failed to set address for the bridge: %v", err) 576 } 577 578 // Add ebtables DHCP blocking rules 579 if err := updateEbTables(nsPath, ifaceName, "-A"); err != nil { 580 return nil, err 581 } 582 583 // Work around bridge MAC learning problem 584 // https://ubuntuforums.org/showthread.php?t=2329373&s=cf580a41179e0f186ad4e625834a1d61&p=13511965#post13511965 585 // (affects Flannel) 586 if err := disableMacLearning(nsPath, containerBridgeName); err != nil { 587 return nil, err 588 } 589 590 if err := bringUpLoopback(); err != nil { 591 return nil, err 592 } 593 594 glog.V(3).Infof("Opening tap interface %q for link %q", tapInterfaceName, ifaceName) 595 fo, err := OpenTAP(tapInterfaceName) 596 if err != nil { 597 return nil, fmt.Errorf("failed to open tap: %v", err) 598 } 599 glog.V(3).Infof("Adding interface %q as %q", ifaceName, tapInterfaceName) 600 601 return &network.InterfaceDescription{ 602 Type: network.InterfaceTypeTap, 603 Name: ifaceName, 604 Fo: fo, 605 HardwareAddr: hwAddr, 606 MTU: uint16(mtu), 607 }, nil 608 } 609 610 // SetupContainerSideNetwork sets up networking in container 611 // namespace. It does so by preparing the following 612 // network interfaces in container ns: 613 // tapX - tap interface for the each interface to pass to VM 614 // brX - a bridge that joins above tapX and original CNI interface 615 // with X denoting an link index in info.Interfaces list. 616 // Each bridge gets assigned a link-local address to be used 617 // for dhcp server. 618 // In case of SR-IOV VFs this function only sets up a device to be passed to VM. 619 // The function should be called from within container namespace. 620 // Returns container network struct and an error, if any. 621 func SetupContainerSideNetwork(info *cnicurrent.Result, nsPath string, allLinks []netlink.Link, enableSriov bool, hostNS ns.NetNS) (*network.ContainerSideNetwork, error) { 622 contLinks, err := getContainerLinks(info) 623 if err != nil { 624 return nil, err 625 } 626 627 var interfaces []*network.InterfaceDescription 628 for i, link := range contLinks { 629 if link == nil { 630 return nil, fmt.Errorf("missing link #%d in the container network namespace (Virtlet pod restarted?)", i) 631 } 632 633 if err := StripLink(link); err != nil { 634 return nil, err 635 } 636 637 var ifDesc *network.InterfaceDescription 638 639 if isSriovVf(link) { 640 if !enableSriov { 641 return nil, fmt.Errorf("SR-IOV device configured in container network namespace while Virtlet is configured with disabled SR-IOV support") 642 } 643 if ifDesc, err = setupSriovAndGetInterfaceDescription(link, hostNS); err != nil { 644 return nil, err 645 } 646 } else { 647 if ifDesc, err = setupTapAndGetInterfaceDescription(link, nsPath, i); err != nil { 648 return nil, err 649 } 650 } 651 652 interfaces = append(interfaces, ifDesc) 653 } 654 655 return &network.ContainerSideNetwork{info, nsPath, interfaces}, nil 656 } 657 658 // RecoverContainerSideNetwork tries to populate ContainerSideNetwork 659 // structure based on a network namespace that was already adjusted for Virtlet 660 func RecoverContainerSideNetwork(csn *network.ContainerSideNetwork, nsPath string, allLinks []netlink.Link, hostNS ns.NetNS) error { 661 if len(csn.Result.Interfaces) == 0 { 662 return fmt.Errorf("wrong cni configuration: no interfaces defined: %s", spew.Sdump(csn.Result)) 663 } 664 665 contLinks, err := getContainerLinks(csn.Result) 666 if err != nil { 667 return err 668 } 669 670 oldDescs := map[string]*network.InterfaceDescription{} 671 for _, desc := range csn.Interfaces { 672 oldDescs[desc.Name] = desc 673 } 674 675 for _, link := range contLinks { 676 // Skip missing link which is already used by running VM 677 if link == nil { 678 continue 679 } 680 ifaceName := link.Attrs().Name 681 desc, found := oldDescs[ifaceName] 682 if !found { 683 glog.Warningf("Recovering container side network: missing description for interface %q", ifaceName) 684 } 685 delete(oldDescs, ifaceName) 686 var ifaceType network.InterfaceType 687 688 if isSriovVf(link) { 689 ifaceType = network.InterfaceTypeVF 690 691 // device should be already unbound, but after machine reboot that can be necessary 692 unbindDriverFromDevice(desc.PCIAddress) 693 694 devIdentifier, err := getDeviceIdentifier(desc.PCIAddress) 695 if err != nil { 696 return err 697 } 698 699 // this can be problematic in case of machine reboot - we are trying to use the same 700 // devices as was used before reboot, but in meantime there is small chance that they 701 // were used already by sriov cni plugin (for which reboot means it's starting everything 702 // from clean situation) for some other pods, before even virtlet was started 703 // also in case of virtlet pod restart - device can be already bound to vfio-pci, so we 704 // are ignoring any error there) 705 bindDeviceToVFIO(devIdentifier) 706 } else { 707 ifaceType = network.InterfaceTypeTap 708 // It's OK if OpenTAP failed as the device is busy and used by running VM 709 if fo, err := OpenTAP(link.Attrs().Name); err == nil { 710 desc.Fo = fo 711 } 712 } 713 if desc.Type != ifaceType { 714 return fmt.Errorf("bad interface type for %q", desc.Name) 715 } 716 } 717 718 return nil 719 } 720 721 // TeardownBridge removes links from bridge and sets it down 722 func TeardownBridge(bridge netlink.Link, links []netlink.Link) error { 723 for _, link := range links { 724 if err := netlink.LinkSetNoMaster(link); err != nil { 725 return err 726 } 727 } 728 729 return netlink.LinkSetDown(bridge) 730 } 731 732 // ConfigureLink configures a link according to the CNI result 733 func ConfigureLink(link netlink.Link, info *cnicurrent.Result) error { 734 ifaceNo := -1 735 linkMAC := link.Attrs().HardwareAddr.String() 736 for i, iface := range info.Interfaces { 737 if iface.Mac == linkMAC { 738 ifaceNo = i 739 break 740 } 741 } 742 if ifaceNo == -1 { 743 return fmt.Errorf("can't find link with MAC %q in saved cni result: %s", linkMAC, spew.Sdump(info)) 744 } 745 746 for _, addr := range info.IPs { 747 if addr.Interface == ifaceNo { 748 linkAddr := &netlink.Addr{IPNet: &addr.Address} 749 if err := netlink.AddrAdd(link, linkAddr); err != nil { 750 return fmt.Errorf("error adding address %v to link %q: %v", addr.Address, link.Attrs().Name, err) 751 } 752 753 for _, route := range info.Routes { 754 // TODO: that's too naive - if there are more than one interfaces which have this gw address 755 // in their subnet - same gw will be added on both of them 756 // in theory this should be ok, but there is can lead to configuration other than prepared 757 // by cni plugins 758 if linkAddr.Contains(route.GW) { 759 err := netlink.RouteAdd(&netlink.Route{ 760 LinkIndex: link.Attrs().Index, 761 Scope: SCOPE_UNIVERSE, 762 Dst: &route.Dst, 763 Gw: route.GW, 764 }) 765 if err != nil { 766 return fmt.Errorf("error adding route (dst %v gw %v): %v", route.Dst, route.GW, err) 767 } 768 } 769 } 770 } 771 } 772 773 return nil 774 } 775 776 // Teardown cleans up container network configuration. 777 // It does so by invoking teardown sequence which removes ebtables rules, links 778 // and addresses in an order opposite to that of their creation in SetupContainerSideNetwork. 779 // The end result is the same network configuration in the container network namespace 780 // as it was before SetupContainerSideNetwork() call. 781 func Teardown(csn *network.ContainerSideNetwork) error { 782 for _, i := range csn.Interfaces { 783 i.Fo.Close() 784 } 785 786 contLinks, err := getContainerLinks(csn.Result) 787 if err != nil { 788 return err 789 } 790 791 for i, contLink := range contLinks { 792 if contLink == nil { 793 return fmt.Errorf("missing %d link during teardown", i) 794 } 795 796 // Remove ebtables DHCP rules 797 if err := updateEbTables(csn.NsPath, contLink.Attrs().Name, "-D"); err != nil { 798 return nil 799 } 800 801 if !isSriovVf(contLink) { 802 tapInterfaceName := fmt.Sprintf(tapInterfaceNameTemplate, i) 803 tap, err := netlink.LinkByName(tapInterfaceName) 804 if err != nil { 805 return err 806 } 807 808 containerBridgeName := fmt.Sprintf(containerBridgeNameTemplate, i) 809 br, err := netlink.LinkByName(containerBridgeName) 810 if err != nil { 811 return err 812 } 813 814 if err := netlink.AddrDel(br, mustParseAddr(internalDhcpAddr)); err != nil { 815 return err 816 } 817 818 if err := TeardownBridge(br, []netlink.Link{contLink, tap}); err != nil { 819 return err 820 } 821 822 if err := netlink.LinkDel(br); err != nil { 823 return err 824 } 825 826 if err := netlink.LinkSetDown(tap); err != nil { 827 return err 828 } 829 830 if err := netlink.LinkDel(tap); err != nil { 831 return err 832 } 833 834 if err := SetHardwareAddr(contLink, csn.Interfaces[i].HardwareAddr); err != nil { 835 return err 836 } 837 } 838 839 rereadLink, err := netlink.LinkByName(contLink.Attrs().Name) 840 if err != nil { 841 return err 842 } 843 if err := ConfigureLink(rereadLink, csn.Result); err != nil { 844 return err 845 } 846 } 847 848 return nil 849 } 850 851 // GenerateMacAddress returns a random locally administrated unicast 852 // hardware address. 853 // Copied from: 854 // https://github.com/coreos/rkt/blob/56564bac090b44788684040f2ffd66463f29d5d0/stage1/init/kvm/network.go#L71 855 func GenerateMacAddress() (net.HardwareAddr, error) { 856 mac := net.HardwareAddr{ 857 2, // locally administred unicast 858 0x65, 0x02, // OUI (randomly chosen by jell) 859 0, 0, 0, // bytes to randomly overwrite 860 } 861 862 _, err := rand.Reader.Read(mac[3:6]) 863 if err != nil { 864 return nil, fmt.Errorf("cannot generate random mac address: %v", err) 865 } 866 867 return mac, nil 868 }