github.com/mirantis/virtlet@v1.5.2-0.20191204181327-1659b8a48e9b/pkg/nettools/nettools.go (about)

     1  /*
     2  Copyright 2016 Mirantis
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  // Some of the code is based on CNI's plugins/main/bridge/bridge.go, pkg/ip/link.go
    18  // Original copyright notice:
    19  //
    20  // Copyright 2014 CNI authors
    21  //
    22  // Licensed under the Apache License, Version 2.0 (the "License");
    23  // you may not use this file except in compliance with the License.
    24  // You may obtain a copy of the License at
    25  //
    26  //     http://www.apache.org/licenses/LICENSE-2.0
    27  //
    28  // Unless required by applicable law or agreed to in writing, software
    29  // distributed under the License is distributed on an "AS IS" BASIS,
    30  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    31  // See the License for the specific language governing permissions and
    32  // limitations under the License.
    33  
    34  package nettools
    35  
    36  import (
    37  	"crypto/rand"
    38  	"errors"
    39  	"fmt"
    40  	"log"
    41  	"net"
    42  	"os"
    43  	"os/exec"
    44  	"sort"
    45  
    46  	"github.com/containernetworking/cni/pkg/ns"
    47  	cnitypes "github.com/containernetworking/cni/pkg/types"
    48  	cnicurrent "github.com/containernetworking/cni/pkg/types/current"
    49  	"github.com/davecgh/go-spew/spew"
    50  	"github.com/golang/glog"
    51  	"github.com/vishvananda/netlink"
    52  
    53  	"github.com/Mirantis/virtlet/pkg/cni"
    54  	"github.com/Mirantis/virtlet/pkg/network"
    55  )
    56  
const (
	// defaultMTU is the default MTU value.
	defaultMTU = 1500
	// tapInterfaceNameTemplate is the format used to build tap
	// interface names; %d is replaced with the interface number.
	tapInterfaceNameTemplate = "tap%d"
	// containerBridgeNameTemplate is the format used to build names of
	// the per-interface bridges; %d is replaced with the interface number.
	containerBridgeNameTemplate = "br%d"
	// loopbackInterfaceName is the name of the loopback link that
	// bringUpLoopback looks up and brings up.
	loopbackInterfaceName = "lo"
	// Address for dhcp server internal interface
	// (it is assigned to every container bridge created in this file).
	internalDhcpAddr = "169.254.254.2/24"
)
    65  
    66  func makeVethPair(name, peer string, mtu int) (netlink.Link, error) {
    67  	veth := &netlink.Veth{
    68  		LinkAttrs: netlink.LinkAttrs{
    69  			Name:  name,
    70  			Flags: net.FlagUp,
    71  			MTU:   mtu,
    72  		},
    73  		PeerName: peer,
    74  	}
    75  	if err := netlink.LinkAdd(veth); err != nil {
    76  		return nil, err
    77  	}
    78  
    79  	return veth, nil
    80  }
    81  
    82  func peerExists(name string) bool {
    83  	if _, err := netlink.LinkByName(name); err != nil {
    84  		return false
    85  	}
    86  	return true
    87  }
    88  
    89  func makeVeth(name string, mtu int) (peerName string, veth netlink.Link, err error) {
    90  	for i := 0; i < 10; i++ {
    91  		peerName, err = RandomVethName()
    92  		if err != nil {
    93  			return
    94  		}
    95  
    96  		veth, err = makeVethPair(name, peerName, mtu)
    97  		switch {
    98  		case err == nil:
    99  			return
   100  
   101  		case os.IsExist(err):
   102  			if peerExists(peerName) {
   103  				continue
   104  			}
   105  			err = fmt.Errorf("container veth name provided (%v) already exists", name)
   106  			return
   107  
   108  		default:
   109  			err = fmt.Errorf("failed to make veth pair: %v", err)
   110  			return
   111  		}
   112  	}
   113  
   114  	// should really never be hit
   115  	err = fmt.Errorf("failed to find a unique veth name")
   116  	return
   117  }
   118  
   119  // RandomVethName returns string "veth" with random prefix (hashed from entropy)
   120  func RandomVethName() (string, error) {
   121  	entropy := make([]byte, 4)
   122  	_, err := rand.Reader.Read(entropy)
   123  	if err != nil {
   124  		return "", fmt.Errorf("failed to generate random veth name: %v", err)
   125  	}
   126  
   127  	// NetworkManager (recent versions) will ignore veth devices that start with "veth"
   128  	return fmt.Sprintf("veth%x", entropy), nil
   129  }
   130  
   131  // SetupVeth sets up a pair of virtual ethernet devices.
   132  // Call SetupVeth from inside the container netns.  It will create both veth
   133  // devices and move the host-side veth into the provided hostNS namespace.
   134  // On success, SetupVeth returns (hostVeth, containerVeth, nil)
   135  func SetupVeth(contVethName string, mtu int, hostNS ns.NetNS) (netlink.Link, netlink.Link, error) {
   136  	hostVethName, contVeth, err := makeVeth(contVethName, mtu)
   137  	if err != nil {
   138  		return nil, nil, err
   139  	}
   140  
   141  	if err = netlink.LinkSetUp(contVeth); err != nil {
   142  		return nil, nil, fmt.Errorf("failed to set %q up: %v", contVethName, err)
   143  	}
   144  
   145  	hostVeth, err := netlink.LinkByName(hostVethName)
   146  	if err != nil {
   147  		return nil, nil, fmt.Errorf("failed to lookup %q: %v", hostVethName, err)
   148  	}
   149  
   150  	if err = netlink.LinkSetNsFd(hostVeth, int(hostNS.Fd())); err != nil {
   151  		return nil, nil, fmt.Errorf("failed to move veth to host netns: %v", err)
   152  	}
   153  
   154  	err = hostNS.Do(func(_ ns.NetNS) error {
   155  		hostVeth, err = netlink.LinkByName(hostVethName)
   156  		if err != nil {
   157  			return fmt.Errorf("failed to lookup %q in %q: %v", hostVethName, hostNS.Path(), err)
   158  		}
   159  
   160  		if err = netlink.LinkSetUp(hostVeth); err != nil {
   161  			return fmt.Errorf("failed to set %q up: %v", hostVethName, err)
   162  		}
   163  		return nil
   164  	})
   165  	if err != nil {
   166  		return nil, nil, err
   167  	}
   168  	return hostVeth, contVeth, nil
   169  }
   170  
   171  // CreateEscapeVethPair creates a veth pair with innerVeth residing in
   172  // the specified network namespace innerNS and outerVeth residing in
   173  // the 'outer' (current) namespace.
   174  // TBD: move this to test tools
   175  func CreateEscapeVethPair(innerNS ns.NetNS, ifName string, mtu int) (outerVeth, innerVeth netlink.Link, err error) {
   176  	var outerVethName string
   177  
   178  	err = innerNS.Do(func(outerNS ns.NetNS) error {
   179  		// create the veth pair in the inner ns and move outer end into the outer netns
   180  		outerVeth, innerVeth, err = SetupVeth(ifName, mtu, outerNS)
   181  		if err != nil {
   182  			return err
   183  		}
   184  
   185  		// need to lookup innerVeth again to get its attrs
   186  		innerVeth, err = netlink.LinkByName(innerVeth.Attrs().Name)
   187  		if err != nil {
   188  			return err
   189  		}
   190  
   191  		outerVethName = outerVeth.Attrs().Name
   192  		return nil
   193  	})
   194  	if err != nil {
   195  		return
   196  	}
   197  
   198  	// need to lookup outerVeth again as its index has changed during ns move
   199  	outerVeth, err = netlink.LinkByName(outerVethName)
   200  	if err != nil {
   201  		return nil, nil, fmt.Errorf("failed to lookup %q: %v", outerVethName, err)
   202  	}
   203  
   204  	return
   205  }
   206  
   207  func createBridge(brName string, mtu int) (*netlink.Bridge, error) {
   208  	br := &netlink.Bridge{
   209  		LinkAttrs: netlink.LinkAttrs{
   210  			Name: brName,
   211  			MTU:  mtu,
   212  			// Let kernel use default txqueuelen; leaving it unset
   213  			// means 0, and a zero-length TX queue messes up FIFO
   214  			// traffic shapers which use TX queue length as the
   215  			// default packet limit
   216  			TxQLen: -1,
   217  		},
   218  	}
   219  
   220  	if err := netlink.LinkAdd(br); err != nil {
   221  		return nil, fmt.Errorf("could not add %q: %v", brName, err)
   222  	}
   223  
   224  	if err := netlink.LinkSetUp(br); err != nil {
   225  		return nil, err
   226  	}
   227  
   228  	return br, nil
   229  }
   230  
   231  // SetupBridge creates a bridge and adds specified links to it.
   232  // It sets bridge's MTU to MTU value of the first link.
   233  func SetupBridge(bridgeName string, links []netlink.Link) (*netlink.Bridge, error) {
   234  	if len(links) == 0 {
   235  		return nil, errors.New("no links provided")
   236  	}
   237  
   238  	br, err := createBridge(bridgeName, links[0].Attrs().MTU)
   239  	if err != nil {
   240  		return nil, fmt.Errorf("failed to create bridge %q: %v", bridgeName, err)
   241  	}
   242  
   243  	for _, link := range links {
   244  		if err = linkSetMaster(link, br); err != nil {
   245  			delMessage := ""
   246  			if delErr := netlink.LinkDel(br); delErr != nil {
   247  				delMessage = fmt.Sprintf(" (and failed to delete the bridge: %v", err)
   248  			}
   249  			return nil, fmt.Errorf("failed to connect %q to bridge %v: %v%s", link.Attrs().Name, br.Attrs().Name, err, delMessage)
   250  		}
   251  	}
   252  
   253  	return br, nil
   254  }
   255  
   256  // FindVeth locates single veth link in the list of provided links.
   257  // There must be exactly one veth interface in the list.
   258  func FindVeth(links []netlink.Link) (netlink.Link, error) {
   259  	var veth netlink.Link
   260  	for _, link := range links {
   261  		if link.Type() != "veth" {
   262  			continue
   263  		}
   264  		if veth != nil {
   265  			return nil, errors.New("multiple veth links detected in container namespace")
   266  		}
   267  		veth = link
   268  	}
   269  	if veth == nil {
   270  		return nil, errors.New("no veth interface found")
   271  	}
   272  	return veth, nil
   273  }
   274  
   275  func findLinkByAddress(links []netlink.Link, address net.IPNet) (netlink.Link, error) {
   276  	for _, link := range links {
   277  		addresses, err := netlink.AddrList(link, FAMILY_ALL)
   278  		if err != nil {
   279  			return nil, err
   280  		}
   281  		for _, addr := range addresses {
   282  			if addr.IPNet.String() == address.String() {
   283  				return link, nil
   284  			}
   285  		}
   286  	}
   287  	return nil, fmt.Errorf("interface with address %q not found in the container namespace", address.String())
   288  }
   289  
   290  // ValidateAndFixCNIResult verifies that netConfig contains proper list of
   291  // ips, routes, interfaces and if something is missing it tries to complement
   292  // that using patch for Weave or for plugins which return their netConfig
   293  // in v0.2.0 version of CNI SPEC
   294  func ValidateAndFixCNIResult(netConfig *cnicurrent.Result, nsPath string, allLinks []netlink.Link) (*cnicurrent.Result, error) {
   295  	// If there are no routes provided, we consider it a broken
   296  	// config and extract interface config instead. That's the
   297  	// case with Weave CNI plugin. We don't do this for multiple CNI
   298  	// at this point.
   299  	if len(netConfig.IPs) == 1 && (cni.GetPodIP(netConfig) == "" || len(netConfig.Routes) == 0) {
   300  		dnsInfo := netConfig.DNS
   301  
   302  		veth, err := FindVeth(allLinks)
   303  		if err != nil {
   304  			return nil, err
   305  		}
   306  		if netConfig, err = ExtractLinkInfo(veth, nsPath); err != nil {
   307  			return nil, err
   308  		}
   309  
   310  		// extracted netConfig doesn't have DNS information, so
   311  		// still try to extract it from CNI-provided data
   312  		netConfig.DNS = dnsInfo
   313  
   314  		return netConfig, nil
   315  	}
   316  
   317  	if len(netConfig.IPs) == 0 {
   318  		return nil, fmt.Errorf("cni result does not have any IP addresses")
   319  	}
   320  
   321  	// Interfaces contain broken info more often than not, so we
   322  	// replace them here with what we can deduce from the network
   323  	// links in the container netns
   324  	for _, ipConfig := range netConfig.IPs {
   325  		link, err := findLinkByAddress(allLinks, ipConfig.Address)
   326  		if err != nil {
   327  			return nil, err
   328  		}
   329  
   330  		found := false
   331  		for i, iface := range netConfig.Interfaces {
   332  			if iface.Name == link.Attrs().Name {
   333  				ipConfig.Interface = i
   334  				found = true
   335  				break
   336  			}
   337  		}
   338  		if !found {
   339  			ipConfig.Interface = len(netConfig.Interfaces)
   340  			netConfig.Interfaces = append(netConfig.Interfaces, &cnicurrent.Interface{
   341  				Name:    link.Attrs().Name,
   342  				Mac:     link.Attrs().HardwareAddr.String(),
   343  				Sandbox: nsPath,
   344  			})
   345  		}
   346  	}
   347  
   348  	return netConfig, nil
   349  }
   350  
   351  // getContainerLinks finds links that correspond to interfaces in the current
   352  // network namespace
   353  func getContainerLinks(info *cnicurrent.Result) ([]netlink.Link, error) {
   354  	// info.Interfaces is omitted by some CNI implementations and
   355  	// the order may not be correct there after Virtlet adds the
   356  	// missing ones, so we use interface indexes from info.IPs for
   357  	// ordering.
   358  	var links []netlink.Link
   359  	order := make([]int, len(info.Interfaces))
   360  	for n, ip := range info.IPs {
   361  		if ip.Interface >= 0 && ip.Interface < len(order) {
   362  			order[ip.Interface] = n + 1
   363  		}
   364  	}
   365  	ifaces := make([]*cnicurrent.Interface, len(info.Interfaces))
   366  	copy(ifaces, info.Interfaces)
   367  	sort.SliceStable(ifaces, func(i, j int) bool { return order[i] < order[j] })
   368  	for _, iface := range ifaces {
   369  		// empty Sandbox means this interface belongs to the host
   370  		// network namespace, so we skip it
   371  		if iface.Sandbox == "" {
   372  			continue
   373  		}
   374  		// If link is unavailable - simply add nil to slice
   375  		link, err := netlink.LinkByName(iface.Name)
   376  		if err != nil {
   377  			if _, ok := err.(netlink.LinkNotFoundError); !ok {
   378  				return nil, err
   379  			}
   380  		}
   381  		links = append(links, link)
   382  	}
   383  	return links, nil
   384  }
   385  
   386  // StripLink removes addresses from the link
   387  // along with any routes related to the link, except
   388  // those created by the kernel
   389  func StripLink(link netlink.Link) error {
   390  	routes, err := netlink.RouteList(link, FAMILY_V4)
   391  	if err != nil {
   392  		return fmt.Errorf("failed to list routes: %v", err)
   393  	}
   394  
   395  	addrs, err := netlink.AddrList(link, FAMILY_V4)
   396  	if err != nil {
   397  		return fmt.Errorf("failed to get addresses for link: %v", err)
   398  	}
   399  
   400  	for _, route := range routes {
   401  		if route.Protocol == RTPROT_KERNEL {
   402  			// route created by the kernel
   403  			continue
   404  		}
   405  		if err = netlink.RouteDel(&route); err != nil {
   406  			return fmt.Errorf("error deleting route: %v", err)
   407  		}
   408  	}
   409  
   410  	for _, addr := range addrs {
   411  		if err = netlink.AddrDel(link, &addr); err != nil {
   412  			return fmt.Errorf("error deleting address from the route: %v", err)
   413  		}
   414  	}
   415  
   416  	return nil
   417  }
   418  
   419  // ExtractLinkInfo extracts ip address and netmask from veth
   420  // interface in the current namespace, together with routes for this
   421  // interface.
   422  // There must be exactly one veth interface in the namespace
   423  // and exactly one address associated with veth.
   424  // Returns interface info struct and error, if any.
   425  func ExtractLinkInfo(link netlink.Link, nsPath string) (*cnicurrent.Result, error) {
   426  	addrs, err := netlink.AddrList(link, FAMILY_V4)
   427  	if err != nil {
   428  		return nil, fmt.Errorf("failed to get addresses for link: %v", err)
   429  	}
   430  	if len(addrs) != 1 {
   431  		return nil, fmt.Errorf("expected exactly one address for link, but got %v", addrs)
   432  	}
   433  
   434  	result := &cnicurrent.Result{
   435  		Interfaces: []*cnicurrent.Interface{
   436  			{
   437  				Name:    link.Attrs().Name,
   438  				Mac:     link.Attrs().HardwareAddr.String(),
   439  				Sandbox: nsPath,
   440  			},
   441  		},
   442  		IPs: []*cnicurrent.IPConfig{
   443  			{
   444  				Version:   "4",
   445  				Interface: 0,
   446  				Address:   *addrs[0].IPNet,
   447  			},
   448  		},
   449  	}
   450  
   451  	routes, err := netlink.RouteList(link, FAMILY_V4)
   452  	if err != nil {
   453  		return nil, fmt.Errorf("failed to list routes: %v", err)
   454  	}
   455  	for _, route := range routes {
   456  		switch {
   457  		case route.Protocol == RTPROT_KERNEL:
   458  			// route created by kernel
   459  		case route.Gw == nil:
   460  			// these routes can't be represented properly
   461  			// by CNI result because CNI will consider
   462  			// them having IP's default Gateway value as
   463  			// Gw
   464  		case (route.Dst == nil || route.Dst.IP == nil) && route.Gw == nil:
   465  			// route has only Src
   466  		case (route.Dst == nil || route.Dst.IP == nil):
   467  			result.IPs[0].Gateway = route.Gw
   468  			result.Routes = append(result.Routes, &cnitypes.Route{
   469  				Dst: net.IPNet{
   470  					IP:   net.IP{0, 0, 0, 0},
   471  					Mask: net.IPMask{0, 0, 0, 0},
   472  				},
   473  				GW: route.Gw,
   474  			})
   475  		default:
   476  			result.Routes = append(result.Routes, &cnitypes.Route{
   477  				Dst: *route.Dst,
   478  				GW:  route.Gw,
   479  			})
   480  		}
   481  	}
   482  
   483  	return result, nil
   484  }
   485  
   486  func mustParseAddr(addr string) *netlink.Addr {
   487  	r, err := netlink.ParseAddr(addr)
   488  	if err != nil {
   489  		log.Panicf("Failed to parse address %q: %v", addr, err)
   490  	}
   491  	return r
   492  }
   493  
   494  func bringUpLoopback() error {
   495  	// lo interface is already there in the new ns but it's down
   496  	lo, err := netlink.LinkByName(loopbackInterfaceName)
   497  	if err != nil {
   498  		return fmt.Errorf("failed to find link %q: %v", loopbackInterfaceName, err)
   499  	}
   500  	if err := netlink.LinkSetUp(lo); err != nil {
   501  		return fmt.Errorf("failed to bring up link %q: %v", loopbackInterfaceName, err)
   502  	}
   503  	return nil
   504  }
   505  
   506  func updateEbTables(nsPath, interfaceName, command string) error {
   507  	// block/unblock DHCP traffic from/to CNI-provided link
   508  	for _, item := range []struct{ chain, opt string }{
   509  		// dhcp responses originate from bridge itself
   510  		{"OUTPUT", "--ip-source-port"},
   511  		// dhcp requests originate from the VM
   512  		{"FORWARD", "--ip-destination-port"},
   513  	} {
   514  		if out, err := exec.Command(
   515  			"nsenter", "--net="+nsPath,
   516  			"ebtables", command, item.chain, "-p", "IPV4", "--ip-protocol", "UDP",
   517  			item.opt, "67", "--out-if", interfaceName, "-j", "DROP").CombinedOutput(); err != nil {
   518  			return fmt.Errorf("[netns %q] ebtables failed: %v\nOut:\n%s", nsPath, err, out)
   519  		}
   520  	}
   521  
   522  	return nil
   523  }
   524  
   525  func disableMacLearning(nsPath string, bridgeName string) error {
   526  	if out, err := exec.Command("nsenter", "--net="+nsPath, "brctl", "setageing", bridgeName, "0").CombinedOutput(); err != nil {
   527  		return fmt.Errorf("[netns %q] brctl failed: %v\nOut:\n%s", nsPath, err, out)
   528  	}
   529  
   530  	return nil
   531  }
   532  
   533  // SetHardwareAddr sets hardware address on provided link.
   534  func SetHardwareAddr(link netlink.Link, hwAddr net.HardwareAddr) error {
   535  	if err := netlink.LinkSetDown(link); err != nil {
   536  		return fmt.Errorf("can't bring down the link: %v", err)
   537  	}
   538  	if err := netlink.LinkSetHardwareAddr(link, hwAddr); err != nil {
   539  		return fmt.Errorf("can't set hardware address for the link: %v", err)
   540  	}
   541  	if err := netlink.LinkSetUp(link); err != nil {
   542  		return fmt.Errorf("can't bring up the link: %v", err)
   543  	}
   544  
   545  	return nil
   546  }
   547  
   548  func setupTapAndGetInterfaceDescription(link netlink.Link, nsPath string, ifaceNo int) (*network.InterfaceDescription, error) {
   549  	hwAddr := link.Attrs().HardwareAddr
   550  	ifaceName := link.Attrs().Name
   551  
   552  	mtu := link.Attrs().MTU
   553  
   554  	newHwAddr, err := GenerateMacAddress()
   555  	if err == nil {
   556  		err = SetHardwareAddr(link, newHwAddr)
   557  	}
   558  	if err != nil {
   559  		return nil, err
   560  	}
   561  
   562  	tapInterfaceName := fmt.Sprintf(tapInterfaceNameTemplate, ifaceNo)
   563  	tap, err := CreateTAP(tapInterfaceName, mtu)
   564  	if err != nil {
   565  		return nil, err
   566  	}
   567  
   568  	containerBridgeName := fmt.Sprintf(containerBridgeNameTemplate, ifaceNo)
   569  	br, err := SetupBridge(containerBridgeName, []netlink.Link{link, tap})
   570  	if err != nil {
   571  		return nil, fmt.Errorf("failed to create bridge: %v", err)
   572  	}
   573  
   574  	if err := netlink.AddrAdd(br, mustParseAddr(internalDhcpAddr)); err != nil {
   575  		return nil, fmt.Errorf("failed to set address for the bridge: %v", err)
   576  	}
   577  
   578  	// Add ebtables DHCP blocking rules
   579  	if err := updateEbTables(nsPath, ifaceName, "-A"); err != nil {
   580  		return nil, err
   581  	}
   582  
   583  	// Work around bridge MAC learning problem
   584  	// https://ubuntuforums.org/showthread.php?t=2329373&s=cf580a41179e0f186ad4e625834a1d61&p=13511965#post13511965
   585  	// (affects Flannel)
   586  	if err := disableMacLearning(nsPath, containerBridgeName); err != nil {
   587  		return nil, err
   588  	}
   589  
   590  	if err := bringUpLoopback(); err != nil {
   591  		return nil, err
   592  	}
   593  
   594  	glog.V(3).Infof("Opening tap interface %q for link %q", tapInterfaceName, ifaceName)
   595  	fo, err := OpenTAP(tapInterfaceName)
   596  	if err != nil {
   597  		return nil, fmt.Errorf("failed to open tap: %v", err)
   598  	}
   599  	glog.V(3).Infof("Adding interface %q as %q", ifaceName, tapInterfaceName)
   600  
   601  	return &network.InterfaceDescription{
   602  		Type:         network.InterfaceTypeTap,
   603  		Name:         ifaceName,
   604  		Fo:           fo,
   605  		HardwareAddr: hwAddr,
   606  		MTU:          uint16(mtu),
   607  	}, nil
   608  }
   609  
   610  // SetupContainerSideNetwork sets up networking in container
   611  // namespace.  It does so by preparing the following
   612  // network interfaces in container ns:
   613  //     tapX      - tap interface for the each interface to pass to VM
   614  //     brX       - a bridge that joins above tapX and original CNI interface
   615  // with X denoting an link index in info.Interfaces list.
   616  // Each bridge gets assigned a link-local address to be used
   617  // for dhcp server.
   618  // In case of SR-IOV VFs this function only sets up a device to be passed to VM.
   619  // The function should be called from within container namespace.
   620  // Returns container network struct and an error, if any.
   621  func SetupContainerSideNetwork(info *cnicurrent.Result, nsPath string, allLinks []netlink.Link, enableSriov bool, hostNS ns.NetNS) (*network.ContainerSideNetwork, error) {
   622  	contLinks, err := getContainerLinks(info)
   623  	if err != nil {
   624  		return nil, err
   625  	}
   626  
   627  	var interfaces []*network.InterfaceDescription
   628  	for i, link := range contLinks {
   629  		if link == nil {
   630  			return nil, fmt.Errorf("missing link #%d in the container network namespace (Virtlet pod restarted?)", i)
   631  		}
   632  
   633  		if err := StripLink(link); err != nil {
   634  			return nil, err
   635  		}
   636  
   637  		var ifDesc *network.InterfaceDescription
   638  
   639  		if isSriovVf(link) {
   640  			if !enableSriov {
   641  				return nil, fmt.Errorf("SR-IOV device configured in container network namespace while Virtlet is configured with disabled SR-IOV support")
   642  			}
   643  			if ifDesc, err = setupSriovAndGetInterfaceDescription(link, hostNS); err != nil {
   644  				return nil, err
   645  			}
   646  		} else {
   647  			if ifDesc, err = setupTapAndGetInterfaceDescription(link, nsPath, i); err != nil {
   648  				return nil, err
   649  			}
   650  		}
   651  
   652  		interfaces = append(interfaces, ifDesc)
   653  	}
   654  
   655  	return &network.ContainerSideNetwork{info, nsPath, interfaces}, nil
   656  }
   657  
   658  // RecoverContainerSideNetwork tries to populate ContainerSideNetwork
   659  // structure based on a network namespace that was already adjusted for Virtlet
   660  func RecoverContainerSideNetwork(csn *network.ContainerSideNetwork, nsPath string, allLinks []netlink.Link, hostNS ns.NetNS) error {
   661  	if len(csn.Result.Interfaces) == 0 {
   662  		return fmt.Errorf("wrong cni configuration: no interfaces defined: %s", spew.Sdump(csn.Result))
   663  	}
   664  
   665  	contLinks, err := getContainerLinks(csn.Result)
   666  	if err != nil {
   667  		return err
   668  	}
   669  
   670  	oldDescs := map[string]*network.InterfaceDescription{}
   671  	for _, desc := range csn.Interfaces {
   672  		oldDescs[desc.Name] = desc
   673  	}
   674  
   675  	for _, link := range contLinks {
   676  		// Skip missing link which is already used by running VM
   677  		if link == nil {
   678  			continue
   679  		}
   680  		ifaceName := link.Attrs().Name
   681  		desc, found := oldDescs[ifaceName]
   682  		if !found {
   683  			glog.Warningf("Recovering container side network: missing description for interface %q", ifaceName)
   684  		}
   685  		delete(oldDescs, ifaceName)
   686  		var ifaceType network.InterfaceType
   687  
   688  		if isSriovVf(link) {
   689  			ifaceType = network.InterfaceTypeVF
   690  
   691  			// device should be already unbound, but after machine reboot that can be necessary
   692  			unbindDriverFromDevice(desc.PCIAddress)
   693  
   694  			devIdentifier, err := getDeviceIdentifier(desc.PCIAddress)
   695  			if err != nil {
   696  				return err
   697  			}
   698  
   699  			// this can be problematic in case of machine reboot - we are trying to use the same
   700  			// devices as was used before reboot, but in meantime there is small chance that they
   701  			// were used already by sriov cni plugin (for which reboot means it's starting everything
   702  			// from clean situation) for some other pods, before even virtlet was started
   703  			// also in case of virtlet pod restart - device can be already bound to vfio-pci, so we
   704  			// are ignoring any error there)
   705  			bindDeviceToVFIO(devIdentifier)
   706  		} else {
   707  			ifaceType = network.InterfaceTypeTap
   708  			// It's OK if OpenTAP failed as the device is busy and used by running VM
   709  			if fo, err := OpenTAP(link.Attrs().Name); err == nil {
   710  				desc.Fo = fo
   711  			}
   712  		}
   713  		if desc.Type != ifaceType {
   714  			return fmt.Errorf("bad interface type for %q", desc.Name)
   715  		}
   716  	}
   717  
   718  	return nil
   719  }
   720  
   721  // TeardownBridge removes links from bridge and sets it down
   722  func TeardownBridge(bridge netlink.Link, links []netlink.Link) error {
   723  	for _, link := range links {
   724  		if err := netlink.LinkSetNoMaster(link); err != nil {
   725  			return err
   726  		}
   727  	}
   728  
   729  	return netlink.LinkSetDown(bridge)
   730  }
   731  
   732  // ConfigureLink configures a link according to the CNI result
   733  func ConfigureLink(link netlink.Link, info *cnicurrent.Result) error {
   734  	ifaceNo := -1
   735  	linkMAC := link.Attrs().HardwareAddr.String()
   736  	for i, iface := range info.Interfaces {
   737  		if iface.Mac == linkMAC {
   738  			ifaceNo = i
   739  			break
   740  		}
   741  	}
   742  	if ifaceNo == -1 {
   743  		return fmt.Errorf("can't find link with MAC %q in saved cni result: %s", linkMAC, spew.Sdump(info))
   744  	}
   745  
   746  	for _, addr := range info.IPs {
   747  		if addr.Interface == ifaceNo {
   748  			linkAddr := &netlink.Addr{IPNet: &addr.Address}
   749  			if err := netlink.AddrAdd(link, linkAddr); err != nil {
   750  				return fmt.Errorf("error adding address %v to link %q: %v", addr.Address, link.Attrs().Name, err)
   751  			}
   752  
   753  			for _, route := range info.Routes {
   754  				// TODO: that's too naive - if there are more than one interfaces which have this gw address
   755  				// in their subnet - same gw will be added on both of them
   756  				// in theory this should be ok, but there is can lead to configuration other than prepared
   757  				// by cni plugins
   758  				if linkAddr.Contains(route.GW) {
   759  					err := netlink.RouteAdd(&netlink.Route{
   760  						LinkIndex: link.Attrs().Index,
   761  						Scope:     SCOPE_UNIVERSE,
   762  						Dst:       &route.Dst,
   763  						Gw:        route.GW,
   764  					})
   765  					if err != nil {
   766  						return fmt.Errorf("error adding route (dst %v gw %v): %v", route.Dst, route.GW, err)
   767  					}
   768  				}
   769  			}
   770  		}
   771  	}
   772  
   773  	return nil
   774  }
   775  
   776  // Teardown cleans up container network configuration.
   777  // It does so by invoking teardown sequence which removes ebtables rules, links
   778  // and addresses in an order opposite to that of their creation in SetupContainerSideNetwork.
   779  // The end result is the same network configuration in the container network namespace
   780  // as it was before SetupContainerSideNetwork() call.
   781  func Teardown(csn *network.ContainerSideNetwork) error {
   782  	for _, i := range csn.Interfaces {
   783  		i.Fo.Close()
   784  	}
   785  
   786  	contLinks, err := getContainerLinks(csn.Result)
   787  	if err != nil {
   788  		return err
   789  	}
   790  
   791  	for i, contLink := range contLinks {
   792  		if contLink == nil {
   793  			return fmt.Errorf("missing %d link during teardown", i)
   794  		}
   795  
   796  		// Remove ebtables DHCP rules
   797  		if err := updateEbTables(csn.NsPath, contLink.Attrs().Name, "-D"); err != nil {
   798  			return nil
   799  		}
   800  
   801  		if !isSriovVf(contLink) {
   802  			tapInterfaceName := fmt.Sprintf(tapInterfaceNameTemplate, i)
   803  			tap, err := netlink.LinkByName(tapInterfaceName)
   804  			if err != nil {
   805  				return err
   806  			}
   807  
   808  			containerBridgeName := fmt.Sprintf(containerBridgeNameTemplate, i)
   809  			br, err := netlink.LinkByName(containerBridgeName)
   810  			if err != nil {
   811  				return err
   812  			}
   813  
   814  			if err := netlink.AddrDel(br, mustParseAddr(internalDhcpAddr)); err != nil {
   815  				return err
   816  			}
   817  
   818  			if err := TeardownBridge(br, []netlink.Link{contLink, tap}); err != nil {
   819  				return err
   820  			}
   821  
   822  			if err := netlink.LinkDel(br); err != nil {
   823  				return err
   824  			}
   825  
   826  			if err := netlink.LinkSetDown(tap); err != nil {
   827  				return err
   828  			}
   829  
   830  			if err := netlink.LinkDel(tap); err != nil {
   831  				return err
   832  			}
   833  
   834  			if err := SetHardwareAddr(contLink, csn.Interfaces[i].HardwareAddr); err != nil {
   835  				return err
   836  			}
   837  		}
   838  
   839  		rereadLink, err := netlink.LinkByName(contLink.Attrs().Name)
   840  		if err != nil {
   841  			return err
   842  		}
   843  		if err := ConfigureLink(rereadLink, csn.Result); err != nil {
   844  			return err
   845  		}
   846  	}
   847  
   848  	return nil
   849  }
   850  
   851  // GenerateMacAddress returns a random locally administrated unicast
   852  // hardware address.
   853  // Copied from:
   854  // https://github.com/coreos/rkt/blob/56564bac090b44788684040f2ffd66463f29d5d0/stage1/init/kvm/network.go#L71
   855  func GenerateMacAddress() (net.HardwareAddr, error) {
   856  	mac := net.HardwareAddr{
   857  		2,          // locally administred unicast
   858  		0x65, 0x02, // OUI (randomly chosen by jell)
   859  		0, 0, 0, // bytes to randomly overwrite
   860  	}
   861  
   862  	_, err := rand.Reader.Read(mac[3:6])
   863  	if err != nil {
   864  		return nil, fmt.Errorf("cannot generate random mac address: %v", err)
   865  	}
   866  
   867  	return mac, nil
   868  }