github.com/docker/engine@v22.0.0-20211208180946-d456264580cf+incompatible/libnetwork/drivers/overlay/ov_network.go (about)

     1  //go:build linux
     2  // +build linux
     3  
     4  package overlay
     5  
     6  import (
     7  	"encoding/json"
     8  	"fmt"
     9  	"net"
    10  	"os"
    11  	"os/exec"
    12  	"path/filepath"
    13  	"runtime"
    14  	"strconv"
    15  	"strings"
    16  	"sync"
    17  
    18  	"github.com/docker/docker/libnetwork/datastore"
    19  	"github.com/docker/docker/libnetwork/driverapi"
    20  	"github.com/docker/docker/libnetwork/netlabel"
    21  	"github.com/docker/docker/libnetwork/netutils"
    22  	"github.com/docker/docker/libnetwork/ns"
    23  	"github.com/docker/docker/libnetwork/osl"
    24  	"github.com/docker/docker/libnetwork/resolvconf"
    25  	"github.com/docker/docker/libnetwork/types"
    26  	"github.com/docker/docker/pkg/reexec"
    27  	"github.com/sirupsen/logrus"
    28  	"github.com/vishvananda/netlink"
    29  	"github.com/vishvananda/netlink/nl"
    30  	"github.com/vishvananda/netns"
    31  	"golang.org/x/sys/unix"
    32  )
    33  
var (
	// hostMode is switched on by networkOnceInit, either because the
	// _OVERLAY_HOST_MODE environment variable is set or because a probe vxlan
	// link could not be moved into a network namespace.
	hostMode bool
	// networkOnce makes sure networkOnceInit runs a single time (see joinSandbox).
	networkOnce sync.Once
	// networkMu is held around the vniTbl accesses made while setting up
	// subnet sandboxes and while cleaning up stale sandboxes.
	networkMu sync.Mutex
	// vniTbl maps a vxlan ID to the path of the network namespace in which a
	// vxlan link with that ID was found by populateVNITbl.
	vniTbl = make(map[uint32]string)
)
    40  
// networkTable indexes overlay networks by a string key (the driver stores
// networks under their network ID).
type networkTable map[string]*network
    42  
// subnet holds the per-subnet state of an overlay network.
type subnet struct {
	// sboxInit records that subnet sandbox initialization has been performed
	// (see joinSandbox); it is reset by leaveSandbox.
	sboxInit bool
	// vxlanName and brName are the generated vxlan and bridge interface
	// names, filled in by initSubnetSandbox.
	vxlanName string
	brName    string
	// vni is the vxlan ID assigned to this subnet.
	vni uint32
	// initErr is the recorded result of the subnet sandbox initialization.
	initErr error
	// subnetIP is the subnet pool and gwIP its gateway address.
	subnetIP *net.IPNet
	gwIP     *net.IPNet
}
    52  
// subnetJSON is the serialized form of a subnet used by network.Value and
// network.SetValue.
type subnetJSON struct {
	// SubnetIP and GwIP hold the String() form of the subnet's subnetIP and gwIP.
	SubnetIP string
	GwIP     string
	// Vni is the subnet's vxlan ID.
	Vni uint32
}
    58  
// network is the overlay driver's state for a single network. The embedded
// mutex is taken by the methods that touch the sandbox and join state.
type network struct {
	id string
	// dbIndex and dbExists are maintained by SetIndex and reported by Index
	// and Exists.
	dbIndex  uint64
	dbExists bool
	// sbox is the network sandbox created by initSandbox and released by
	// destroySandbox.
	sbox osl.Sandbox
	// nlSocket is the netlink socket subscribed to neighbor events by
	// initSandbox; destroySandbox closes it.
	nlSocket  *nl.NetlinkSocket
	endpoints endpointTable
	driver    *driver
	// joinCnt is incremented by joinSandbox (when requested) and decremented
	// by leaveSandbox, which destroys the sandbox when it reaches zero.
	joinCnt int
	// sboxInit records that initSandbox has been attempted; initErr keeps
	// its result.
	sboxInit bool
	// initEpoch is incremented on every initSandbox call and is part of the
	// sandbox key of non-restored sandboxes.
	initEpoch int
	initErr   error
	subnets   []*subnet
	// secure is set when the secure option is present at network creation.
	secure bool
	// mtu is the value parsed from the driver MTU option (0 when not given).
	mtu int
	sync.Mutex
}
    76  
    77  func init() {
    78  	reexec.Register("set-default-vlan", setDefaultVlan)
    79  }
    80  
    81  func setDefaultVlan() {
    82  	if len(os.Args) < 3 {
    83  		logrus.Error("insufficient number of arguments")
    84  		os.Exit(1)
    85  	}
    86  
    87  	runtime.LockOSThread()
    88  	defer runtime.UnlockOSThread()
    89  
    90  	nsPath := os.Args[1]
    91  	ns, err := netns.GetFromPath(nsPath)
    92  	if err != nil {
    93  		logrus.Errorf("overlay namespace get failed, %v", err)
    94  		os.Exit(1)
    95  	}
    96  	if err = netns.Set(ns); err != nil {
    97  		logrus.Errorf("setting into overlay namespace failed, %v", err)
    98  		os.Exit(1)
    99  	}
   100  
   101  	// make sure the sysfs mount doesn't propagate back
   102  	if err = unix.Unshare(unix.CLONE_NEWNS); err != nil {
   103  		logrus.Errorf("unshare failed, %v", err)
   104  		os.Exit(1)
   105  	}
   106  
   107  	flag := unix.MS_PRIVATE | unix.MS_REC
   108  	if err = unix.Mount("", "/", "", uintptr(flag), ""); err != nil {
   109  		logrus.Errorf("root mount failed, %v", err)
   110  		os.Exit(1)
   111  	}
   112  
   113  	if err = unix.Mount("sysfs", "/sys", "sysfs", 0, ""); err != nil {
   114  		logrus.Errorf("mounting sysfs failed, %v", err)
   115  		os.Exit(1)
   116  	}
   117  
   118  	brName := os.Args[2]
   119  	path := filepath.Join("/sys/class/net", brName, "bridge/default_pvid")
   120  	data := []byte{'0', '\n'}
   121  
   122  	if err = os.WriteFile(path, data, 0644); err != nil {
   123  		logrus.Errorf("enabling default vlan on bridge %s failed %v", brName, err)
   124  		os.Exit(1)
   125  	}
   126  	os.Exit(0)
   127  }
   128  
   129  func (d *driver) NetworkAllocate(id string, option map[string]string, ipV4Data, ipV6Data []driverapi.IPAMData) (map[string]string, error) {
   130  	return nil, types.NotImplementedErrorf("not implemented")
   131  }
   132  
   133  func (d *driver) NetworkFree(id string) error {
   134  	return types.NotImplementedErrorf("not implemented")
   135  }
   136  
   137  func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo driverapi.NetworkInfo, ipV4Data, ipV6Data []driverapi.IPAMData) error {
   138  	if id == "" {
   139  		return fmt.Errorf("invalid network id")
   140  	}
   141  	if len(ipV4Data) == 0 || ipV4Data[0].Pool.String() == "0.0.0.0/0" {
   142  		return types.BadRequestErrorf("ipv4 pool is empty")
   143  	}
   144  
   145  	// Since we perform lazy configuration make sure we try
   146  	// configuring the driver when we enter CreateNetwork
   147  	if err := d.configure(); err != nil {
   148  		return err
   149  	}
   150  
   151  	n := &network{
   152  		id:        id,
   153  		driver:    d,
   154  		endpoints: endpointTable{},
   155  		subnets:   []*subnet{},
   156  	}
   157  
   158  	vnis := make([]uint32, 0, len(ipV4Data))
   159  	if gval, ok := option[netlabel.GenericData]; ok {
   160  		optMap := gval.(map[string]string)
   161  		if val, ok := optMap[netlabel.OverlayVxlanIDList]; ok {
   162  			logrus.Debugf("overlay: Received vxlan IDs: %s", val)
   163  			vniStrings := strings.Split(val, ",")
   164  			for _, vniStr := range vniStrings {
   165  				vni, err := strconv.Atoi(vniStr)
   166  				if err != nil {
   167  					return fmt.Errorf("invalid vxlan id value %q passed", vniStr)
   168  				}
   169  
   170  				vnis = append(vnis, uint32(vni))
   171  			}
   172  		}
   173  		if _, ok := optMap[secureOption]; ok {
   174  			n.secure = true
   175  		}
   176  		if val, ok := optMap[netlabel.DriverMTU]; ok {
   177  			var err error
   178  			if n.mtu, err = strconv.Atoi(val); err != nil {
   179  				return fmt.Errorf("failed to parse %v: %v", val, err)
   180  			}
   181  			if n.mtu < 0 {
   182  				return fmt.Errorf("invalid MTU value: %v", n.mtu)
   183  			}
   184  		}
   185  	}
   186  
   187  	// If we are getting vnis from libnetwork, either we get for
   188  	// all subnets or none.
   189  	if len(vnis) != 0 && len(vnis) < len(ipV4Data) {
   190  		return fmt.Errorf("insufficient vnis(%d) passed to overlay", len(vnis))
   191  	}
   192  
   193  	for i, ipd := range ipV4Data {
   194  		s := &subnet{
   195  			subnetIP: ipd.Pool,
   196  			gwIP:     ipd.Gateway,
   197  		}
   198  
   199  		if len(vnis) != 0 {
   200  			s.vni = vnis[i]
   201  		}
   202  
   203  		n.subnets = append(n.subnets, s)
   204  	}
   205  
   206  	d.Lock()
   207  	defer d.Unlock()
   208  	if d.networks[n.id] != nil {
   209  		return fmt.Errorf("attempt to create overlay network %v that already exists", n.id)
   210  	}
   211  
   212  	if err := n.writeToStore(); err != nil {
   213  		return fmt.Errorf("failed to update data store for network %v: %v", n.id, err)
   214  	}
   215  
   216  	// Make sure no rule is on the way from any stale secure network
   217  	if !n.secure {
   218  		for _, vni := range vnis {
   219  			programMangle(vni, false)
   220  			programInput(vni, false)
   221  		}
   222  	}
   223  
   224  	if nInfo != nil {
   225  		if err := nInfo.TableEventRegister(ovPeerTable, driverapi.EndpointObject); err != nil {
   226  			// XXX Undo writeToStore?  No method to so.  Why?
   227  			return err
   228  		}
   229  	}
   230  
   231  	d.networks[id] = n
   232  
   233  	return nil
   234  }
   235  
   236  func (d *driver) DeleteNetwork(nid string) error {
   237  	if nid == "" {
   238  		return fmt.Errorf("invalid network id")
   239  	}
   240  
   241  	// Make sure driver resources are initialized before proceeding
   242  	if err := d.configure(); err != nil {
   243  		return err
   244  	}
   245  
   246  	d.Lock()
   247  	// Only perform a peer flush operation (if required) AFTER unlocking
   248  	// the driver lock to avoid deadlocking w/ the peerDB.
   249  	var doPeerFlush bool
   250  	defer func() {
   251  		d.Unlock()
   252  		if doPeerFlush {
   253  			d.peerFlush(nid)
   254  		}
   255  	}()
   256  
   257  	// This is similar to d.network(), but we need to keep holding the lock
   258  	// until we are done removing this network.
   259  	n, ok := d.networks[nid]
   260  	if !ok {
   261  		n = d.restoreNetworkFromStore(nid)
   262  	}
   263  	if n == nil {
   264  		return fmt.Errorf("could not find network with id %s", nid)
   265  	}
   266  
   267  	for _, ep := range n.endpoints {
   268  		if ep.ifName != "" {
   269  			if link, err := ns.NlHandle().LinkByName(ep.ifName); err == nil {
   270  				if err := ns.NlHandle().LinkDel(link); err != nil {
   271  					logrus.WithError(err).Warnf("Failed to delete interface (%s)'s link on endpoint (%s) delete", ep.ifName, ep.id)
   272  				}
   273  			}
   274  		}
   275  
   276  		if err := d.deleteEndpointFromStore(ep); err != nil {
   277  			logrus.Warnf("Failed to delete overlay endpoint %.7s from local store: %v", ep.id, err)
   278  		}
   279  	}
   280  
   281  	doPeerFlush = true
   282  	delete(d.networks, nid)
   283  
   284  	vnis, err := n.releaseVxlanID()
   285  	if err != nil {
   286  		return err
   287  	}
   288  
   289  	if n.secure {
   290  		for _, vni := range vnis {
   291  			programMangle(vni, false)
   292  			programInput(vni, false)
   293  		}
   294  	}
   295  
   296  	return nil
   297  }
   298  
   299  func (d *driver) ProgramExternalConnectivity(nid, eid string, options map[string]interface{}) error {
   300  	return nil
   301  }
   302  
   303  func (d *driver) RevokeExternalConnectivity(nid, eid string) error {
   304  	return nil
   305  }
   306  
   307  func (n *network) joinSandbox(s *subnet, restore bool, incJoinCount bool) error {
   308  	// If there is a race between two go routines here only one will win
   309  	// the other will wait.
   310  	networkOnce.Do(networkOnceInit)
   311  
   312  	n.Lock()
   313  	// If non-restore initialization occurred and was successful then
   314  	// tell the peerDB to initialize the sandbox with all the peers
   315  	// previously received from networkdb.  But only do this after
   316  	// unlocking the network.  Otherwise we could deadlock with
   317  	// on the peerDB channel while peerDB is waiting for the network lock.
   318  	var doInitPeerDB bool
   319  	defer func() {
   320  		n.Unlock()
   321  		if doInitPeerDB {
   322  			n.driver.initSandboxPeerDB(n.id)
   323  		}
   324  	}()
   325  
   326  	if !n.sboxInit {
   327  		n.initErr = n.initSandbox(restore)
   328  		doInitPeerDB = n.initErr == nil && !restore
   329  		// If there was an error, we cannot recover it
   330  		n.sboxInit = true
   331  	}
   332  
   333  	if n.initErr != nil {
   334  		return fmt.Errorf("network sandbox join failed: %v", n.initErr)
   335  	}
   336  
   337  	subnetErr := s.initErr
   338  	if !s.sboxInit {
   339  		subnetErr = n.initSubnetSandbox(s, restore)
   340  		// We can recover from these errors, but not on restore
   341  		if restore || subnetErr == nil {
   342  			s.initErr = subnetErr
   343  			s.sboxInit = true
   344  		}
   345  	}
   346  	if subnetErr != nil {
   347  		return fmt.Errorf("subnet sandbox join failed for %q: %v", s.subnetIP.String(), subnetErr)
   348  	}
   349  
   350  	if incJoinCount {
   351  		n.joinCnt++
   352  	}
   353  
   354  	return nil
   355  }
   356  
   357  func (n *network) leaveSandbox() {
   358  	n.Lock()
   359  	defer n.Unlock()
   360  	n.joinCnt--
   361  	if n.joinCnt != 0 {
   362  		return
   363  	}
   364  
   365  	n.destroySandbox()
   366  
   367  	n.sboxInit = false
   368  	n.initErr = nil
   369  	for _, s := range n.subnets {
   370  		s.sboxInit = false
   371  		s.initErr = nil
   372  	}
   373  }
   374  
   375  // to be called while holding network lock
   376  func (n *network) destroySandbox() {
   377  	if n.sbox != nil {
   378  		for _, iface := range n.sbox.Info().Interfaces() {
   379  			if err := iface.Remove(); err != nil {
   380  				logrus.Debugf("Remove interface %s failed: %v", iface.SrcName(), err)
   381  			}
   382  		}
   383  
   384  		for _, s := range n.subnets {
   385  			if hostMode {
   386  				if err := removeFilters(n.id[:12], s.brName); err != nil {
   387  					logrus.Warnf("Could not remove overlay filters: %v", err)
   388  				}
   389  			}
   390  
   391  			if s.vxlanName != "" {
   392  				err := deleteInterface(s.vxlanName)
   393  				if err != nil {
   394  					logrus.Warnf("could not cleanup sandbox properly: %v", err)
   395  				}
   396  			}
   397  		}
   398  
   399  		if hostMode {
   400  			if err := removeNetworkChain(n.id[:12]); err != nil {
   401  				logrus.Warnf("could not remove network chain: %v", err)
   402  			}
   403  		}
   404  
   405  		// Close the netlink socket, this will also release the watchMiss goroutine that is using it
   406  		if n.nlSocket != nil {
   407  			n.nlSocket.Close()
   408  			n.nlSocket = nil
   409  		}
   410  
   411  		n.sbox.Destroy()
   412  		n.sbox = nil
   413  	}
   414  }
   415  
   416  func populateVNITbl() {
   417  	filepath.Walk(filepath.Dir(osl.GenerateKey("walk")),
   418  		// NOTE(cpuguy83): The linter picked up on the fact that this walk function was not using this error argument
   419  		// That seems wrong... however I'm not familiar with this code or if that error matters
   420  		func(path string, info os.FileInfo, _ error) error {
   421  			_, fname := filepath.Split(path)
   422  
   423  			if len(strings.Split(fname, "-")) <= 1 {
   424  				return nil
   425  			}
   426  
   427  			ns, err := netns.GetFromPath(path)
   428  			if err != nil {
   429  				logrus.Errorf("Could not open namespace path %s during vni population: %v", path, err)
   430  				return nil
   431  			}
   432  			defer ns.Close()
   433  
   434  			nlh, err := netlink.NewHandleAt(ns, unix.NETLINK_ROUTE)
   435  			if err != nil {
   436  				logrus.Errorf("Could not open netlink handle during vni population for ns %s: %v", path, err)
   437  				return nil
   438  			}
   439  			defer nlh.Delete()
   440  
   441  			err = nlh.SetSocketTimeout(soTimeout)
   442  			if err != nil {
   443  				logrus.Warnf("Failed to set the timeout on the netlink handle sockets for vni table population: %v", err)
   444  			}
   445  
   446  			links, err := nlh.LinkList()
   447  			if err != nil {
   448  				logrus.Errorf("Failed to list interfaces during vni population for ns %s: %v", path, err)
   449  				return nil
   450  			}
   451  
   452  			for _, l := range links {
   453  				if l.Type() == "vxlan" {
   454  					vniTbl[uint32(l.(*netlink.Vxlan).VxlanId)] = path
   455  				}
   456  			}
   457  
   458  			return nil
   459  		})
   460  }
   461  
   462  func networkOnceInit() {
   463  	populateVNITbl()
   464  
   465  	if os.Getenv("_OVERLAY_HOST_MODE") != "" {
   466  		hostMode = true
   467  		return
   468  	}
   469  
   470  	err := createVxlan("testvxlan", 1, 0)
   471  	if err != nil {
   472  		logrus.Errorf("Failed to create testvxlan interface: %v", err)
   473  		return
   474  	}
   475  
   476  	defer deleteInterface("testvxlan")
   477  
   478  	path := "/proc/self/ns/net"
   479  	hNs, err := netns.GetFromPath(path)
   480  	if err != nil {
   481  		logrus.Errorf("Failed to get network namespace from path %s while setting host mode: %v", path, err)
   482  		return
   483  	}
   484  	defer hNs.Close()
   485  
   486  	nlh := ns.NlHandle()
   487  
   488  	iface, err := nlh.LinkByName("testvxlan")
   489  	if err != nil {
   490  		logrus.Errorf("Failed to get link testvxlan while setting host mode: %v", err)
   491  		return
   492  	}
   493  
   494  	// If we are not able to move the vxlan interface to a namespace
   495  	// then fallback to host mode
   496  	if err := nlh.LinkSetNsFd(iface, int(hNs)); err != nil {
   497  		hostMode = true
   498  	}
   499  }
   500  
   501  func (n *network) generateVxlanName(s *subnet) string {
   502  	id := n.id
   503  	if len(n.id) > 5 {
   504  		id = n.id[:5]
   505  	}
   506  
   507  	return fmt.Sprintf("vx-%06x-%v", s.vni, id)
   508  }
   509  
   510  func (n *network) generateBridgeName(s *subnet) string {
   511  	id := n.id
   512  	if len(n.id) > 5 {
   513  		id = n.id[:5]
   514  	}
   515  
   516  	return n.getBridgeNamePrefix(s) + "-" + id
   517  }
   518  
   519  func (n *network) getBridgeNamePrefix(s *subnet) string {
   520  	return fmt.Sprintf("ov-%06x", s.vni)
   521  }
   522  
   523  func checkOverlap(nw *net.IPNet) error {
   524  	var nameservers []string
   525  
   526  	if rc, err := resolvconf.Get(); err == nil {
   527  		nameservers = resolvconf.GetNameserversAsCIDR(rc.Content)
   528  	}
   529  
   530  	if err := netutils.CheckNameserverOverlaps(nameservers, nw); err != nil {
   531  		return fmt.Errorf("overlay subnet %s failed check with nameserver: %v: %v", nw.String(), nameservers, err)
   532  	}
   533  
   534  	if err := netutils.CheckRouteOverlaps(nw); err != nil {
   535  		return fmt.Errorf("overlay subnet %s failed check with host route table: %v", nw.String(), err)
   536  	}
   537  
   538  	return nil
   539  }
   540  
   541  func (n *network) restoreSubnetSandbox(s *subnet, brName, vxlanName string) error {
   542  	sbox := n.sbox
   543  
   544  	// restore overlay osl sandbox
   545  	Ifaces := make(map[string][]osl.IfaceOption)
   546  	brIfaceOption := make([]osl.IfaceOption, 2)
   547  	brIfaceOption = append(brIfaceOption, sbox.InterfaceOptions().Address(s.gwIP))
   548  	brIfaceOption = append(brIfaceOption, sbox.InterfaceOptions().Bridge(true))
   549  	Ifaces[brName+"+br"] = brIfaceOption
   550  
   551  	err := sbox.Restore(Ifaces, nil, nil, nil)
   552  	if err != nil {
   553  		return err
   554  	}
   555  
   556  	Ifaces = make(map[string][]osl.IfaceOption)
   557  	vxlanIfaceOption := make([]osl.IfaceOption, 1)
   558  	vxlanIfaceOption = append(vxlanIfaceOption, sbox.InterfaceOptions().Master(brName))
   559  	Ifaces[vxlanName+"+vxlan"] = vxlanIfaceOption
   560  	return sbox.Restore(Ifaces, nil, nil, nil)
   561  }
   562  
// setupSubnetSandbox creates the bridge and vxlan interfaces of subnet s and
// adds them to the network sandbox.
//
// In host mode it first removes a stale bridge and any vxlan link using the
// subnet's vni, checks the subnet for overlaps with nameservers and host
// routes, and at the end installs the filters for the bridge. Outside host
// mode it destroys a stale namespace recorded in vniTbl for the vni and, once
// the interfaces exist, re-executes the binary as "set-default-vlan" for the
// sandbox bridge (a failure there is logged but not fatal).
//
// If the vxlan interface cannot be added to the sandbox, the bridge already
// added and the vxlan link are removed again so that the caller can retry.
func (n *network) setupSubnetSandbox(s *subnet, brName, vxlanName string) error {

	if hostMode {
		// Try to delete stale bridge interface if it exists
		if err := deleteInterface(brName); err != nil {
			deleteInterfaceBySubnet(n.getBridgeNamePrefix(s), s)
		}
		// Try to delete the vxlan interface by vni if already present
		deleteVxlanByVNI("", s.vni)

		if err := checkOverlap(s.subnetIP); err != nil {
			return err
		}
	}

	if !hostMode {
		// Check whether this subnet's vni is being used in some other
		// namespace by looking at the vniTbl that was populated in the
		// once init. If a hit is found then it must be a stale namespace
		// from a previous life. Destroy it completely and reclaim its
		// resources.
		networkMu.Lock()
		path, ok := vniTbl[s.vni]
		networkMu.Unlock()

		if ok {
			deleteVxlanByVNI(path, s.vni)
			if err := unix.Unmount(path, unix.MNT_FORCE); err != nil {
				logrus.Errorf("unmount of %s failed: %v", path, err)
			}
			os.Remove(path)

			networkMu.Lock()
			delete(vniTbl, s.vni)
			networkMu.Unlock()
		}
	}

	// create a bridge and vxlan device for this subnet and move it to the sandbox
	sbox := n.sbox

	if err := sbox.AddInterface(brName, "br",
		sbox.InterfaceOptions().Address(s.gwIP),
		sbox.InterfaceOptions().Bridge(true)); err != nil {
		return fmt.Errorf("bridge creation in sandbox failed for subnet %q: %v", s.subnetIP.String(), err)
	}

	err := createVxlan(vxlanName, s.vni, n.maxMTU())
	if err != nil {
		return err
	}

	// Note: within this if statement err refers to the AddInterface error,
	// which shadows the (nil) createVxlan error declared above.
	if err := sbox.AddInterface(vxlanName, "vxlan",
		sbox.InterfaceOptions().Master(brName)); err != nil {
		// If adding vxlan device to the overlay namespace fails, remove the bridge interface we
		// already added to the namespace. This allows the caller to try the setup again.
		for _, iface := range sbox.Info().Interfaces() {
			if iface.SrcName() == brName {
				if ierr := iface.Remove(); ierr != nil {
					logrus.Errorf("removing bridge failed from ov ns %v failed, %v", n.sbox.Key(), ierr)
				}
			}
		}

		// Also, delete the vxlan interface. Since a global vni id is associated
		// with the vxlan interface, an orphaned vxlan interface will result in
		// failure of vxlan device creation if the vni is assigned to some other
		// network.
		if deleteErr := deleteInterface(vxlanName); deleteErr != nil {
			logrus.Warnf("could not delete vxlan interface, %s, error %v, after config error, %v", vxlanName, deleteErr, err)
		}
		return fmt.Errorf("vxlan interface creation failed for subnet %q: %v", s.subnetIP.String(), err)
	}

	if !hostMode {
		// The name handed to the helper is the destination name of the
		// last sandbox interface that reports itself as a bridge.
		// NOTE(review): presumably that is the bridge added above — confirm
		// for networks with several subnets.
		var name string
		for _, i := range sbox.Info().Interfaces() {
			if i.Bridge() {
				name = i.DstName()
			}
		}
		// Re-execute this binary as "set-default-vlan" with the sandbox key
		// and the bridge name as arguments.
		cmd := &exec.Cmd{
			Path:   reexec.Self(),
			Args:   []string{"set-default-vlan", sbox.Key(), name},
			Stdout: os.Stdout,
			Stderr: os.Stderr,
		}
		if err := cmd.Run(); err != nil {
			// not a fatal error
			logrus.Errorf("reexec to set bridge default vlan failed %v", err)
		}
	}

	if hostMode {
		if err := addFilters(n.id[:12], brName); err != nil {
			return err
		}
	}

	return nil
}
   664  
   665  // Must be called with the network lock
   666  func (n *network) initSubnetSandbox(s *subnet, restore bool) error {
   667  	brName := n.generateBridgeName(s)
   668  	vxlanName := n.generateVxlanName(s)
   669  
   670  	if restore {
   671  		if err := n.restoreSubnetSandbox(s, brName, vxlanName); err != nil {
   672  			return err
   673  		}
   674  	} else {
   675  		if err := n.setupSubnetSandbox(s, brName, vxlanName); err != nil {
   676  			return err
   677  		}
   678  	}
   679  
   680  	s.vxlanName = vxlanName
   681  	s.brName = brName
   682  
   683  	return nil
   684  }
   685  
   686  func (n *network) cleanupStaleSandboxes() {
   687  	filepath.Walk(filepath.Dir(osl.GenerateKey("walk")),
   688  		func(path string, info os.FileInfo, err error) error {
   689  			_, fname := filepath.Split(path)
   690  
   691  			pList := strings.Split(fname, "-")
   692  			if len(pList) <= 1 {
   693  				return nil
   694  			}
   695  
   696  			pattern := pList[1]
   697  			if strings.Contains(n.id, pattern) {
   698  				// Delete all vnis
   699  				deleteVxlanByVNI(path, 0)
   700  				unix.Unmount(path, unix.MNT_DETACH)
   701  				os.Remove(path)
   702  
   703  				// Now that we have destroyed this
   704  				// sandbox, remove all references to
   705  				// it in vniTbl so that we don't
   706  				// inadvertently destroy the sandbox
   707  				// created in this life.
   708  				networkMu.Lock()
   709  				for vni, tblPath := range vniTbl {
   710  					if tblPath == path {
   711  						delete(vniTbl, vni)
   712  					}
   713  				}
   714  				networkMu.Unlock()
   715  			}
   716  
   717  			return nil
   718  		})
   719  }
   720  
   721  func (n *network) initSandbox(restore bool) error {
   722  	n.initEpoch++
   723  
   724  	if !restore {
   725  		if hostMode {
   726  			if err := addNetworkChain(n.id[:12]); err != nil {
   727  				return err
   728  			}
   729  		}
   730  
   731  		// If there are any stale sandboxes related to this network
   732  		// from previous daemon life clean it up here
   733  		n.cleanupStaleSandboxes()
   734  	}
   735  
   736  	// In the restore case network sandbox already exist; but we don't know
   737  	// what epoch number it was created with. It has to be retrieved by
   738  	// searching the net namespaces.
   739  	var key string
   740  	if restore {
   741  		key = osl.GenerateKey("-" + n.id)
   742  	} else {
   743  		key = osl.GenerateKey(fmt.Sprintf("%d-", n.initEpoch) + n.id)
   744  	}
   745  
   746  	sbox, err := osl.NewSandbox(key, !hostMode, restore)
   747  	if err != nil {
   748  		return fmt.Errorf("could not get network sandbox (oper %t): %v", restore, err)
   749  	}
   750  
   751  	// this is needed to let the peerAdd configure the sandbox
   752  	n.sbox = sbox
   753  
   754  	// If we are in swarm mode, we don't need anymore the watchMiss routine.
   755  	// This will save 1 thread and 1 netlink socket per network
   756  	if !n.driver.isSerfAlive() {
   757  		return nil
   758  	}
   759  
   760  	var nlSock *nl.NetlinkSocket
   761  	sbox.InvokeFunc(func() {
   762  		nlSock, err = nl.Subscribe(unix.NETLINK_ROUTE, unix.RTNLGRP_NEIGH)
   763  		if err != nil {
   764  			return
   765  		}
   766  		// set the receive timeout to not remain stuck on the RecvFrom if the fd gets closed
   767  		tv := unix.NsecToTimeval(soTimeout.Nanoseconds())
   768  		err = nlSock.SetReceiveTimeout(&tv)
   769  	})
   770  	n.nlSocket = nlSock
   771  
   772  	if err == nil {
   773  		go n.watchMiss(nlSock, key)
   774  	} else {
   775  		logrus.Errorf("failed to subscribe to neighbor group netlink messages for overlay network %s in sbox %s: %v",
   776  			n.id, sbox.Key(), err)
   777  	}
   778  
   779  	return nil
   780  }
   781  
// watchMiss receives neighbor messages from nlSock and, for every L2 or L3
// miss that concerns one of the network's subnets, resolves the peer through
// the driver and adds it with peerAdd.
//
// The goroutine locks itself to its OS thread and enters the namespace at
// nsPath before reading. It returns when the namespace cannot be opened or
// entered, or when a receive fails and the socket's fd reads as -1 (the
// socket was closed). Receive timeouts (EAGAIN) are retried silently; other
// receive errors are logged and retried.
func (n *network) watchMiss(nlSock *nl.NetlinkSocket, nsPath string) {
	// With the new version of the netlink library the deserialize function makes
	// requests about the interface of the netlink message. This can succeed only
	// if this go routine is in the target namespace. For this reason following we
	// lock the thread on that namespace
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()
	newNs, err := netns.GetFromPath(nsPath)
	if err != nil {
		logrus.WithError(err).Errorf("failed to get the namespace %s", nsPath)
		return
	}
	defer newNs.Close()
	if err = netns.Set(newNs); err != nil {
		logrus.WithError(err).Errorf("failed to enter the namespace %s", nsPath)
		return
	}
	for {
		msgs, _, err := nlSock.Receive()
		if err != nil {
			// Read the fd under the network lock; destroySandbox closes the
			// socket while holding that lock.
			n.Lock()
			nlFd := nlSock.GetFd()
			n.Unlock()
			if nlFd == -1 {
				// The netlink socket got closed, simply exit to not leak this goroutine
				return
			}
			// When the receive timeout expires the receive will return EAGAIN
			if err == unix.EAGAIN {
				// we continue here to avoid spam for timeouts
				continue
			}
			logrus.Errorf("Failed to receive from netlink: %v ", err)
			continue
		}

		for _, msg := range msgs {
			// Only neighbor get/new messages are of interest.
			if msg.Header.Type != unix.RTM_GETNEIGH && msg.Header.Type != unix.RTM_NEWNEIGH {
				continue
			}

			neigh, err := netlink.NeighDeserialize(msg.Data)
			if err != nil {
				logrus.Errorf("Failed to deserialize netlink ndmsg: %v", err)
				continue
			}

			var (
				ip             net.IP
				mac            net.HardwareAddr
				l2Miss, l3Miss bool
			)
			if neigh.IP.To4() != nil {
				// An IPv4 address in the message is treated as an L3 miss.
				ip = neigh.IP
				l3Miss = true
			} else if neigh.HardwareAddr != nil {
				// Otherwise a hardware address is treated as an L2 miss; the
				// IP is taken from the address bytes after the first two.
				// NOTE(review): presumably overlay MACs embed the endpoint
				// IPv4 address in their last four bytes — confirm.
				mac = []byte(neigh.HardwareAddr)
				ip = net.IP(mac[2:])
				l2Miss = true
			} else {
				continue
			}

			// Not any of the network's subnets. Ignore.
			if !n.contains(ip) {
				continue
			}

			// Only entries in the stale or incomplete state are handled.
			if neigh.State&(netlink.NUD_STALE|netlink.NUD_INCOMPLETE) == 0 {
				continue
			}

			logrus.Debugf("miss notification: dest IP %v, dest MAC %v", ip, mac)
			mac, IPmask, vtep, err := n.driver.resolvePeer(n.id, ip)
			if err != nil {
				logrus.Errorf("could not resolve peer %q: %v", ip, err)
				continue
			}
			n.driver.peerAdd(n.id, "dummy", ip, IPmask, mac, vtep, l2Miss, l3Miss, false)
		}
	}
}
   864  
   865  // Restore a network from the store to the driver if it is present.
   866  // Must be called with the driver locked!
   867  func (d *driver) restoreNetworkFromStore(nid string) *network {
   868  	n := d.getNetworkFromStore(nid)
   869  	if n != nil {
   870  		n.driver = d
   871  		n.endpoints = endpointTable{}
   872  		d.networks[nid] = n
   873  	}
   874  	return n
   875  }
   876  
   877  func (d *driver) network(nid string) *network {
   878  	d.Lock()
   879  	n, ok := d.networks[nid]
   880  	if !ok {
   881  		n = d.restoreNetworkFromStore(nid)
   882  	}
   883  	d.Unlock()
   884  
   885  	return n
   886  }
   887  
   888  func (d *driver) getNetworkFromStore(nid string) *network {
   889  	if d.store == nil {
   890  		return nil
   891  	}
   892  
   893  	n := &network{id: nid}
   894  	if err := d.store.GetObject(datastore.Key(n.Key()...), n); err != nil {
   895  		return nil
   896  	}
   897  
   898  	return n
   899  }
   900  
   901  func (n *network) sandbox() osl.Sandbox {
   902  	n.Lock()
   903  	defer n.Unlock()
   904  	return n.sbox
   905  }
   906  
   907  func (n *network) vxlanID(s *subnet) uint32 {
   908  	n.Lock()
   909  	defer n.Unlock()
   910  	return s.vni
   911  }
   912  
   913  func (n *network) setVxlanID(s *subnet, vni uint32) {
   914  	n.Lock()
   915  	s.vni = vni
   916  	n.Unlock()
   917  }
   918  
   919  func (n *network) Key() []string {
   920  	return []string{"overlay", "network", n.id}
   921  }
   922  
   923  func (n *network) KeyPrefix() []string {
   924  	return []string{"overlay", "network"}
   925  }
   926  
   927  func (n *network) Value() []byte {
   928  	m := map[string]interface{}{}
   929  
   930  	netJSON := []*subnetJSON{}
   931  
   932  	for _, s := range n.subnets {
   933  		sj := &subnetJSON{
   934  			SubnetIP: s.subnetIP.String(),
   935  			GwIP:     s.gwIP.String(),
   936  			Vni:      s.vni,
   937  		}
   938  		netJSON = append(netJSON, sj)
   939  	}
   940  
   941  	m["secure"] = n.secure
   942  	m["subnets"] = netJSON
   943  	m["mtu"] = n.mtu
   944  	b, err := json.Marshal(m)
   945  	if err != nil {
   946  		return []byte{}
   947  	}
   948  
   949  	return b
   950  }
   951  
   952  func (n *network) Index() uint64 {
   953  	return n.dbIndex
   954  }
   955  
   956  func (n *network) SetIndex(index uint64) {
   957  	n.dbIndex = index
   958  	n.dbExists = true
   959  }
   960  
   961  func (n *network) Exists() bool {
   962  	return n.dbExists
   963  }
   964  
   965  func (n *network) Skip() bool {
   966  	return false
   967  }
   968  
   969  func (n *network) SetValue(value []byte) error {
   970  	var (
   971  		m       map[string]interface{}
   972  		newNet  bool
   973  		isMap   = true
   974  		netJSON = []*subnetJSON{}
   975  	)
   976  
   977  	if err := json.Unmarshal(value, &m); err != nil {
   978  		err := json.Unmarshal(value, &netJSON)
   979  		if err != nil {
   980  			return err
   981  		}
   982  		isMap = false
   983  	}
   984  
   985  	if len(n.subnets) == 0 {
   986  		newNet = true
   987  	}
   988  
   989  	if isMap {
   990  		if val, ok := m["secure"]; ok {
   991  			n.secure = val.(bool)
   992  		}
   993  		if val, ok := m["mtu"]; ok {
   994  			n.mtu = int(val.(float64))
   995  		}
   996  		bytes, err := json.Marshal(m["subnets"])
   997  		if err != nil {
   998  			return err
   999  		}
  1000  		if err := json.Unmarshal(bytes, &netJSON); err != nil {
  1001  			return err
  1002  		}
  1003  	}
  1004  
  1005  	for _, sj := range netJSON {
  1006  		subnetIPstr := sj.SubnetIP
  1007  		gwIPstr := sj.GwIP
  1008  		vni := sj.Vni
  1009  
  1010  		subnetIP, _ := types.ParseCIDR(subnetIPstr)
  1011  		gwIP, _ := types.ParseCIDR(gwIPstr)
  1012  
  1013  		if newNet {
  1014  			s := &subnet{
  1015  				subnetIP: subnetIP,
  1016  				gwIP:     gwIP,
  1017  				vni:      vni,
  1018  			}
  1019  			n.subnets = append(n.subnets, s)
  1020  		} else {
  1021  			sNet := n.getMatchingSubnet(subnetIP)
  1022  			if sNet != nil {
  1023  				sNet.vni = vni
  1024  			}
  1025  		}
  1026  	}
  1027  	return nil
  1028  }
  1029  
  1030  func (n *network) DataScope() string {
  1031  	return datastore.GlobalScope
  1032  }
  1033  
  1034  func (n *network) writeToStore() error {
  1035  	if n.driver.store == nil {
  1036  		return nil
  1037  	}
  1038  
  1039  	return n.driver.store.PutObjectAtomic(n)
  1040  }
  1041  
  1042  func (n *network) releaseVxlanID() ([]uint32, error) {
  1043  	n.Lock()
  1044  	nSubnets := len(n.subnets)
  1045  	n.Unlock()
  1046  	if nSubnets == 0 {
  1047  		return nil, nil
  1048  	}
  1049  
  1050  	if n.driver.store != nil {
  1051  		if err := n.driver.store.DeleteObjectAtomic(n); err != nil {
  1052  			if err == datastore.ErrKeyModified || err == datastore.ErrKeyNotFound {
  1053  				// In both the above cases we can safely assume that the key has been removed by some other
  1054  				// instance and so simply get out of here
  1055  				return nil, nil
  1056  			}
  1057  
  1058  			return nil, fmt.Errorf("failed to delete network to vxlan id map: %v", err)
  1059  		}
  1060  	}
  1061  	var vnis []uint32
  1062  	n.Lock()
  1063  	for _, s := range n.subnets {
  1064  		if n.driver.vxlanIdm != nil {
  1065  			vnis = append(vnis, s.vni)
  1066  		}
  1067  		s.vni = 0
  1068  	}
  1069  	n.Unlock()
  1070  
  1071  	for _, vni := range vnis {
  1072  		n.driver.vxlanIdm.Release(uint64(vni))
  1073  	}
  1074  
  1075  	return vnis, nil
  1076  }
  1077  
  1078  func (n *network) obtainVxlanID(s *subnet) error {
  1079  	//return if the subnet already has a vxlan id assigned
  1080  	if n.vxlanID(s) != 0 {
  1081  		return nil
  1082  	}
  1083  
  1084  	if n.driver.store == nil {
  1085  		return fmt.Errorf("no valid vxlan id and no datastore configured, cannot obtain vxlan id")
  1086  	}
  1087  
  1088  	for {
  1089  		if err := n.driver.store.GetObject(datastore.Key(n.Key()...), n); err != nil {
  1090  			return fmt.Errorf("getting network %q from datastore failed %v", n.id, err)
  1091  		}
  1092  
  1093  		if n.vxlanID(s) == 0 {
  1094  			vxlanID, err := n.driver.vxlanIdm.GetID(true)
  1095  			if err != nil {
  1096  				return fmt.Errorf("failed to allocate vxlan id: %v", err)
  1097  			}
  1098  
  1099  			n.setVxlanID(s, uint32(vxlanID))
  1100  			if err := n.writeToStore(); err != nil {
  1101  				n.driver.vxlanIdm.Release(uint64(n.vxlanID(s)))
  1102  				n.setVxlanID(s, 0)
  1103  				if err == datastore.ErrKeyModified {
  1104  					continue
  1105  				}
  1106  				return fmt.Errorf("network %q failed to update data store: %v", n.id, err)
  1107  			}
  1108  			return nil
  1109  		}
  1110  		return nil
  1111  	}
  1112  }
  1113  
  1114  // contains return true if the passed ip belongs to one the network's
  1115  // subnets
  1116  func (n *network) contains(ip net.IP) bool {
  1117  	for _, s := range n.subnets {
  1118  		if s.subnetIP.Contains(ip) {
  1119  			return true
  1120  		}
  1121  	}
  1122  
  1123  	return false
  1124  }
  1125  
  1126  // getSubnetforIP returns the subnet to which the given IP belongs
  1127  func (n *network) getSubnetforIP(ip *net.IPNet) *subnet {
  1128  	for _, s := range n.subnets {
  1129  		// first check if the mask lengths are the same
  1130  		i, _ := s.subnetIP.Mask.Size()
  1131  		j, _ := ip.Mask.Size()
  1132  		if i != j {
  1133  			continue
  1134  		}
  1135  		if s.subnetIP.Contains(ip.IP) {
  1136  			return s
  1137  		}
  1138  	}
  1139  	return nil
  1140  }
  1141  
  1142  // getMatchingSubnet return the network's subnet that matches the input
  1143  func (n *network) getMatchingSubnet(ip *net.IPNet) *subnet {
  1144  	if ip == nil {
  1145  		return nil
  1146  	}
  1147  	for _, s := range n.subnets {
  1148  		// first check if the mask lengths are the same
  1149  		i, _ := s.subnetIP.Mask.Size()
  1150  		j, _ := ip.Mask.Size()
  1151  		if i != j {
  1152  			continue
  1153  		}
  1154  		if s.subnetIP.IP.Equal(ip.IP) {
  1155  			return s
  1156  		}
  1157  	}
  1158  	return nil
  1159  }