github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/libnetwork/drivers/overlay/ov_network.go (about)

     1  package overlay
     2  
     3  import (
     4  	"encoding/json"
     5  	"fmt"
     6  	"io/ioutil"
     7  	"net"
     8  	"os"
     9  	"os/exec"
    10  	"path/filepath"
    11  	"runtime"
    12  	"strconv"
    13  	"strings"
    14  	"sync"
    15  
    16  	"github.com/docker/docker/pkg/reexec"
    17  	"github.com/docker/libnetwork/datastore"
    18  	"github.com/docker/libnetwork/driverapi"
    19  	"github.com/docker/libnetwork/netlabel"
    20  	"github.com/docker/libnetwork/netutils"
    21  	"github.com/docker/libnetwork/ns"
    22  	"github.com/docker/libnetwork/osl"
    23  	"github.com/docker/libnetwork/resolvconf"
    24  	"github.com/docker/libnetwork/types"
    25  	"github.com/hashicorp/go-multierror"
    26  	"github.com/sirupsen/logrus"
    27  	"github.com/vishvananda/netlink"
    28  	"github.com/vishvananda/netlink/nl"
    29  	"github.com/vishvananda/netns"
    30  	"golang.org/x/sys/unix"
    31  )
    32  
var (
	// hostMode is switched on by networkOnceInit, either when the
	// _OVERLAY_HOST_MODE environment variable is set or when a test vxlan
	// device cannot be moved into a network namespace.
	hostMode bool
	// networkOnce guards the single execution of networkOnceInit.
	networkOnce sync.Once
	// networkMu serializes the vniTbl accesses made while setting up
	// subnet sandboxes and cleaning up stale sandboxes.
	networkMu sync.Mutex
	// vniTbl maps a VXLAN ID to the namespace path in which a vxlan
	// device carrying that ID was found by populateVNITbl.
	vniTbl = make(map[uint32]string)
)
    39  
// networkTable is a collection of overlay networks indexed by a string key
// (presumably the network ID; confirm against the driver's usage).
type networkTable map[string]*network
    41  
// subnet is the per-subnet state of an overlay network: its address pool,
// gateway, VXLAN ID and the names of the devices created for it.
type subnet struct {
	// sboxInit is set by joinSandbox once the subnet sandbox setup result
	// has been recorded, and cleared again by leaveSandbox.
	sboxInit bool
	// vxlanName and brName are recorded by initSubnetSandbox after the
	// subnet sandbox has been set up or restored.
	vxlanName string
	brName    string
	// vni is the VXLAN network identifier used by this subnet.
	vni uint32
	// initErr keeps the outcome of the recorded subnet sandbox setup.
	initErr error
	// subnetIP is the address pool and gwIP the gateway of the subnet.
	subnetIP *net.IPNet
	gwIP     *net.IPNet
}
    51  
// subnetJSON is the serialized form of a subnet as produced by
// network.Value and consumed by network.SetValue. The addresses are stored
// in their string form.
type subnetJSON struct {
	SubnetIP string
	GwIP     string
	Vni      uint32
}
    57  
// network is the driver-side state of one overlay network. The embedded
// mutex protects the mutable fields (sandbox, join count, init state, ...).
type network struct {
	// id is the network ID.
	id string
	// dbIndex and dbExists back the Index/SetIndex/Exists store methods.
	dbIndex  uint64
	dbExists bool
	// sbox is the network sandbox created by initSandbox and released by
	// destroySandbox.
	sbox osl.Sandbox
	// nlSocket is the neighbor-event netlink socket consumed by watchMiss;
	// it is closed by destroySandbox.
	nlSocket *nl.NetlinkSocket
	// endpoints holds the endpoints known for this network.
	endpoints endpointTable
	// driver is the owning overlay driver.
	driver *driver
	// joinCnt counts sandbox joins; the sandbox is destroyed when it
	// drops back to zero in leaveSandbox.
	joinCnt int
	// sboxInit and initErr record whether, and with which result,
	// initSandbox has been run since the last leaveSandbox reset.
	sboxInit bool
	// initEpoch is incremented on every initSandbox call and is part of
	// the sandbox key for non-restored sandboxes.
	initEpoch int
	initErr   error
	// subnets lists the subnets of the network.
	subnets []*subnet
	// secure marks the network as encrypted (set from the secure option).
	secure bool
	// mtu is the MTU requested through the driver MTU option.
	mtu int
	sync.Mutex
}
    75  
// init registers setDefaultVlan as the handler for the "set-default-vlan"
// reexec command.
func init() {
	reexec.Register("set-default-vlan", setDefaultVlan)
}
    79  
    80  func setDefaultVlan() {
    81  	if len(os.Args) < 3 {
    82  		logrus.Error("insufficient number of arguments")
    83  		os.Exit(1)
    84  	}
    85  
    86  	runtime.LockOSThread()
    87  	defer runtime.UnlockOSThread()
    88  
    89  	nsPath := os.Args[1]
    90  	ns, err := netns.GetFromPath(nsPath)
    91  	if err != nil {
    92  		logrus.Errorf("overlay namespace get failed, %v", err)
    93  		os.Exit(1)
    94  	}
    95  	if err = netns.Set(ns); err != nil {
    96  		logrus.Errorf("setting into overlay namespace failed, %v", err)
    97  		os.Exit(1)
    98  	}
    99  
   100  	// make sure the sysfs mount doesn't propagate back
   101  	if err = unix.Unshare(unix.CLONE_NEWNS); err != nil {
   102  		logrus.Errorf("unshare failed, %v", err)
   103  		os.Exit(1)
   104  	}
   105  
   106  	flag := unix.MS_PRIVATE | unix.MS_REC
   107  	if err = unix.Mount("", "/", "", uintptr(flag), ""); err != nil {
   108  		logrus.Errorf("root mount failed, %v", err)
   109  		os.Exit(1)
   110  	}
   111  
   112  	if err = unix.Mount("sysfs", "/sys", "sysfs", 0, ""); err != nil {
   113  		logrus.Errorf("mounting sysfs failed, %v", err)
   114  		os.Exit(1)
   115  	}
   116  
   117  	brName := os.Args[2]
   118  	path := filepath.Join("/sys/class/net", brName, "bridge/default_pvid")
   119  	data := []byte{'0', '\n'}
   120  
   121  	if err = ioutil.WriteFile(path, data, 0644); err != nil {
   122  		logrus.Errorf("enabling default vlan on bridge %s failed %v", brName, err)
   123  		os.Exit(1)
   124  	}
   125  	os.Exit(0)
   126  }
   127  
   128  func (d *driver) NetworkAllocate(id string, option map[string]string, ipV4Data, ipV6Data []driverapi.IPAMData) (map[string]string, error) {
   129  	return nil, types.NotImplementedErrorf("not implemented")
   130  }
   131  
   132  func (d *driver) NetworkFree(id string) error {
   133  	return types.NotImplementedErrorf("not implemented")
   134  }
   135  
   136  func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo driverapi.NetworkInfo, ipV4Data, ipV6Data []driverapi.IPAMData) error {
   137  	if id == "" {
   138  		return fmt.Errorf("invalid network id")
   139  	}
   140  	if len(ipV4Data) == 0 || ipV4Data[0].Pool.String() == "0.0.0.0/0" {
   141  		return types.BadRequestErrorf("ipv4 pool is empty")
   142  	}
   143  
   144  	// Since we perform lazy configuration make sure we try
   145  	// configuring the driver when we enter CreateNetwork
   146  	if err := d.configure(); err != nil {
   147  		return err
   148  	}
   149  
   150  	n := &network{
   151  		id:        id,
   152  		driver:    d,
   153  		endpoints: endpointTable{},
   154  		subnets:   []*subnet{},
   155  	}
   156  
   157  	vnis := make([]uint32, 0, len(ipV4Data))
   158  	if gval, ok := option[netlabel.GenericData]; ok {
   159  		optMap := gval.(map[string]string)
   160  		if val, ok := optMap[netlabel.OverlayVxlanIDList]; ok {
   161  			logrus.Debugf("overlay: Received vxlan IDs: %s", val)
   162  			vniStrings := strings.Split(val, ",")
   163  			for _, vniStr := range vniStrings {
   164  				vni, err := strconv.Atoi(vniStr)
   165  				if err != nil {
   166  					return fmt.Errorf("invalid vxlan id value %q passed", vniStr)
   167  				}
   168  
   169  				vnis = append(vnis, uint32(vni))
   170  			}
   171  		}
   172  		if _, ok := optMap[secureOption]; ok {
   173  			n.secure = true
   174  		}
   175  		if val, ok := optMap[netlabel.DriverMTU]; ok {
   176  			var err error
   177  			if n.mtu, err = strconv.Atoi(val); err != nil {
   178  				return fmt.Errorf("failed to parse %v: %v", val, err)
   179  			}
   180  			if n.mtu < 0 {
   181  				return fmt.Errorf("invalid MTU value: %v", n.mtu)
   182  			}
   183  		}
   184  	}
   185  
   186  	// If we are getting vnis from libnetwork, either we get for
   187  	// all subnets or none.
   188  	if len(vnis) != 0 && len(vnis) < len(ipV4Data) {
   189  		return fmt.Errorf("insufficient vnis(%d) passed to overlay", len(vnis))
   190  	}
   191  
   192  	for i, ipd := range ipV4Data {
   193  		s := &subnet{
   194  			subnetIP: ipd.Pool,
   195  			gwIP:     ipd.Gateway,
   196  		}
   197  
   198  		if len(vnis) != 0 {
   199  			s.vni = vnis[i]
   200  		}
   201  
   202  		n.subnets = append(n.subnets, s)
   203  	}
   204  
   205  	d.Lock()
   206  	defer d.Unlock()
   207  	if d.networks[n.id] != nil {
   208  		return fmt.Errorf("attempt to create overlay network %v that already exists", n.id)
   209  	}
   210  
   211  	if err := n.writeToStore(); err != nil {
   212  		return fmt.Errorf("failed to update data store for network %v: %v", n.id, err)
   213  	}
   214  
   215  	// Make sure no rule is on the way from any stale secure network
   216  	if !n.secure {
   217  		for _, vni := range vnis {
   218  			programMangle(vni, false)
   219  			programInput(vni, false)
   220  		}
   221  	}
   222  
   223  	if nInfo != nil {
   224  		if err := nInfo.TableEventRegister(ovPeerTable, driverapi.EndpointObject); err != nil {
   225  			// XXX Undo writeToStore?  No method to so.  Why?
   226  			return err
   227  		}
   228  	}
   229  
   230  	d.networks[id] = n
   231  
   232  	return nil
   233  }
   234  
   235  func (d *driver) DeleteNetwork(nid string) error {
   236  	if nid == "" {
   237  		return fmt.Errorf("invalid network id")
   238  	}
   239  
   240  	// Make sure driver resources are initialized before proceeding
   241  	if err := d.configure(); err != nil {
   242  		return err
   243  	}
   244  
   245  	d.Lock()
   246  	// Only perform a peer flush operation (if required) AFTER unlocking
   247  	// the driver lock to avoid deadlocking w/ the peerDB.
   248  	var doPeerFlush bool
   249  	defer func() {
   250  		d.Unlock()
   251  		if doPeerFlush {
   252  			d.peerFlush(nid)
   253  		}
   254  	}()
   255  
   256  	// This is similar to d.network(), but we need to keep holding the lock
   257  	// until we are done removing this network.
   258  	n, ok := d.networks[nid]
   259  	if !ok {
   260  		n = d.restoreNetworkFromStore(nid)
   261  	}
   262  	if n == nil {
   263  		return fmt.Errorf("could not find network with id %s", nid)
   264  	}
   265  
   266  	for _, ep := range n.endpoints {
   267  		if ep.ifName != "" {
   268  			if link, err := ns.NlHandle().LinkByName(ep.ifName); err == nil {
   269  				if err := ns.NlHandle().LinkDel(link); err != nil {
   270  					logrus.WithError(err).Warnf("Failed to delete interface (%s)'s link on endpoint (%s) delete", ep.ifName, ep.id)
   271  				}
   272  			}
   273  		}
   274  
   275  		if err := d.deleteEndpointFromStore(ep); err != nil {
   276  			logrus.Warnf("Failed to delete overlay endpoint %.7s from local store: %v", ep.id, err)
   277  		}
   278  	}
   279  
   280  	doPeerFlush = true
   281  	delete(d.networks, nid)
   282  
   283  	vnis, err := n.releaseVxlanID()
   284  	if err != nil {
   285  		return err
   286  	}
   287  
   288  	if n.secure {
   289  		for _, vni := range vnis {
   290  			programMangle(vni, false)
   291  			programInput(vni, false)
   292  		}
   293  	}
   294  
   295  	return nil
   296  }
   297  
// ProgramExternalConnectivity is a no-op for this driver and always
// returns nil.
func (d *driver) ProgramExternalConnectivity(nid, eid string, options map[string]interface{}) error {
	return nil
}
   301  
// RevokeExternalConnectivity is a no-op for this driver and always
// returns nil.
func (d *driver) RevokeExternalConnectivity(nid, eid string) error {
	return nil
}
   305  
// joinSandbox makes sure the network sandbox and the sandbox of subnet s
// are initialized, running the one-time package initialization first.
//
// restore is handed through to initSandbox and initSubnetSandbox. When
// incJoinCount is true the network's join count is incremented on success.
//
// A failed network sandbox initialization is remembered and returned by
// every later call until leaveSandbox resets the state. A failed subnet
// initialization is only remembered in the restore case; otherwise the
// next call tries again.
func (n *network) joinSandbox(s *subnet, restore bool, incJoinCount bool) error {
	// If there is a race between two go routines here only one will win
	// the other will wait.
	networkOnce.Do(networkOnceInit)

	n.Lock()
	// If non-restore initialization occurred and was successful then
	// tell the peerDB to initialize the sandbox with all the peers
	// previously received from networkdb.  But only do this after
	// unlocking the network.  Otherwise we could deadlock on the
	// peerDB channel while peerDB is waiting for the network lock.
	var doInitPeerDB bool
	defer func() {
		n.Unlock()
		if doInitPeerDB {
			n.driver.initSandboxPeerDB(n.id)
		}
	}()

	if !n.sboxInit {
		n.initErr = n.initSandbox(restore)
		doInitPeerDB = n.initErr == nil && !restore
		// If there was an error, we cannot recover it
		n.sboxInit = true
	}

	if n.initErr != nil {
		return fmt.Errorf("network sandbox join failed: %v", n.initErr)
	}

	// Start from the remembered result; it is replaced below when the
	// subnet still needs to be initialized.
	subnetErr := s.initErr
	if !s.sboxInit {
		subnetErr = n.initSubnetSandbox(s, restore)
		// We can recover from these errors, but not on restore
		if restore || subnetErr == nil {
			s.initErr = subnetErr
			s.sboxInit = true
		}
	}
	if subnetErr != nil {
		return fmt.Errorf("subnet sandbox join failed for %q: %v", s.subnetIP.String(), subnetErr)
	}

	if incJoinCount {
		n.joinCnt++
	}

	return nil
}
   355  
   356  func (n *network) leaveSandbox() {
   357  	n.Lock()
   358  	defer n.Unlock()
   359  	n.joinCnt--
   360  	if n.joinCnt != 0 {
   361  		return
   362  	}
   363  
   364  	n.destroySandbox()
   365  
   366  	n.sboxInit = false
   367  	n.initErr = nil
   368  	for _, s := range n.subnets {
   369  		s.sboxInit = false
   370  		s.initErr = nil
   371  	}
   372  }
   373  
   374  // to be called while holding network lock
   375  func (n *network) destroySandbox() {
   376  	if n.sbox != nil {
   377  		for _, iface := range n.sbox.Info().Interfaces() {
   378  			if err := iface.Remove(); err != nil {
   379  				logrus.Debugf("Remove interface %s failed: %v", iface.SrcName(), err)
   380  			}
   381  		}
   382  
   383  		for _, s := range n.subnets {
   384  			if hostMode {
   385  				if err := removeFilters(n.id[:12], s.brName); err != nil {
   386  					logrus.Warnf("Could not remove overlay filters: %v", err)
   387  				}
   388  			}
   389  
   390  			if s.vxlanName != "" {
   391  				err := deleteInterface(s.vxlanName)
   392  				if err != nil {
   393  					logrus.Warnf("could not cleanup sandbox properly: %v", err)
   394  				}
   395  			}
   396  		}
   397  
   398  		if hostMode {
   399  			if err := removeNetworkChain(n.id[:12]); err != nil {
   400  				logrus.Warnf("could not remove network chain: %v", err)
   401  			}
   402  		}
   403  
   404  		// Close the netlink socket, this will also release the watchMiss goroutine that is using it
   405  		if n.nlSocket != nil {
   406  			n.nlSocket.Close()
   407  			n.nlSocket = nil
   408  		}
   409  
   410  		n.sbox.Destroy()
   411  		n.sbox = nil
   412  	}
   413  }
   414  
   415  func populateVNITbl() {
   416  	filepath.Walk(filepath.Dir(osl.GenerateKey("walk")),
   417  		func(path string, info os.FileInfo, err error) error {
   418  			_, fname := filepath.Split(path)
   419  
   420  			if len(strings.Split(fname, "-")) <= 1 {
   421  				return nil
   422  			}
   423  
   424  			ns, err := netns.GetFromPath(path)
   425  			if err != nil {
   426  				logrus.Errorf("Could not open namespace path %s during vni population: %v", path, err)
   427  				return nil
   428  			}
   429  			defer ns.Close()
   430  
   431  			nlh, err := netlink.NewHandleAt(ns, unix.NETLINK_ROUTE)
   432  			if err != nil {
   433  				logrus.Errorf("Could not open netlink handle during vni population for ns %s: %v", path, err)
   434  				return nil
   435  			}
   436  			defer nlh.Delete()
   437  
   438  			err = nlh.SetSocketTimeout(soTimeout)
   439  			if err != nil {
   440  				logrus.Warnf("Failed to set the timeout on the netlink handle sockets for vni table population: %v", err)
   441  			}
   442  
   443  			links, err := nlh.LinkList()
   444  			if err != nil {
   445  				logrus.Errorf("Failed to list interfaces during vni population for ns %s: %v", path, err)
   446  				return nil
   447  			}
   448  
   449  			for _, l := range links {
   450  				if l.Type() == "vxlan" {
   451  					vniTbl[uint32(l.(*netlink.Vxlan).VxlanId)] = path
   452  				}
   453  			}
   454  
   455  			return nil
   456  		})
   457  }
   458  
   459  func networkOnceInit() {
   460  	populateVNITbl()
   461  
   462  	if os.Getenv("_OVERLAY_HOST_MODE") != "" {
   463  		hostMode = true
   464  		return
   465  	}
   466  
   467  	err := createVxlan("testvxlan", 1, 0)
   468  	if err != nil {
   469  		logrus.Errorf("Failed to create testvxlan interface: %v", err)
   470  		return
   471  	}
   472  
   473  	defer deleteInterface("testvxlan")
   474  
   475  	path := "/proc/self/ns/net"
   476  	hNs, err := netns.GetFromPath(path)
   477  	if err != nil {
   478  		logrus.Errorf("Failed to get network namespace from path %s while setting host mode: %v", path, err)
   479  		return
   480  	}
   481  	defer hNs.Close()
   482  
   483  	nlh := ns.NlHandle()
   484  
   485  	iface, err := nlh.LinkByName("testvxlan")
   486  	if err != nil {
   487  		logrus.Errorf("Failed to get link testvxlan while setting host mode: %v", err)
   488  		return
   489  	}
   490  
   491  	// If we are not able to move the vxlan interface to a namespace
   492  	// then fallback to host mode
   493  	if err := nlh.LinkSetNsFd(iface, int(hNs)); err != nil {
   494  		hostMode = true
   495  	}
   496  }
   497  
   498  func (n *network) generateVxlanName(s *subnet) string {
   499  	id := n.id
   500  	if len(n.id) > 5 {
   501  		id = n.id[:5]
   502  	}
   503  
   504  	return fmt.Sprintf("vx-%06x-%v", s.vni, id)
   505  }
   506  
   507  func (n *network) generateBridgeName(s *subnet) string {
   508  	id := n.id
   509  	if len(n.id) > 5 {
   510  		id = n.id[:5]
   511  	}
   512  
   513  	return n.getBridgeNamePrefix(s) + "-" + id
   514  }
   515  
// getBridgeNamePrefix returns the bridge name prefix for subnet s: "ov-"
// followed by the VNI formatted as at least six hex digits.
func (n *network) getBridgeNamePrefix(s *subnet) string {
	return fmt.Sprintf("ov-%06x", s.vni)
}
   519  
   520  func checkOverlap(nw *net.IPNet) error {
   521  	var nameservers []string
   522  
   523  	if rc, err := resolvconf.Get(); err == nil {
   524  		nameservers = resolvconf.GetNameserversAsCIDR(rc.Content)
   525  	}
   526  
   527  	if err := netutils.CheckNameserverOverlaps(nameservers, nw); err != nil {
   528  		return fmt.Errorf("overlay subnet %s failed check with nameserver: %v: %v", nw.String(), nameservers, err)
   529  	}
   530  
   531  	if err := netutils.CheckRouteOverlaps(nw); err != nil {
   532  		return fmt.Errorf("overlay subnet %s failed check with host route table: %v", nw.String(), err)
   533  	}
   534  
   535  	return nil
   536  }
   537  
   538  func (n *network) restoreSubnetSandbox(s *subnet, brName, vxlanName string) error {
   539  	sbox := n.sbox
   540  
   541  	// restore overlay osl sandbox
   542  	Ifaces := make(map[string][]osl.IfaceOption)
   543  	brIfaceOption := make([]osl.IfaceOption, 2)
   544  	brIfaceOption = append(brIfaceOption, sbox.InterfaceOptions().Address(s.gwIP))
   545  	brIfaceOption = append(brIfaceOption, sbox.InterfaceOptions().Bridge(true))
   546  	Ifaces[brName+"+br"] = brIfaceOption
   547  
   548  	err := sbox.Restore(Ifaces, nil, nil, nil)
   549  	if err != nil {
   550  		return err
   551  	}
   552  
   553  	Ifaces = make(map[string][]osl.IfaceOption)
   554  	vxlanIfaceOption := make([]osl.IfaceOption, 1)
   555  	vxlanIfaceOption = append(vxlanIfaceOption, sbox.InterfaceOptions().Master(brName))
   556  	Ifaces[vxlanName+"+vxlan"] = vxlanIfaceOption
   557  	return sbox.Restore(Ifaces, nil, nil, nil)
   558  }
   559  
// setupSubnetSandbox creates the bridge and vxlan devices for subnet s and
// attaches them to the network sandbox.
//
// In host mode, stale bridge and vxlan devices are removed first, the
// subnet is checked for overlaps, and filters are added at the end. Outside
// host mode, a namespace that vniTbl still associates with this subnet's
// VNI is torn down first, and after setup the "set-default-vlan" reexec
// command is run for the sandbox bridge (its failure is only logged).
//
// If attaching the vxlan device fails, the bridge already added to the
// sandbox and the vxlan device are removed again so the caller can retry.
func (n *network) setupSubnetSandbox(s *subnet, brName, vxlanName string) error {

	if hostMode {
		// Try to delete stale bridge interface if it exists
		if err := deleteInterface(brName); err != nil {
			deleteInterfaceBySubnet(n.getBridgeNamePrefix(s), s)
		}
		// Try to delete the vxlan interface by vni if already present
		deleteVxlanByVNI("", s.vni)

		if err := checkOverlap(s.subnetIP); err != nil {
			return err
		}
	}

	if !hostMode {
		// Check whether this subnet's vni is being used in some
		// other namespace by looking at vniTbl that we just
		// populated in the once init. If a hit is found then
		// it must be a stale namespace from a previous
		// life. Destroy it completely and reclaim resources.
		networkMu.Lock()
		path, ok := vniTbl[s.vni]
		networkMu.Unlock()

		if ok {
			deleteVxlanByVNI(path, s.vni)
			if err := unix.Unmount(path, unix.MNT_FORCE); err != nil {
				logrus.Errorf("unmount of %s failed: %v", path, err)
			}
			os.Remove(path)

			networkMu.Lock()
			delete(vniTbl, s.vni)
			networkMu.Unlock()
		}
	}

	// create a bridge and vxlan device for this subnet and move it to the sandbox
	sbox := n.sbox

	if err := sbox.AddInterface(brName, "br",
		sbox.InterfaceOptions().Address(s.gwIP),
		sbox.InterfaceOptions().Bridge(true)); err != nil {
		return fmt.Errorf("bridge creation in sandbox failed for subnet %q: %v", s.subnetIP.String(), err)
	}

	err := createVxlan(vxlanName, s.vni, n.maxMTU())
	if err != nil {
		return err
	}

	if err := sbox.AddInterface(vxlanName, "vxlan",
		sbox.InterfaceOptions().Master(brName)); err != nil {
		// If adding vxlan device to the overlay namespace fails, remove the bridge interface we
		// already added to the namespace. This allows the caller to try the setup again.
		for _, iface := range sbox.Info().Interfaces() {
			if iface.SrcName() == brName {
				if ierr := iface.Remove(); ierr != nil {
					logrus.Errorf("removing bridge failed from ov ns %v failed, %v", n.sbox.Key(), ierr)
				}
			}
		}

		// Also, delete the vxlan interface. Since a global vni id is associated
		// with the vxlan interface, an orphaned vxlan interface will result in
		// failure of vxlan device creation if the vni is assigned to some other
		// network.
		if deleteErr := deleteInterface(vxlanName); deleteErr != nil {
			logrus.Warnf("could not delete vxlan interface, %s, error %v, after config error, %v", vxlanName, deleteErr, err)
		}
		return fmt.Errorf("vxlan interface creation failed for subnet %q: %v", s.subnetIP.String(), err)
	}

	if !hostMode {
		// Pick the destination name of the bridge interface in the
		// sandbox; if several report Bridge(), the last one wins.
		var name string
		for _, i := range sbox.Info().Interfaces() {
			if i.Bridge() {
				name = i.DstName()
			}
		}
		cmd := &exec.Cmd{
			Path:   reexec.Self(),
			Args:   []string{"set-default-vlan", sbox.Key(), name},
			Stdout: os.Stdout,
			Stderr: os.Stderr,
		}
		if err := cmd.Run(); err != nil {
			// not a fatal error
			logrus.Errorf("reexec to set bridge default vlan failed %v", err)
		}
	}

	if hostMode {
		if err := addFilters(n.id[:12], brName); err != nil {
			return err
		}
	}

	return nil
}
   661  
   662  // Must be called with the network lock
   663  func (n *network) initSubnetSandbox(s *subnet, restore bool) error {
   664  	brName := n.generateBridgeName(s)
   665  	vxlanName := n.generateVxlanName(s)
   666  
   667  	// Program iptables rules for mandatory encryption of the secure
   668  	// network, or clean up leftover rules for a stale secure network which
   669  	// was previously assigned the same VNI.
   670  	if err := programMangle(s.vni, n.secure); err != nil {
   671  		return err
   672  	}
   673  	if err := programInput(s.vni, n.secure); err != nil {
   674  		if n.secure {
   675  			return multierror.Append(err, programMangle(s.vni, false))
   676  		}
   677  	}
   678  
   679  	if restore {
   680  		if err := n.restoreSubnetSandbox(s, brName, vxlanName); err != nil {
   681  			return err
   682  		}
   683  	} else {
   684  		if err := n.setupSubnetSandbox(s, brName, vxlanName); err != nil {
   685  			return err
   686  		}
   687  	}
   688  
   689  	s.vxlanName = vxlanName
   690  	s.brName = brName
   691  
   692  	return nil
   693  }
   694  
   695  func (n *network) cleanupStaleSandboxes() {
   696  	filepath.Walk(filepath.Dir(osl.GenerateKey("walk")),
   697  		func(path string, info os.FileInfo, err error) error {
   698  			_, fname := filepath.Split(path)
   699  
   700  			pList := strings.Split(fname, "-")
   701  			if len(pList) <= 1 {
   702  				return nil
   703  			}
   704  
   705  			pattern := pList[1]
   706  			if strings.Contains(n.id, pattern) {
   707  				// Delete all vnis
   708  				deleteVxlanByVNI(path, 0)
   709  				unix.Unmount(path, unix.MNT_DETACH)
   710  				os.Remove(path)
   711  
   712  				// Now that we have destroyed this
   713  				// sandbox, remove all references to
   714  				// it in vniTbl so that we don't
   715  				// inadvertently destroy the sandbox
   716  				// created in this life.
   717  				networkMu.Lock()
   718  				for vni, tblPath := range vniTbl {
   719  					if tblPath == path {
   720  						delete(vniTbl, vni)
   721  					}
   722  				}
   723  				networkMu.Unlock()
   724  			}
   725  
   726  			return nil
   727  		})
   728  }
   729  
   730  func (n *network) initSandbox(restore bool) error {
   731  	n.initEpoch++
   732  
   733  	if !restore {
   734  		if hostMode {
   735  			if err := addNetworkChain(n.id[:12]); err != nil {
   736  				return err
   737  			}
   738  		}
   739  
   740  		// If there are any stale sandboxes related to this network
   741  		// from previous daemon life clean it up here
   742  		n.cleanupStaleSandboxes()
   743  	}
   744  
   745  	// In the restore case network sandbox already exist; but we don't know
   746  	// what epoch number it was created with. It has to be retrieved by
   747  	// searching the net namespaces.
   748  	var key string
   749  	if restore {
   750  		key = osl.GenerateKey("-" + n.id)
   751  	} else {
   752  		key = osl.GenerateKey(fmt.Sprintf("%d-", n.initEpoch) + n.id)
   753  	}
   754  
   755  	sbox, err := osl.NewSandbox(key, !hostMode, restore)
   756  	if err != nil {
   757  		return fmt.Errorf("could not get network sandbox (oper %t): %v", restore, err)
   758  	}
   759  
   760  	// this is needed to let the peerAdd configure the sandbox
   761  	n.sbox = sbox
   762  
   763  	// If we are in swarm mode, we don't need anymore the watchMiss routine.
   764  	// This will save 1 thread and 1 netlink socket per network
   765  	if !n.driver.isSerfAlive() {
   766  		return nil
   767  	}
   768  
   769  	var nlSock *nl.NetlinkSocket
   770  	sbox.InvokeFunc(func() {
   771  		nlSock, err = nl.Subscribe(unix.NETLINK_ROUTE, unix.RTNLGRP_NEIGH)
   772  		if err != nil {
   773  			return
   774  		}
   775  		// set the receive timeout to not remain stuck on the RecvFrom if the fd gets closed
   776  		tv := unix.NsecToTimeval(soTimeout.Nanoseconds())
   777  		err = nlSock.SetReceiveTimeout(&tv)
   778  	})
   779  	n.nlSocket = nlSock
   780  
   781  	if err == nil {
   782  		go n.watchMiss(nlSock, key)
   783  	} else {
   784  		logrus.Errorf("failed to subscribe to neighbor group netlink messages for overlay network %s in sbox %s: %v",
   785  			n.id, sbox.Key(), err)
   786  	}
   787  
   788  	return nil
   789  }
   790  
// watchMiss is the goroutine body that consumes neighbor notifications from
// nlSock and resolves the missing peers.
//
// It locks itself to an OS thread, enters the namespace at nsPath and then
// loops on nlSock.Receive. For each RTM_GETNEIGH/RTM_NEWNEIGH message whose
// address belongs to one of the network's subnets and whose state is stale
// or incomplete, the peer is resolved through the driver and added with
// peerAdd. The loop ends once the socket reports a file descriptor of -1.
func (n *network) watchMiss(nlSock *nl.NetlinkSocket, nsPath string) {
	// With the new version of the netlink library the deserialize function makes
	// requests about the interface of the netlink message. This can succeed only
	// if this go routine is in the target namespace. For this reason we lock
	// the thread on that namespace below.
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()
	newNs, err := netns.GetFromPath(nsPath)
	if err != nil {
		logrus.WithError(err).Errorf("failed to get the namespace %s", nsPath)
		return
	}
	defer newNs.Close()
	if err = netns.Set(newNs); err != nil {
		logrus.WithError(err).Errorf("failed to enter the namespace %s", nsPath)
		return
	}
	for {
		msgs, _, err := nlSock.Receive()
		if err != nil {
			n.Lock()
			nlFd := nlSock.GetFd()
			n.Unlock()
			if nlFd == -1 {
				// The netlink socket got closed, simply exit to not leak this goroutine
				return
			}
			// When the receive timeout expires the receive will return EAGAIN
			if err == unix.EAGAIN {
				// we continue here to avoid spam for timeouts
				continue
			}
			logrus.Errorf("Failed to receive from netlink: %v ", err)
			continue
		}

		for _, msg := range msgs {
			if msg.Header.Type != unix.RTM_GETNEIGH && msg.Header.Type != unix.RTM_NEWNEIGH {
				continue
			}

			neigh, err := netlink.NeighDeserialize(msg.Data)
			if err != nil {
				logrus.Errorf("Failed to deserialize netlink ndmsg: %v", err)
				continue
			}

			var (
				ip             net.IP
				mac            net.HardwareAddr
				l2Miss, l3Miss bool
			)
			if neigh.IP.To4() != nil {
				ip = neigh.IP
				l3Miss = true
			} else if neigh.HardwareAddr != nil {
				// The IP is taken from the hardware address bytes
				// after the first two.
				// NOTE(review): this slicing assumes the address has at
				// least two bytes — confirm that shorter values cannot occur.
				mac = []byte(neigh.HardwareAddr)
				ip = net.IP(mac[2:])
				l2Miss = true
			} else {
				continue
			}

			// Not any of the network's subnets. Ignore.
			if !n.contains(ip) {
				continue
			}

			// Only stale or incomplete entries are treated as misses.
			if neigh.State&(netlink.NUD_STALE|netlink.NUD_INCOMPLETE) == 0 {
				continue
			}

			logrus.Debugf("miss notification: dest IP %v, dest MAC %v", ip, mac)
			mac, IPmask, vtep, err := n.driver.resolvePeer(n.id, ip)
			if err != nil {
				logrus.Errorf("could not resolve peer %q: %v", ip, err)
				continue
			}
			n.driver.peerAdd(n.id, "dummy", ip, IPmask, mac, vtep, l2Miss, l3Miss, false)
		}
	}
}
   873  
   874  // Restore a network from the store to the driver if it is present.
   875  // Must be called with the driver locked!
   876  func (d *driver) restoreNetworkFromStore(nid string) *network {
   877  	n := d.getNetworkFromStore(nid)
   878  	if n != nil {
   879  		n.driver = d
   880  		n.endpoints = endpointTable{}
   881  		d.networks[nid] = n
   882  	}
   883  	return n
   884  }
   885  
   886  func (d *driver) network(nid string) *network {
   887  	d.Lock()
   888  	n, ok := d.networks[nid]
   889  	if !ok {
   890  		n = d.restoreNetworkFromStore(nid)
   891  	}
   892  	d.Unlock()
   893  
   894  	return n
   895  }
   896  
   897  func (d *driver) getNetworkFromStore(nid string) *network {
   898  	if d.store == nil {
   899  		return nil
   900  	}
   901  
   902  	n := &network{id: nid}
   903  	if err := d.store.GetObject(datastore.Key(n.Key()...), n); err != nil {
   904  		return nil
   905  	}
   906  
   907  	return n
   908  }
   909  
   910  func (n *network) sandbox() osl.Sandbox {
   911  	n.Lock()
   912  	defer n.Unlock()
   913  	return n.sbox
   914  }
   915  
   916  func (n *network) vxlanID(s *subnet) uint32 {
   917  	n.Lock()
   918  	defer n.Unlock()
   919  	return s.vni
   920  }
   921  
   922  func (n *network) setVxlanID(s *subnet, vni uint32) {
   923  	n.Lock()
   924  	s.vni = vni
   925  	n.Unlock()
   926  }
   927  
   928  func (n *network) Key() []string {
   929  	return []string{"overlay", "network", n.id}
   930  }
   931  
// KeyPrefix returns the data store key prefix shared by all overlay
// networks.
func (n *network) KeyPrefix() []string {
	return []string{"overlay", "network"}
}
   935  
   936  func (n *network) Value() []byte {
   937  	m := map[string]interface{}{}
   938  
   939  	netJSON := []*subnetJSON{}
   940  
   941  	for _, s := range n.subnets {
   942  		sj := &subnetJSON{
   943  			SubnetIP: s.subnetIP.String(),
   944  			GwIP:     s.gwIP.String(),
   945  			Vni:      s.vni,
   946  		}
   947  		netJSON = append(netJSON, sj)
   948  	}
   949  
   950  	m["secure"] = n.secure
   951  	m["subnets"] = netJSON
   952  	m["mtu"] = n.mtu
   953  	b, err := json.Marshal(m)
   954  	if err != nil {
   955  		return []byte{}
   956  	}
   957  
   958  	return b
   959  }
   960  
// Index returns the data store index last recorded through SetIndex.
func (n *network) Index() uint64 {
	return n.dbIndex
}
   964  
   965  func (n *network) SetIndex(index uint64) {
   966  	n.dbIndex = index
   967  	n.dbExists = true
   968  }
   969  
// Exists reports whether an index has been recorded for this network via
// SetIndex.
func (n *network) Exists() bool {
	return n.dbExists
}
   973  
// Skip always returns false for overlay networks.
func (n *network) Skip() bool {
	return false
}
   977  
   978  func (n *network) SetValue(value []byte) error {
   979  	var (
   980  		m       map[string]interface{}
   981  		newNet  bool
   982  		isMap   = true
   983  		netJSON = []*subnetJSON{}
   984  	)
   985  
   986  	if err := json.Unmarshal(value, &m); err != nil {
   987  		err := json.Unmarshal(value, &netJSON)
   988  		if err != nil {
   989  			return err
   990  		}
   991  		isMap = false
   992  	}
   993  
   994  	if len(n.subnets) == 0 {
   995  		newNet = true
   996  	}
   997  
   998  	if isMap {
   999  		if val, ok := m["secure"]; ok {
  1000  			n.secure = val.(bool)
  1001  		}
  1002  		if val, ok := m["mtu"]; ok {
  1003  			n.mtu = int(val.(float64))
  1004  		}
  1005  		bytes, err := json.Marshal(m["subnets"])
  1006  		if err != nil {
  1007  			return err
  1008  		}
  1009  		if err := json.Unmarshal(bytes, &netJSON); err != nil {
  1010  			return err
  1011  		}
  1012  	}
  1013  
  1014  	for _, sj := range netJSON {
  1015  		subnetIPstr := sj.SubnetIP
  1016  		gwIPstr := sj.GwIP
  1017  		vni := sj.Vni
  1018  
  1019  		subnetIP, _ := types.ParseCIDR(subnetIPstr)
  1020  		gwIP, _ := types.ParseCIDR(gwIPstr)
  1021  
  1022  		if newNet {
  1023  			s := &subnet{
  1024  				subnetIP: subnetIP,
  1025  				gwIP:     gwIP,
  1026  				vni:      vni,
  1027  			}
  1028  			n.subnets = append(n.subnets, s)
  1029  		} else {
  1030  			sNet := n.getMatchingSubnet(subnetIP)
  1031  			if sNet != nil {
  1032  				sNet.vni = vni
  1033  			}
  1034  		}
  1035  	}
  1036  	return nil
  1037  }
  1038  
  1039  func (n *network) DataScope() string {
  1040  	return datastore.GlobalScope
  1041  }
  1042  
  1043  func (n *network) writeToStore() error {
  1044  	if n.driver.store == nil {
  1045  		return nil
  1046  	}
  1047  
  1048  	return n.driver.store.PutObjectAtomic(n)
  1049  }
  1050  
  1051  func (n *network) releaseVxlanID() ([]uint32, error) {
  1052  	n.Lock()
  1053  	nSubnets := len(n.subnets)
  1054  	n.Unlock()
  1055  	if nSubnets == 0 {
  1056  		return nil, nil
  1057  	}
  1058  
  1059  	if n.driver.store != nil {
  1060  		if err := n.driver.store.DeleteObjectAtomic(n); err != nil {
  1061  			if err == datastore.ErrKeyModified || err == datastore.ErrKeyNotFound {
  1062  				// In both the above cases we can safely assume that the key has been removed by some other
  1063  				// instance and so simply get out of here
  1064  				return nil, nil
  1065  			}
  1066  
  1067  			return nil, fmt.Errorf("failed to delete network to vxlan id map: %v", err)
  1068  		}
  1069  	}
  1070  	var vnis []uint32
  1071  	n.Lock()
  1072  	for _, s := range n.subnets {
  1073  		if n.driver.vxlanIdm != nil {
  1074  			vnis = append(vnis, s.vni)
  1075  		}
  1076  		s.vni = 0
  1077  	}
  1078  	n.Unlock()
  1079  
  1080  	for _, vni := range vnis {
  1081  		n.driver.vxlanIdm.Release(uint64(vni))
  1082  	}
  1083  
  1084  	return vnis, nil
  1085  }
  1086  
  1087  func (n *network) obtainVxlanID(s *subnet) error {
  1088  	//return if the subnet already has a vxlan id assigned
  1089  	if n.vxlanID(s) != 0 {
  1090  		return nil
  1091  	}
  1092  
  1093  	if n.driver.store == nil {
  1094  		return fmt.Errorf("no valid vxlan id and no datastore configured, cannot obtain vxlan id")
  1095  	}
  1096  
  1097  	for {
  1098  		if err := n.driver.store.GetObject(datastore.Key(n.Key()...), n); err != nil {
  1099  			return fmt.Errorf("getting network %q from datastore failed %v", n.id, err)
  1100  		}
  1101  
  1102  		if n.vxlanID(s) == 0 {
  1103  			vxlanID, err := n.driver.vxlanIdm.GetID(true)
  1104  			if err != nil {
  1105  				return fmt.Errorf("failed to allocate vxlan id: %v", err)
  1106  			}
  1107  
  1108  			n.setVxlanID(s, uint32(vxlanID))
  1109  			if err := n.writeToStore(); err != nil {
  1110  				n.driver.vxlanIdm.Release(uint64(n.vxlanID(s)))
  1111  				n.setVxlanID(s, 0)
  1112  				if err == datastore.ErrKeyModified {
  1113  					continue
  1114  				}
  1115  				return fmt.Errorf("network %q failed to update data store: %v", n.id, err)
  1116  			}
  1117  			return nil
  1118  		}
  1119  		return nil
  1120  	}
  1121  }
  1122  
  1123  // contains return true if the passed ip belongs to one the network's
  1124  // subnets
  1125  func (n *network) contains(ip net.IP) bool {
  1126  	for _, s := range n.subnets {
  1127  		if s.subnetIP.Contains(ip) {
  1128  			return true
  1129  		}
  1130  	}
  1131  
  1132  	return false
  1133  }
  1134  
  1135  // getSubnetforIP returns the subnet to which the given IP belongs
  1136  func (n *network) getSubnetforIP(ip *net.IPNet) *subnet {
  1137  	for _, s := range n.subnets {
  1138  		// first check if the mask lengths are the same
  1139  		i, _ := s.subnetIP.Mask.Size()
  1140  		j, _ := ip.Mask.Size()
  1141  		if i != j {
  1142  			continue
  1143  		}
  1144  		if s.subnetIP.Contains(ip.IP) {
  1145  			return s
  1146  		}
  1147  	}
  1148  	return nil
  1149  }
  1150  
  1151  // getMatchingSubnet return the network's subnet that matches the input
  1152  func (n *network) getMatchingSubnet(ip *net.IPNet) *subnet {
  1153  	if ip == nil {
  1154  		return nil
  1155  	}
  1156  	for _, s := range n.subnets {
  1157  		// first check if the mask lengths are the same
  1158  		i, _ := s.subnetIP.Mask.Size()
  1159  		j, _ := ip.Mask.Size()
  1160  		if i != j {
  1161  			continue
  1162  		}
  1163  		if s.subnetIP.IP.Equal(ip.IP) {
  1164  			return s
  1165  		}
  1166  	}
  1167  	return nil
  1168  }