github.com/pwn-term/docker@v0.0.0-20210616085119-6e977cce2565/libnetwork/drivers/overlay/ov_network.go (about)

     1  package overlay
     2  
     3  import (
     4  	"encoding/json"
     5  	"fmt"
     6  	"io/ioutil"
     7  	"net"
     8  	"os"
     9  	"os/exec"
    10  	"path/filepath"
    11  	"runtime"
    12  	"strconv"
    13  	"strings"
    14  	"sync"
    15  
    16  	"github.com/docker/docker/pkg/reexec"
    17  	"github.com/docker/libnetwork/datastore"
    18  	"github.com/docker/libnetwork/driverapi"
    19  	"github.com/docker/libnetwork/netlabel"
    20  	"github.com/docker/libnetwork/netutils"
    21  	"github.com/docker/libnetwork/ns"
    22  	"github.com/docker/libnetwork/osl"
    23  	"github.com/docker/libnetwork/resolvconf"
    24  	"github.com/docker/libnetwork/types"
    25  	"github.com/sirupsen/logrus"
    26  	"github.com/vishvananda/netlink"
    27  	"github.com/vishvananda/netlink/nl"
    28  	"github.com/vishvananda/netns"
    29  	"golang.org/x/sys/unix"
    30  )
    31  
var (
	// hostMode is set by networkOnceInit: it becomes true when the
	// _OVERLAY_HOST_MODE environment variable is non-empty, or when a probe
	// vxlan interface cannot be moved into a network namespace.
	hostMode bool
	// networkOnce guards the one-time execution of networkOnceInit, which is
	// triggered from joinSandbox.
	networkOnce sync.Once
	// networkMu is held around accesses to vniTbl made after the one-time
	// initialization (see setupSubnetSandbox and cleanupStaleSandboxes).
	networkMu sync.Mutex
	// vniTbl maps a vxlan ID to the path of the network namespace in which a
	// vxlan interface with that ID was found by populateVNITbl.
	vniTbl = make(map[uint32]string)
)
    38  
// networkTable maps a string key to an overlay network.
// NOTE(review): presumably keyed by network ID, like d.networks — confirm.
type networkTable map[string]*network
    40  
// subnet holds the per-subnet state of an overlay network.
type subnet struct {
	// sboxInit records that sandbox initialization for this subnet has been
	// attempted and its outcome stored (see joinSandbox); it is reset by
	// leaveSandbox.
	sboxInit bool
	// vxlanName and brName are the names of the vxlan and bridge interfaces
	// of this subnet; they are filled in by initSubnetSandbox.
	vxlanName string
	brName    string
	// vni is the vxlan network identifier used by this subnet.
	vni uint32
	// initErr is the error recorded by the subnet sandbox initialization.
	initErr error
	// subnetIP is the address pool of the subnet, gwIP its gateway.
	subnetIP *net.IPNet
	gwIP     *net.IPNet
}
    50  
// subnetJSON is the serialized form of a subnet, produced by network.Value
// and consumed by network.SetValue.
type subnetJSON struct {
	SubnetIP string // string form of subnet.subnetIP
	GwIP     string // string form of subnet.gwIP
	Vni      uint32 // subnet.vni
}
    56  
// network is the overlay driver's state for one network. The embedded mutex
// protects the mutable fields.
type network struct {
	id string
	// dbIndex and dbExists are maintained through SetIndex and reported by
	// Index and Exists.
	dbIndex  uint64
	dbExists bool
	// sbox is the network sandbox created by initSandbox and released by
	// destroySandbox.
	sbox osl.Sandbox
	// nlSocket is the neighbor-group netlink subscription opened in
	// initSandbox for watchMiss; destroySandbox closes it.
	nlSocket  *nl.NetlinkSocket
	endpoints endpointTable
	driver    *driver
	// joinCnt is incremented by joinSandbox (when requested) and decremented
	// by leaveSandbox; the sandbox is destroyed when it reaches zero.
	joinCnt int
	// sboxInit records that initSandbox has been attempted; initErr keeps
	// its result.
	sboxInit bool
	// initEpoch is incremented on every initSandbox call and is part of the
	// sandbox key of non-restored sandboxes.
	initEpoch int
	initErr   error
	subnets   []*subnet
	// secure and mtu come from the network options (see CreateNetwork) or
	// from the stored value (see SetValue).
	secure bool
	mtu    int
	sync.Mutex
}
    74  
// init registers setDefaultVlan as the handler of the "set-default-vlan"
// re-exec command, which setupSubnetSandbox launches.
func init() {
	reexec.Register("set-default-vlan", setDefaultVlan)
}
    78  
    79  func setDefaultVlan() {
    80  	if len(os.Args) < 3 {
    81  		logrus.Error("insufficient number of arguments")
    82  		os.Exit(1)
    83  	}
    84  
    85  	runtime.LockOSThread()
    86  	defer runtime.UnlockOSThread()
    87  
    88  	nsPath := os.Args[1]
    89  	ns, err := netns.GetFromPath(nsPath)
    90  	if err != nil {
    91  		logrus.Errorf("overlay namespace get failed, %v", err)
    92  		os.Exit(1)
    93  	}
    94  	if err = netns.Set(ns); err != nil {
    95  		logrus.Errorf("setting into overlay namespace failed, %v", err)
    96  		os.Exit(1)
    97  	}
    98  
    99  	// make sure the sysfs mount doesn't propagate back
   100  	if err = unix.Unshare(unix.CLONE_NEWNS); err != nil {
   101  		logrus.Errorf("unshare failed, %v", err)
   102  		os.Exit(1)
   103  	}
   104  
   105  	flag := unix.MS_PRIVATE | unix.MS_REC
   106  	if err = unix.Mount("", "/", "", uintptr(flag), ""); err != nil {
   107  		logrus.Errorf("root mount failed, %v", err)
   108  		os.Exit(1)
   109  	}
   110  
   111  	if err = unix.Mount("sysfs", "/sys", "sysfs", 0, ""); err != nil {
   112  		logrus.Errorf("mounting sysfs failed, %v", err)
   113  		os.Exit(1)
   114  	}
   115  
   116  	brName := os.Args[2]
   117  	path := filepath.Join("/sys/class/net", brName, "bridge/default_pvid")
   118  	data := []byte{'0', '\n'}
   119  
   120  	if err = ioutil.WriteFile(path, data, 0644); err != nil {
   121  		logrus.Errorf("enabling default vlan on bridge %s failed %v", brName, err)
   122  		os.Exit(1)
   123  	}
   124  	os.Exit(0)
   125  }
   126  
// NetworkAllocate is not supported by this driver; it always returns a
// "not implemented" error and a nil map.
func (d *driver) NetworkAllocate(id string, option map[string]string, ipV4Data, ipV6Data []driverapi.IPAMData) (map[string]string, error) {
	return nil, types.NotImplementedErrorf("not implemented")
}
   130  
// NetworkFree is not supported by this driver; it always returns a
// "not implemented" error.
func (d *driver) NetworkFree(id string) error {
	return types.NotImplementedErrorf("not implemented")
}
   134  
   135  func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo driverapi.NetworkInfo, ipV4Data, ipV6Data []driverapi.IPAMData) error {
   136  	if id == "" {
   137  		return fmt.Errorf("invalid network id")
   138  	}
   139  	if len(ipV4Data) == 0 || ipV4Data[0].Pool.String() == "0.0.0.0/0" {
   140  		return types.BadRequestErrorf("ipv4 pool is empty")
   141  	}
   142  
   143  	// Since we perform lazy configuration make sure we try
   144  	// configuring the driver when we enter CreateNetwork
   145  	if err := d.configure(); err != nil {
   146  		return err
   147  	}
   148  
   149  	n := &network{
   150  		id:        id,
   151  		driver:    d,
   152  		endpoints: endpointTable{},
   153  		subnets:   []*subnet{},
   154  	}
   155  
   156  	vnis := make([]uint32, 0, len(ipV4Data))
   157  	if gval, ok := option[netlabel.GenericData]; ok {
   158  		optMap := gval.(map[string]string)
   159  		if val, ok := optMap[netlabel.OverlayVxlanIDList]; ok {
   160  			logrus.Debugf("overlay: Received vxlan IDs: %s", val)
   161  			vniStrings := strings.Split(val, ",")
   162  			for _, vniStr := range vniStrings {
   163  				vni, err := strconv.Atoi(vniStr)
   164  				if err != nil {
   165  					return fmt.Errorf("invalid vxlan id value %q passed", vniStr)
   166  				}
   167  
   168  				vnis = append(vnis, uint32(vni))
   169  			}
   170  		}
   171  		if _, ok := optMap[secureOption]; ok {
   172  			n.secure = true
   173  		}
   174  		if val, ok := optMap[netlabel.DriverMTU]; ok {
   175  			var err error
   176  			if n.mtu, err = strconv.Atoi(val); err != nil {
   177  				return fmt.Errorf("failed to parse %v: %v", val, err)
   178  			}
   179  			if n.mtu < 0 {
   180  				return fmt.Errorf("invalid MTU value: %v", n.mtu)
   181  			}
   182  		}
   183  	}
   184  
   185  	// If we are getting vnis from libnetwork, either we get for
   186  	// all subnets or none.
   187  	if len(vnis) != 0 && len(vnis) < len(ipV4Data) {
   188  		return fmt.Errorf("insufficient vnis(%d) passed to overlay", len(vnis))
   189  	}
   190  
   191  	for i, ipd := range ipV4Data {
   192  		s := &subnet{
   193  			subnetIP: ipd.Pool,
   194  			gwIP:     ipd.Gateway,
   195  		}
   196  
   197  		if len(vnis) != 0 {
   198  			s.vni = vnis[i]
   199  		}
   200  
   201  		n.subnets = append(n.subnets, s)
   202  	}
   203  
   204  	d.Lock()
   205  	defer d.Unlock()
   206  	if d.networks[n.id] != nil {
   207  		return fmt.Errorf("attempt to create overlay network %v that already exists", n.id)
   208  	}
   209  
   210  	if err := n.writeToStore(); err != nil {
   211  		return fmt.Errorf("failed to update data store for network %v: %v", n.id, err)
   212  	}
   213  
   214  	// Make sure no rule is on the way from any stale secure network
   215  	if !n.secure {
   216  		for _, vni := range vnis {
   217  			programMangle(vni, false)
   218  			programInput(vni, false)
   219  		}
   220  	}
   221  
   222  	if nInfo != nil {
   223  		if err := nInfo.TableEventRegister(ovPeerTable, driverapi.EndpointObject); err != nil {
   224  			// XXX Undo writeToStore?  No method to so.  Why?
   225  			return err
   226  		}
   227  	}
   228  
   229  	d.networks[id] = n
   230  
   231  	return nil
   232  }
   233  
// DeleteNetwork removes the network identified by nid from the driver.
//
// The network is looked up in d.networks and, failing that, restored from the
// store. For each of its endpoints the link named ep.ifName (if any) is
// deleted and the endpoint is removed from the local store; failures there
// are only logged. The network is then dropped from d.networks, its store
// entry and vxlan IDs are released, and for secure networks the mangle/input
// programming is removed for each released vxlan ID.
//
// It returns an error when nid is empty, the driver cannot be configured,
// the network is unknown, or releasing the vxlan IDs fails.
func (d *driver) DeleteNetwork(nid string) error {
	if nid == "" {
		return fmt.Errorf("invalid network id")
	}

	// Make sure driver resources are initialized before proceeding
	if err := d.configure(); err != nil {
		return err
	}

	d.Lock()
	// Only perform a peer flush operation (if required) AFTER unlocking
	// the driver lock to avoid deadlocking w/ the peerDB.
	var doPeerFlush bool
	defer func() {
		d.Unlock()
		if doPeerFlush {
			d.peerFlush(nid)
		}
	}()

	// This is similar to d.network(), but we need to keep holding the lock
	// until we are done removing this network.
	n, ok := d.networks[nid]
	if !ok {
		n = d.restoreNetworkFromStore(nid)
	}
	if n == nil {
		return fmt.Errorf("could not find network with id %s", nid)
	}

	// Best-effort cleanup of every endpoint: errors are logged, not returned.
	for _, ep := range n.endpoints {
		if ep.ifName != "" {
			if link, err := ns.NlHandle().LinkByName(ep.ifName); err == nil {
				if err := ns.NlHandle().LinkDel(link); err != nil {
					logrus.WithError(err).Warnf("Failed to delete interface (%s)'s link on endpoint (%s) delete", ep.ifName, ep.id)
				}
			}
		}

		if err := d.deleteEndpointFromStore(ep); err != nil {
			logrus.Warnf("Failed to delete overlay endpoint %.7s from local store: %v", ep.id, err)
		}
	}

	// From here on the network is gone from the driver, so the deferred
	// function flushes its peers even if releasing the vxlan IDs fails.
	doPeerFlush = true
	delete(d.networks, nid)

	vnis, err := n.releaseVxlanID()
	if err != nil {
		return err
	}

	if n.secure {
		for _, vni := range vnis {
			programMangle(vni, false)
			programInput(vni, false)
		}
	}

	return nil
}
   296  
// ProgramExternalConnectivity is a no-op for this driver and always returns
// nil.
func (d *driver) ProgramExternalConnectivity(nid, eid string, options map[string]interface{}) error {
	return nil
}
   300  
// RevokeExternalConnectivity is a no-op for this driver and always returns
// nil.
func (d *driver) RevokeExternalConnectivity(nid, eid string) error {
	return nil
}
   304  
// joinSandbox makes sure the network sandbox and the sandbox resources of
// subnet s are initialized, creating (or, when restore is true, restoring)
// them on first use. When incJoinCount is true and initialization succeeded,
// the network's join count is incremented.
//
// A network sandbox initialization error is remembered and returned on every
// later call until leaveSandbox resets the state. A subnet initialization
// error is only remembered in the restore case; otherwise the next call
// retries the subnet setup.
func (n *network) joinSandbox(s *subnet, restore bool, incJoinCount bool) error {
	// If there is a race between two go routines here only one will win
	// the other will wait.
	networkOnce.Do(networkOnceInit)

	n.Lock()
	// If non-restore initialization occurred and was successful then
	// tell the peerDB to initialize the sandbox with all the peers
	// previously received from networkdb.  But only do this after
	// unlocking the network.  Otherwise we could deadlock on the
	// peerDB channel while peerDB is waiting for the network lock.
	var doInitPeerDB bool
	defer func() {
		n.Unlock()
		if doInitPeerDB {
			n.driver.initSandboxPeerDB(n.id)
		}
	}()

	if !n.sboxInit {
		n.initErr = n.initSandbox(restore)
		doInitPeerDB = n.initErr == nil && !restore
		// If there was an error, we cannot recover it
		n.sboxInit = true
	}

	if n.initErr != nil {
		return fmt.Errorf("network sandbox join failed: %v", n.initErr)
	}

	// Start from the recorded subnet result; it is replaced below when the
	// subnet still has to be initialized.
	subnetErr := s.initErr
	if !s.sboxInit {
		subnetErr = n.initSubnetSandbox(s, restore)
		// We can recover from these errors, but not on restore
		if restore || subnetErr == nil {
			s.initErr = subnetErr
			s.sboxInit = true
		}
	}
	if subnetErr != nil {
		return fmt.Errorf("subnet sandbox join failed for %q: %v", s.subnetIP.String(), subnetErr)
	}

	if incJoinCount {
		n.joinCnt++
	}

	return nil
}
   354  
   355  func (n *network) leaveSandbox() {
   356  	n.Lock()
   357  	defer n.Unlock()
   358  	n.joinCnt--
   359  	if n.joinCnt != 0 {
   360  		return
   361  	}
   362  
   363  	n.destroySandbox()
   364  
   365  	n.sboxInit = false
   366  	n.initErr = nil
   367  	for _, s := range n.subnets {
   368  		s.sboxInit = false
   369  		s.initErr = nil
   370  	}
   371  }
   372  
   373  // to be called while holding network lock
   374  func (n *network) destroySandbox() {
   375  	if n.sbox != nil {
   376  		for _, iface := range n.sbox.Info().Interfaces() {
   377  			if err := iface.Remove(); err != nil {
   378  				logrus.Debugf("Remove interface %s failed: %v", iface.SrcName(), err)
   379  			}
   380  		}
   381  
   382  		for _, s := range n.subnets {
   383  			if hostMode {
   384  				if err := removeFilters(n.id[:12], s.brName); err != nil {
   385  					logrus.Warnf("Could not remove overlay filters: %v", err)
   386  				}
   387  			}
   388  
   389  			if s.vxlanName != "" {
   390  				err := deleteInterface(s.vxlanName)
   391  				if err != nil {
   392  					logrus.Warnf("could not cleanup sandbox properly: %v", err)
   393  				}
   394  			}
   395  		}
   396  
   397  		if hostMode {
   398  			if err := removeNetworkChain(n.id[:12]); err != nil {
   399  				logrus.Warnf("could not remove network chain: %v", err)
   400  			}
   401  		}
   402  
   403  		// Close the netlink socket, this will also release the watchMiss goroutine that is using it
   404  		if n.nlSocket != nil {
   405  			n.nlSocket.Close()
   406  			n.nlSocket = nil
   407  		}
   408  
   409  		n.sbox.Destroy()
   410  		n.sbox = nil
   411  	}
   412  }
   413  
   414  func populateVNITbl() {
   415  	filepath.Walk(filepath.Dir(osl.GenerateKey("walk")),
   416  		func(path string, info os.FileInfo, err error) error {
   417  			_, fname := filepath.Split(path)
   418  
   419  			if len(strings.Split(fname, "-")) <= 1 {
   420  				return nil
   421  			}
   422  
   423  			ns, err := netns.GetFromPath(path)
   424  			if err != nil {
   425  				logrus.Errorf("Could not open namespace path %s during vni population: %v", path, err)
   426  				return nil
   427  			}
   428  			defer ns.Close()
   429  
   430  			nlh, err := netlink.NewHandleAt(ns, unix.NETLINK_ROUTE)
   431  			if err != nil {
   432  				logrus.Errorf("Could not open netlink handle during vni population for ns %s: %v", path, err)
   433  				return nil
   434  			}
   435  			defer nlh.Delete()
   436  
   437  			err = nlh.SetSocketTimeout(soTimeout)
   438  			if err != nil {
   439  				logrus.Warnf("Failed to set the timeout on the netlink handle sockets for vni table population: %v", err)
   440  			}
   441  
   442  			links, err := nlh.LinkList()
   443  			if err != nil {
   444  				logrus.Errorf("Failed to list interfaces during vni population for ns %s: %v", path, err)
   445  				return nil
   446  			}
   447  
   448  			for _, l := range links {
   449  				if l.Type() == "vxlan" {
   450  					vniTbl[uint32(l.(*netlink.Vxlan).VxlanId)] = path
   451  				}
   452  			}
   453  
   454  			return nil
   455  		})
   456  }
   457  
   458  func networkOnceInit() {
   459  	populateVNITbl()
   460  
   461  	if os.Getenv("_OVERLAY_HOST_MODE") != "" {
   462  		hostMode = true
   463  		return
   464  	}
   465  
   466  	err := createVxlan("testvxlan", 1, 0)
   467  	if err != nil {
   468  		logrus.Errorf("Failed to create testvxlan interface: %v", err)
   469  		return
   470  	}
   471  
   472  	defer deleteInterface("testvxlan")
   473  
   474  	path := "/proc/self/ns/net"
   475  	hNs, err := netns.GetFromPath(path)
   476  	if err != nil {
   477  		logrus.Errorf("Failed to get network namespace from path %s while setting host mode: %v", path, err)
   478  		return
   479  	}
   480  	defer hNs.Close()
   481  
   482  	nlh := ns.NlHandle()
   483  
   484  	iface, err := nlh.LinkByName("testvxlan")
   485  	if err != nil {
   486  		logrus.Errorf("Failed to get link testvxlan while setting host mode: %v", err)
   487  		return
   488  	}
   489  
   490  	// If we are not able to move the vxlan interface to a namespace
   491  	// then fallback to host mode
   492  	if err := nlh.LinkSetNsFd(iface, int(hNs)); err != nil {
   493  		hostMode = true
   494  	}
   495  }
   496  
   497  func (n *network) generateVxlanName(s *subnet) string {
   498  	id := n.id
   499  	if len(n.id) > 5 {
   500  		id = n.id[:5]
   501  	}
   502  
   503  	return fmt.Sprintf("vx-%06x-%v", s.vni, id)
   504  }
   505  
   506  func (n *network) generateBridgeName(s *subnet) string {
   507  	id := n.id
   508  	if len(n.id) > 5 {
   509  		id = n.id[:5]
   510  	}
   511  
   512  	return n.getBridgeNamePrefix(s) + "-" + id
   513  }
   514  
// getBridgeNamePrefix returns "ov-" followed by the subnet's vxlan ID
// formatted as at least six hexadecimal digits.
func (n *network) getBridgeNamePrefix(s *subnet) string {
	return fmt.Sprintf("ov-%06x", s.vni)
}
   518  
   519  func checkOverlap(nw *net.IPNet) error {
   520  	var nameservers []string
   521  
   522  	if rc, err := resolvconf.Get(); err == nil {
   523  		nameservers = resolvconf.GetNameserversAsCIDR(rc.Content)
   524  	}
   525  
   526  	if err := netutils.CheckNameserverOverlaps(nameservers, nw); err != nil {
   527  		return fmt.Errorf("overlay subnet %s failed check with nameserver: %v: %v", nw.String(), nameservers, err)
   528  	}
   529  
   530  	if err := netutils.CheckRouteOverlaps(nw); err != nil {
   531  		return fmt.Errorf("overlay subnet %s failed check with host route table: %v", nw.String(), err)
   532  	}
   533  
   534  	return nil
   535  }
   536  
   537  func (n *network) restoreSubnetSandbox(s *subnet, brName, vxlanName string) error {
   538  	sbox := n.sbox
   539  
   540  	// restore overlay osl sandbox
   541  	Ifaces := make(map[string][]osl.IfaceOption)
   542  	brIfaceOption := make([]osl.IfaceOption, 2)
   543  	brIfaceOption = append(brIfaceOption, sbox.InterfaceOptions().Address(s.gwIP))
   544  	brIfaceOption = append(brIfaceOption, sbox.InterfaceOptions().Bridge(true))
   545  	Ifaces[brName+"+br"] = brIfaceOption
   546  
   547  	err := sbox.Restore(Ifaces, nil, nil, nil)
   548  	if err != nil {
   549  		return err
   550  	}
   551  
   552  	Ifaces = make(map[string][]osl.IfaceOption)
   553  	vxlanIfaceOption := make([]osl.IfaceOption, 1)
   554  	vxlanIfaceOption = append(vxlanIfaceOption, sbox.InterfaceOptions().Master(brName))
   555  	Ifaces[vxlanName+"+vxlan"] = vxlanIfaceOption
   556  	return sbox.Restore(Ifaces, nil, nil, nil)
   557  }
   558  
// setupSubnetSandbox creates the bridge and vxlan interfaces of subnet s and
// places them in the network sandbox.
//
// In host mode it first removes stale bridge/vxlan interfaces, checks the
// subnet for overlaps and, at the end, installs the overlay filters. Outside
// host mode it first destroys a stale namespace recorded in vniTbl for the
// subnet's vxlan ID and, at the end, re-executes the "set-default-vlan"
// command for the sandbox bridge (a failure there is only logged).
//
// If the vxlan interface cannot be added to the sandbox, the bridge added
// earlier and the vxlan interface are removed again so the caller can retry.
func (n *network) setupSubnetSandbox(s *subnet, brName, vxlanName string) error {

	if hostMode {
		// Try to delete stale bridge interface if it exists
		if err := deleteInterface(brName); err != nil {
			deleteInterfaceBySubnet(n.getBridgeNamePrefix(s), s)
		}
		// Try to delete the vxlan interface by vni if already present
		deleteVxlanByVNI("", s.vni)

		if err := checkOverlap(s.subnetIP); err != nil {
			return err
		}
	}

	if !hostMode {
		// Check whether this subnet's vni is being used in some
		// other namespace by looking at vniTbl that we just
		// populated in the once init. If a hit is found then
		// it must be a stale namespace from a previous
		// life. Destroy it completely and reclaim resources.
		networkMu.Lock()
		path, ok := vniTbl[s.vni]
		networkMu.Unlock()

		if ok {
			deleteVxlanByVNI(path, s.vni)
			if err := unix.Unmount(path, unix.MNT_FORCE); err != nil {
				logrus.Errorf("unmount of %s failed: %v", path, err)
			}
			os.Remove(path)

			networkMu.Lock()
			delete(vniTbl, s.vni)
			networkMu.Unlock()
		}
	}

	// create a bridge and vxlan device for this subnet and move it to the sandbox
	sbox := n.sbox

	if err := sbox.AddInterface(brName, "br",
		sbox.InterfaceOptions().Address(s.gwIP),
		sbox.InterfaceOptions().Bridge(true)); err != nil {
		return fmt.Errorf("bridge creation in sandbox failed for subnet %q: %v", s.subnetIP.String(), err)
	}

	err := createVxlan(vxlanName, s.vni, n.maxMTU())
	if err != nil {
		return err
	}

	if err := sbox.AddInterface(vxlanName, "vxlan",
		sbox.InterfaceOptions().Master(brName)); err != nil {
		// If adding vxlan device to the overlay namespace fails, remove the bridge interface we
		// already added to the namespace. This allows the caller to try the setup again.
		for _, iface := range sbox.Info().Interfaces() {
			if iface.SrcName() == brName {
				if ierr := iface.Remove(); ierr != nil {
					logrus.Errorf("removing bridge failed from ov ns %v failed, %v", n.sbox.Key(), ierr)
				}
			}
		}

		// Also, delete the vxlan interface. Since a global vni id is associated
		// with the vxlan interface, an orphaned vxlan interface will result in
		// failure of vxlan device creation if the vni is assigned to some other
		// network.
		if deleteErr := deleteInterface(vxlanName); deleteErr != nil {
			logrus.Warnf("could not delete vxlan interface, %s, error %v, after config error, %v", vxlanName, deleteErr, err)
		}
		return fmt.Errorf("vxlan interface creation failed for subnet %q: %v", s.subnetIP.String(), err)
	}

	if !hostMode {
		// Pick the destination name of the sandbox bridge interface; if the
		// sandbox reports several bridges, the last one wins.
		var name string
		for _, i := range sbox.Info().Interfaces() {
			if i.Bridge() {
				name = i.DstName()
			}
		}
		// Re-exec this binary as "set-default-vlan <sandbox key> <bridge>",
		// which is handled by setDefaultVlan.
		cmd := &exec.Cmd{
			Path:   reexec.Self(),
			Args:   []string{"set-default-vlan", sbox.Key(), name},
			Stdout: os.Stdout,
			Stderr: os.Stderr,
		}
		if err := cmd.Run(); err != nil {
			// not a fatal error
			logrus.Errorf("reexec to set bridge default vlan failed %v", err)
		}
	}

	if hostMode {
		if err := addFilters(n.id[:12], brName); err != nil {
			return err
		}
	}

	return nil
}
   660  
   661  // Must be called with the network lock
   662  func (n *network) initSubnetSandbox(s *subnet, restore bool) error {
   663  	brName := n.generateBridgeName(s)
   664  	vxlanName := n.generateVxlanName(s)
   665  
   666  	if restore {
   667  		if err := n.restoreSubnetSandbox(s, brName, vxlanName); err != nil {
   668  			return err
   669  		}
   670  	} else {
   671  		if err := n.setupSubnetSandbox(s, brName, vxlanName); err != nil {
   672  			return err
   673  		}
   674  	}
   675  
   676  	s.vxlanName = vxlanName
   677  	s.brName = brName
   678  
   679  	return nil
   680  }
   681  
   682  func (n *network) cleanupStaleSandboxes() {
   683  	filepath.Walk(filepath.Dir(osl.GenerateKey("walk")),
   684  		func(path string, info os.FileInfo, err error) error {
   685  			_, fname := filepath.Split(path)
   686  
   687  			pList := strings.Split(fname, "-")
   688  			if len(pList) <= 1 {
   689  				return nil
   690  			}
   691  
   692  			pattern := pList[1]
   693  			if strings.Contains(n.id, pattern) {
   694  				// Delete all vnis
   695  				deleteVxlanByVNI(path, 0)
   696  				unix.Unmount(path, unix.MNT_DETACH)
   697  				os.Remove(path)
   698  
   699  				// Now that we have destroyed this
   700  				// sandbox, remove all references to
   701  				// it in vniTbl so that we don't
   702  				// inadvertently destroy the sandbox
   703  				// created in this life.
   704  				networkMu.Lock()
   705  				for vni, tblPath := range vniTbl {
   706  					if tblPath == path {
   707  						delete(vniTbl, vni)
   708  					}
   709  				}
   710  				networkMu.Unlock()
   711  			}
   712  
   713  			return nil
   714  		})
   715  }
   716  
// initSandbox creates (or, when restore is true, re-opens) the network
// sandbox and stores it in n.sbox.
//
// For a fresh sandbox it first adds the network chain in host mode and cleans
// up stale sandboxes of this network. When the driver reports serf as alive,
// it also subscribes to neighbor netlink messages inside the sandbox and
// starts the watchMiss goroutine; a failure of that subscription is logged
// but not returned. The only errors returned are those of addNetworkChain
// and osl.NewSandbox.
func (n *network) initSandbox(restore bool) error {
	n.initEpoch++

	if !restore {
		if hostMode {
			if err := addNetworkChain(n.id[:12]); err != nil {
				return err
			}
		}

		// If there are any stale sandboxes related to this network
		// from previous daemon life clean it up here
		n.cleanupStaleSandboxes()
	}

	// In the restore case network sandbox already exist; but we don't know
	// what epoch number it was created with. It has to be retrieved by
	// searching the net namespaces.
	var key string
	if restore {
		key = osl.GenerateKey("-" + n.id)
	} else {
		key = osl.GenerateKey(fmt.Sprintf("%d-", n.initEpoch) + n.id)
	}

	sbox, err := osl.NewSandbox(key, !hostMode, restore)
	if err != nil {
		return fmt.Errorf("could not get network sandbox (oper %t): %v", restore, err)
	}

	// this is needed to let the peerAdd configure the sandbox
	n.sbox = sbox

	// If we are in swarm mode, we don't need anymore the watchMiss routine.
	// This will save 1 thread and 1 netlink socket per network
	if !n.driver.isSerfAlive() {
		return nil
	}

	// The closure assigns to the outer nlSock and err, which are inspected
	// after InvokeFunc returns.
	var nlSock *nl.NetlinkSocket
	sbox.InvokeFunc(func() {
		nlSock, err = nl.Subscribe(unix.NETLINK_ROUTE, unix.RTNLGRP_NEIGH)
		if err != nil {
			return
		}
		// set the receive timeout to not remain stuck on the RecvFrom if the fd gets closed
		tv := unix.NsecToTimeval(soTimeout.Nanoseconds())
		err = nlSock.SetReceiveTimeout(&tv)
	})
	n.nlSocket = nlSock

	if err == nil {
		go n.watchMiss(nlSock, key)
	} else {
		logrus.Errorf("failed to subscribe to neighbor group netlink messages for overlay network %s in sbox %s: %v",
			n.id, sbox.Key(), err)
	}

	return nil
}
   777  
// watchMiss receives neighbor messages from nlSock and, for every miss
// notification that concerns an address of this network, resolves the peer
// and adds it through the driver.
//
// The goroutine locks itself to an OS thread and enters the namespace at
// nsPath before reading. It returns when the namespace cannot be entered or
// when, after a receive error, the socket reports a file descriptor of -1
// (i.e. it was closed). All other receive errors, including the periodic
// EAGAIN caused by the receive timeout, lead to another receive attempt.
func (n *network) watchMiss(nlSock *nl.NetlinkSocket, nsPath string) {
	// With the new version of the netlink library the deserialize function makes
	// requests about the interface of the netlink message. This can succeed only
	// if this go routine is in the target namespace. For this reason following we
	// lock the thread on that namespace
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()
	newNs, err := netns.GetFromPath(nsPath)
	if err != nil {
		logrus.WithError(err).Errorf("failed to get the namespace %s", nsPath)
		return
	}
	defer newNs.Close()
	if err = netns.Set(newNs); err != nil {
		logrus.WithError(err).Errorf("failed to enter the namespace %s", nsPath)
		return
	}
	for {
		msgs, _, err := nlSock.Receive()
		if err != nil {
			n.Lock()
			nlFd := nlSock.GetFd()
			n.Unlock()
			if nlFd == -1 {
				// The netlink socket got closed, simply exit to not leak this goroutine
				return
			}
			// When the receive timeout expires the receive will return EAGAIN
			if err == unix.EAGAIN {
				// we continue here to avoid spam for timeouts
				continue
			}
			logrus.Errorf("Failed to receive from netlink: %v ", err)
			continue
		}

		for _, msg := range msgs {
			// Only neighbor get/new messages are of interest.
			if msg.Header.Type != unix.RTM_GETNEIGH && msg.Header.Type != unix.RTM_NEWNEIGH {
				continue
			}

			neigh, err := netlink.NeighDeserialize(msg.Data)
			if err != nil {
				logrus.Errorf("Failed to deserialize netlink ndmsg: %v", err)
				continue
			}

			var (
				ip             net.IP
				mac            net.HardwareAddr
				l2Miss, l3Miss bool
			)
			// An IPv4 neighbor address is handled as an L3 miss. Otherwise,
			// when a hardware address is present, it is an L2 miss and the
			// IP is taken from the bytes of that address starting at index 2.
			// NOTE(review): presumably overlay MACs embed the IPv4 address
			// in their last four bytes — confirm.
			if neigh.IP.To4() != nil {
				ip = neigh.IP
				l3Miss = true
			} else if neigh.HardwareAddr != nil {
				mac = []byte(neigh.HardwareAddr)
				ip = net.IP(mac[2:])
				l2Miss = true
			} else {
				continue
			}

			// Not any of the network's subnets. Ignore.
			if !n.contains(ip) {
				continue
			}

			// Only stale or incomplete neighbor entries are treated as misses.
			if neigh.State&(netlink.NUD_STALE|netlink.NUD_INCOMPLETE) == 0 {
				continue
			}

			logrus.Debugf("miss notification: dest IP %v, dest MAC %v", ip, mac)
			mac, IPmask, vtep, err := n.driver.resolvePeer(n.id, ip)
			if err != nil {
				logrus.Errorf("could not resolve peer %q: %v", ip, err)
				continue
			}
			n.driver.peerAdd(n.id, "dummy", ip, IPmask, mac, vtep, l2Miss, l3Miss, false)
		}
	}
}
   860  
   861  // Restore a network from the store to the driver if it is present.
   862  // Must be called with the driver locked!
   863  func (d *driver) restoreNetworkFromStore(nid string) *network {
   864  	n := d.getNetworkFromStore(nid)
   865  	if n != nil {
   866  		n.driver = d
   867  		n.endpoints = endpointTable{}
   868  		d.networks[nid] = n
   869  	}
   870  	return n
   871  }
   872  
   873  func (d *driver) network(nid string) *network {
   874  	d.Lock()
   875  	n, ok := d.networks[nid]
   876  	if !ok {
   877  		n = d.restoreNetworkFromStore(nid)
   878  	}
   879  	d.Unlock()
   880  
   881  	return n
   882  }
   883  
   884  func (d *driver) getNetworkFromStore(nid string) *network {
   885  	if d.store == nil {
   886  		return nil
   887  	}
   888  
   889  	n := &network{id: nid}
   890  	if err := d.store.GetObject(datastore.Key(n.Key()...), n); err != nil {
   891  		return nil
   892  	}
   893  
   894  	return n
   895  }
   896  
   897  func (n *network) sandbox() osl.Sandbox {
   898  	n.Lock()
   899  	defer n.Unlock()
   900  	return n.sbox
   901  }
   902  
   903  func (n *network) vxlanID(s *subnet) uint32 {
   904  	n.Lock()
   905  	defer n.Unlock()
   906  	return s.vni
   907  }
   908  
   909  func (n *network) setVxlanID(s *subnet, vni uint32) {
   910  	n.Lock()
   911  	s.vni = vni
   912  	n.Unlock()
   913  }
   914  
// Key returns the store key components of this network:
// "overlay", "network" and the network ID.
func (n *network) Key() []string {
	return []string{"overlay", "network", n.id}
}
   918  
// KeyPrefix returns the store key components shared by all overlay networks:
// "overlay" and "network".
func (n *network) KeyPrefix() []string {
	return []string{"overlay", "network"}
}
   922  
   923  func (n *network) Value() []byte {
   924  	m := map[string]interface{}{}
   925  
   926  	netJSON := []*subnetJSON{}
   927  
   928  	for _, s := range n.subnets {
   929  		sj := &subnetJSON{
   930  			SubnetIP: s.subnetIP.String(),
   931  			GwIP:     s.gwIP.String(),
   932  			Vni:      s.vni,
   933  		}
   934  		netJSON = append(netJSON, sj)
   935  	}
   936  
   937  	m["secure"] = n.secure
   938  	m["subnets"] = netJSON
   939  	m["mtu"] = n.mtu
   940  	b, err := json.Marshal(m)
   941  	if err != nil {
   942  		return []byte{}
   943  	}
   944  
   945  	return b
   946  }
   947  
// Index returns the store index last recorded through SetIndex.
func (n *network) Index() uint64 {
	return n.dbIndex
}
   951  
// SetIndex records the store index of the network and marks the network as
// existing in the store.
func (n *network) SetIndex(index uint64) {
	n.dbIndex = index
	n.dbExists = true
}
   956  
// Exists reports whether SetIndex has been called on this network, i.e.
// whether it has been marked as present in the store.
func (n *network) Exists() bool {
	return n.dbExists
}
   960  
// Skip always returns false.
// NOTE(review): presumably tells the datastore that this object must be
// persisted — confirm against the datastore interface.
func (n *network) Skip() bool {
	return false
}
   964  
   965  func (n *network) SetValue(value []byte) error {
   966  	var (
   967  		m       map[string]interface{}
   968  		newNet  bool
   969  		isMap   = true
   970  		netJSON = []*subnetJSON{}
   971  	)
   972  
   973  	if err := json.Unmarshal(value, &m); err != nil {
   974  		err := json.Unmarshal(value, &netJSON)
   975  		if err != nil {
   976  			return err
   977  		}
   978  		isMap = false
   979  	}
   980  
   981  	if len(n.subnets) == 0 {
   982  		newNet = true
   983  	}
   984  
   985  	if isMap {
   986  		if val, ok := m["secure"]; ok {
   987  			n.secure = val.(bool)
   988  		}
   989  		if val, ok := m["mtu"]; ok {
   990  			n.mtu = int(val.(float64))
   991  		}
   992  		bytes, err := json.Marshal(m["subnets"])
   993  		if err != nil {
   994  			return err
   995  		}
   996  		if err := json.Unmarshal(bytes, &netJSON); err != nil {
   997  			return err
   998  		}
   999  	}
  1000  
  1001  	for _, sj := range netJSON {
  1002  		subnetIPstr := sj.SubnetIP
  1003  		gwIPstr := sj.GwIP
  1004  		vni := sj.Vni
  1005  
  1006  		subnetIP, _ := types.ParseCIDR(subnetIPstr)
  1007  		gwIP, _ := types.ParseCIDR(gwIPstr)
  1008  
  1009  		if newNet {
  1010  			s := &subnet{
  1011  				subnetIP: subnetIP,
  1012  				gwIP:     gwIP,
  1013  				vni:      vni,
  1014  			}
  1015  			n.subnets = append(n.subnets, s)
  1016  		} else {
  1017  			sNet := n.getMatchingSubnet(subnetIP)
  1018  			if sNet != nil {
  1019  				sNet.vni = vni
  1020  			}
  1021  		}
  1022  	}
  1023  	return nil
  1024  }
  1025  
  1026  func (n *network) DataScope() string {
  1027  	return datastore.GlobalScope
  1028  }
  1029  
  1030  func (n *network) writeToStore() error {
  1031  	if n.driver.store == nil {
  1032  		return nil
  1033  	}
  1034  
  1035  	return n.driver.store.PutObjectAtomic(n)
  1036  }
  1037  
  1038  func (n *network) releaseVxlanID() ([]uint32, error) {
  1039  	n.Lock()
  1040  	nSubnets := len(n.subnets)
  1041  	n.Unlock()
  1042  	if nSubnets == 0 {
  1043  		return nil, nil
  1044  	}
  1045  
  1046  	if n.driver.store != nil {
  1047  		if err := n.driver.store.DeleteObjectAtomic(n); err != nil {
  1048  			if err == datastore.ErrKeyModified || err == datastore.ErrKeyNotFound {
  1049  				// In both the above cases we can safely assume that the key has been removed by some other
  1050  				// instance and so simply get out of here
  1051  				return nil, nil
  1052  			}
  1053  
  1054  			return nil, fmt.Errorf("failed to delete network to vxlan id map: %v", err)
  1055  		}
  1056  	}
  1057  	var vnis []uint32
  1058  	n.Lock()
  1059  	for _, s := range n.subnets {
  1060  		if n.driver.vxlanIdm != nil {
  1061  			vnis = append(vnis, s.vni)
  1062  		}
  1063  		s.vni = 0
  1064  	}
  1065  	n.Unlock()
  1066  
  1067  	for _, vni := range vnis {
  1068  		n.driver.vxlanIdm.Release(uint64(vni))
  1069  	}
  1070  
  1071  	return vnis, nil
  1072  }
  1073  
  1074  func (n *network) obtainVxlanID(s *subnet) error {
  1075  	//return if the subnet already has a vxlan id assigned
  1076  	if n.vxlanID(s) != 0 {
  1077  		return nil
  1078  	}
  1079  
  1080  	if n.driver.store == nil {
  1081  		return fmt.Errorf("no valid vxlan id and no datastore configured, cannot obtain vxlan id")
  1082  	}
  1083  
  1084  	for {
  1085  		if err := n.driver.store.GetObject(datastore.Key(n.Key()...), n); err != nil {
  1086  			return fmt.Errorf("getting network %q from datastore failed %v", n.id, err)
  1087  		}
  1088  
  1089  		if n.vxlanID(s) == 0 {
  1090  			vxlanID, err := n.driver.vxlanIdm.GetID(true)
  1091  			if err != nil {
  1092  				return fmt.Errorf("failed to allocate vxlan id: %v", err)
  1093  			}
  1094  
  1095  			n.setVxlanID(s, uint32(vxlanID))
  1096  			if err := n.writeToStore(); err != nil {
  1097  				n.driver.vxlanIdm.Release(uint64(n.vxlanID(s)))
  1098  				n.setVxlanID(s, 0)
  1099  				if err == datastore.ErrKeyModified {
  1100  					continue
  1101  				}
  1102  				return fmt.Errorf("network %q failed to update data store: %v", n.id, err)
  1103  			}
  1104  			return nil
  1105  		}
  1106  		return nil
  1107  	}
  1108  }
  1109  
  1110  // contains return true if the passed ip belongs to one the network's
  1111  // subnets
  1112  func (n *network) contains(ip net.IP) bool {
  1113  	for _, s := range n.subnets {
  1114  		if s.subnetIP.Contains(ip) {
  1115  			return true
  1116  		}
  1117  	}
  1118  
  1119  	return false
  1120  }
  1121  
  1122  // getSubnetforIP returns the subnet to which the given IP belongs
  1123  func (n *network) getSubnetforIP(ip *net.IPNet) *subnet {
  1124  	for _, s := range n.subnets {
  1125  		// first check if the mask lengths are the same
  1126  		i, _ := s.subnetIP.Mask.Size()
  1127  		j, _ := ip.Mask.Size()
  1128  		if i != j {
  1129  			continue
  1130  		}
  1131  		if s.subnetIP.Contains(ip.IP) {
  1132  			return s
  1133  		}
  1134  	}
  1135  	return nil
  1136  }
  1137  
  1138  // getMatchingSubnet return the network's subnet that matches the input
  1139  func (n *network) getMatchingSubnet(ip *net.IPNet) *subnet {
  1140  	if ip == nil {
  1141  		return nil
  1142  	}
  1143  	for _, s := range n.subnets {
  1144  		// first check if the mask lengths are the same
  1145  		i, _ := s.subnetIP.Mask.Size()
  1146  		j, _ := ip.Mask.Size()
  1147  		if i != j {
  1148  			continue
  1149  		}
  1150  		if s.subnetIP.IP.Equal(ip.IP) {
  1151  			return s
  1152  		}
  1153  	}
  1154  	return nil
  1155  }