github.com/Heebron/moby@v0.0.0-20221111184709-6eab4f55faf7/libnetwork/drivers/overlay/ov_network.go (about)

     1  //go:build linux
     2  // +build linux
     3  
     4  package overlay
     5  
     6  import (
     7  	"encoding/json"
     8  	"fmt"
     9  	"net"
    10  	"os"
    11  	"os/exec"
    12  	"path/filepath"
    13  	"runtime"
    14  	"strconv"
    15  	"strings"
    16  	"sync"
    17  
    18  	"github.com/docker/docker/libnetwork/datastore"
    19  	"github.com/docker/docker/libnetwork/driverapi"
    20  	"github.com/docker/docker/libnetwork/netlabel"
    21  	"github.com/docker/docker/libnetwork/netutils"
    22  	"github.com/docker/docker/libnetwork/ns"
    23  	"github.com/docker/docker/libnetwork/osl"
    24  	"github.com/docker/docker/libnetwork/resolvconf"
    25  	"github.com/docker/docker/libnetwork/types"
    26  	"github.com/docker/docker/pkg/reexec"
    27  	"github.com/sirupsen/logrus"
    28  	"github.com/vishvananda/netlink"
    29  	"github.com/vishvananda/netlink/nl"
    30  	"github.com/vishvananda/netns"
    31  	"golang.org/x/sys/unix"
    32  )
    33  
// Package-level state shared by every overlay network.
//
// hostMode is set by networkOnceInit, either when the _OVERLAY_HOST_MODE
// environment variable is non-empty or when a probe vxlan interface cannot be
// moved into a network namespace.
// networkOnce makes joinSandbox run networkOnceInit exactly once.
// networkMu is held around reads and deletions of vniTbl after initialization.
// vniTbl maps a vxlan ID to the namespace path in which populateVNITbl found
// a vxlan interface using that ID.
var (
	hostMode    bool
	networkOnce sync.Once
	networkMu   sync.Mutex
	vniTbl      = make(map[uint32]string)
)
    40  
// networkTable maps a network ID to the driver's state for that network.
type networkTable map[string]*network
    42  
// subnet is the per-subnet state of an overlay network.
//
// sboxInit and initErr are maintained by joinSandbox (and reset by
// leaveSandbox) to remember whether the subnet's sandbox devices were set up
// and with what result. vxlanName and brName are filled in by
// initSubnetSandbox once setup or restore succeeds. vni is the vxlan ID for
// the subnet; subnetIP and gwIP are the pool and gateway handed to
// CreateNetwork or recovered by SetValue.
type subnet struct {
	sboxInit  bool
	vxlanName string
	brName    string
	vni       uint32
	initErr   error
	subnetIP  *net.IPNet
	gwIP      *net.IPNet
}
    52  
// subnetJSON is the serialized form of a subnet as produced by
// (*network).Value and consumed by (*network).SetValue. SubnetIP and GwIP
// hold the String() form of the corresponding *net.IPNet values.
type subnetJSON struct {
	SubnetIP string
	GwIP     string
	Vni      uint32
}
    58  
// network is the driver's state for a single overlay network.
//
// dbIndex and dbExists are set through SetIndex. sbox and nlSocket are
// populated by initSandbox and released by destroySandbox. joinCnt is
// incremented by joinSandbox (when requested) and decremented by leaveSandbox,
// which destroys the sandbox when the count reaches zero. sboxInit and initErr
// record that sandbox initialization was attempted and its outcome. initEpoch
// is bumped on every initSandbox call and is part of the sandbox key for
// non-restore initialization. secure and mtu come from the driver options
// parsed in CreateNetwork and are persisted by Value/SetValue. The embedded
// Mutex is taken by the methods that touch the sandbox state.
type network struct {
	id        string
	dbIndex   uint64
	dbExists  bool
	sbox      osl.Sandbox
	nlSocket  *nl.NetlinkSocket
	endpoints endpointTable
	driver    *driver
	joinCnt   int
	sboxInit  bool
	initEpoch int
	initErr   error
	subnets   []*subnet
	secure    bool
	mtu       int
	sync.Mutex
}
    76  
// init registers the "set-default-vlan" reexec entry point and pins the
// goroutine running package initialization to its OS thread.
func init() {
	reexec.Register("set-default-vlan", setDefaultVlan)

	// Lock main() to the initial thread to exclude the goroutines executing
	// func (*network).watchMiss() from being scheduled onto that thread.
	// Changes to the network namespace of the initial thread alter
	// /proc/self/ns/net, which would break any code which (incorrectly)
	// assumes that that file is a handle to the network namespace for the
	// thread it is currently executing on.
	runtime.LockOSThread()
}
    88  
    89  func setDefaultVlan() {
    90  	if len(os.Args) < 3 {
    91  		logrus.Error("insufficient number of arguments")
    92  		os.Exit(1)
    93  	}
    94  
    95  	runtime.LockOSThread()
    96  	defer runtime.UnlockOSThread()
    97  
    98  	nsPath := os.Args[1]
    99  	ns, err := netns.GetFromPath(nsPath)
   100  	if err != nil {
   101  		logrus.Errorf("overlay namespace get failed, %v", err)
   102  		os.Exit(1)
   103  	}
   104  	if err = netns.Set(ns); err != nil {
   105  		logrus.Errorf("setting into overlay namespace failed, %v", err)
   106  		os.Exit(1)
   107  	}
   108  
   109  	// make sure the sysfs mount doesn't propagate back
   110  	if err = unix.Unshare(unix.CLONE_NEWNS); err != nil {
   111  		logrus.Errorf("unshare failed, %v", err)
   112  		os.Exit(1)
   113  	}
   114  
   115  	flag := unix.MS_PRIVATE | unix.MS_REC
   116  	if err = unix.Mount("", "/", "", uintptr(flag), ""); err != nil {
   117  		logrus.Errorf("root mount failed, %v", err)
   118  		os.Exit(1)
   119  	}
   120  
   121  	if err = unix.Mount("sysfs", "/sys", "sysfs", 0, ""); err != nil {
   122  		logrus.Errorf("mounting sysfs failed, %v", err)
   123  		os.Exit(1)
   124  	}
   125  
   126  	brName := os.Args[2]
   127  	path := filepath.Join("/sys/class/net", brName, "bridge/default_pvid")
   128  	data := []byte{'0', '\n'}
   129  
   130  	if err = os.WriteFile(path, data, 0644); err != nil {
   131  		logrus.Errorf("enabling default vlan on bridge %s failed %v", brName, err)
   132  		os.Exit(1)
   133  	}
   134  	os.Exit(0)
   135  }
   136  
// NetworkAllocate is not supported by this driver; it always returns a
// not-implemented error and a nil map.
func (d *driver) NetworkAllocate(id string, option map[string]string, ipV4Data, ipV6Data []driverapi.IPAMData) (map[string]string, error) {
	return nil, types.NotImplementedErrorf("not implemented")
}
   140  
// NetworkFree is not supported by this driver; it always returns a
// not-implemented error.
func (d *driver) NetworkFree(id string) error {
	return types.NotImplementedErrorf("not implemented")
}
   144  
   145  func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo driverapi.NetworkInfo, ipV4Data, ipV6Data []driverapi.IPAMData) error {
   146  	if id == "" {
   147  		return fmt.Errorf("invalid network id")
   148  	}
   149  	if len(ipV4Data) == 0 || ipV4Data[0].Pool.String() == "0.0.0.0/0" {
   150  		return types.BadRequestErrorf("ipv4 pool is empty")
   151  	}
   152  
   153  	// Since we perform lazy configuration make sure we try
   154  	// configuring the driver when we enter CreateNetwork
   155  	if err := d.configure(); err != nil {
   156  		return err
   157  	}
   158  
   159  	n := &network{
   160  		id:        id,
   161  		driver:    d,
   162  		endpoints: endpointTable{},
   163  		subnets:   []*subnet{},
   164  	}
   165  
   166  	vnis := make([]uint32, 0, len(ipV4Data))
   167  	if gval, ok := option[netlabel.GenericData]; ok {
   168  		optMap := gval.(map[string]string)
   169  		if val, ok := optMap[netlabel.OverlayVxlanIDList]; ok {
   170  			logrus.Debugf("overlay: Received vxlan IDs: %s", val)
   171  			vniStrings := strings.Split(val, ",")
   172  			for _, vniStr := range vniStrings {
   173  				vni, err := strconv.Atoi(vniStr)
   174  				if err != nil {
   175  					return fmt.Errorf("invalid vxlan id value %q passed", vniStr)
   176  				}
   177  
   178  				vnis = append(vnis, uint32(vni))
   179  			}
   180  		}
   181  		if _, ok := optMap[secureOption]; ok {
   182  			n.secure = true
   183  		}
   184  		if val, ok := optMap[netlabel.DriverMTU]; ok {
   185  			var err error
   186  			if n.mtu, err = strconv.Atoi(val); err != nil {
   187  				return fmt.Errorf("failed to parse %v: %v", val, err)
   188  			}
   189  			if n.mtu < 0 {
   190  				return fmt.Errorf("invalid MTU value: %v", n.mtu)
   191  			}
   192  		}
   193  	}
   194  
   195  	// If we are getting vnis from libnetwork, either we get for
   196  	// all subnets or none.
   197  	if len(vnis) != 0 && len(vnis) < len(ipV4Data) {
   198  		return fmt.Errorf("insufficient vnis(%d) passed to overlay", len(vnis))
   199  	}
   200  
   201  	for i, ipd := range ipV4Data {
   202  		s := &subnet{
   203  			subnetIP: ipd.Pool,
   204  			gwIP:     ipd.Gateway,
   205  		}
   206  
   207  		if len(vnis) != 0 {
   208  			s.vni = vnis[i]
   209  		}
   210  
   211  		n.subnets = append(n.subnets, s)
   212  	}
   213  
   214  	d.Lock()
   215  	defer d.Unlock()
   216  	if d.networks[n.id] != nil {
   217  		return fmt.Errorf("attempt to create overlay network %v that already exists", n.id)
   218  	}
   219  
   220  	if err := n.writeToStore(); err != nil {
   221  		return fmt.Errorf("failed to update data store for network %v: %v", n.id, err)
   222  	}
   223  
   224  	// Make sure no rule is on the way from any stale secure network
   225  	if !n.secure {
   226  		for _, vni := range vnis {
   227  			programMangle(vni, false)
   228  			programInput(vni, false)
   229  		}
   230  	}
   231  
   232  	if nInfo != nil {
   233  		if err := nInfo.TableEventRegister(ovPeerTable, driverapi.EndpointObject); err != nil {
   234  			// XXX Undo writeToStore?  No method to so.  Why?
   235  			return err
   236  		}
   237  	}
   238  
   239  	d.networks[id] = n
   240  
   241  	return nil
   242  }
   243  
// DeleteNetwork removes the overlay network nid from the driver.
//
// The network is looked up in d.networks and, failing that, restored from the
// store. For each of its endpoints the interface link (if any) is deleted and
// the endpoint is removed from the local store; failures there are only
// logged. The network is then dropped from d.networks, its vxlan IDs are
// released and, for secure networks, the per-VNI rules are unprogrammed.
//
// An error is returned when nid is empty, the driver cannot be configured,
// the network cannot be found, or releasing the vxlan IDs fails.
func (d *driver) DeleteNetwork(nid string) error {
	if nid == "" {
		return fmt.Errorf("invalid network id")
	}

	// Make sure driver resources are initialized before proceeding
	if err := d.configure(); err != nil {
		return err
	}

	d.Lock()
	// Only perform a peer flush operation (if required) AFTER unlocking
	// the driver lock to avoid deadlocking w/ the peerDB.
	var doPeerFlush bool
	defer func() {
		d.Unlock()
		if doPeerFlush {
			d.peerFlush(nid)
		}
	}()

	// This is similar to d.network(), but we need to keep holding the lock
	// until we are done removing this network.
	n, ok := d.networks[nid]
	if !ok {
		n = d.restoreNetworkFromStore(nid)
	}
	if n == nil {
		return fmt.Errorf("could not find network with id %s", nid)
	}

	for _, ep := range n.endpoints {
		if ep.ifName != "" {
			if link, err := ns.NlHandle().LinkByName(ep.ifName); err == nil {
				if err := ns.NlHandle().LinkDel(link); err != nil {
					logrus.WithError(err).Warnf("Failed to delete interface (%s)'s link on endpoint (%s) delete", ep.ifName, ep.id)
				}
			}
		}

		if err := d.deleteEndpointFromStore(ep); err != nil {
			logrus.Warnf("Failed to delete overlay endpoint %.7s from local store: %v", ep.id, err)
		}
	}

	// From here on the deferred function flushes the peers of this network,
	// even if releasing the vxlan IDs below fails.
	doPeerFlush = true
	delete(d.networks, nid)

	vnis, err := n.releaseVxlanID()
	if err != nil {
		return err
	}

	if n.secure {
		for _, vni := range vnis {
			programMangle(vni, false)
			programInput(vni, false)
		}
	}

	return nil
}
   306  
// ProgramExternalConnectivity is a no-op for this driver and always returns
// nil.
func (d *driver) ProgramExternalConnectivity(nid, eid string, options map[string]interface{}) error {
	return nil
}
   310  
// RevokeExternalConnectivity is a no-op for this driver and always returns
// nil.
func (d *driver) RevokeExternalConnectivity(nid, eid string) error {
	return nil
}
   314  
// joinSandbox makes sure the network sandbox and the sandbox devices of
// subnet s are initialized, running the one-time package initialization first
// if needed. restore selects restoring existing state over creating it.
// When incJoinCount is true and everything succeeded, n.joinCnt is
// incremented.
//
// A failed network sandbox initialization is remembered and returned on every
// later call until leaveSandbox resets it. A failed subnet initialization is
// only remembered when restore is true; otherwise it is retried on the next
// call.
func (n *network) joinSandbox(s *subnet, restore bool, incJoinCount bool) error {
	// If there is a race between two go routines here only one will win
	// the other will wait.
	networkOnce.Do(networkOnceInit)

	n.Lock()
	// If non-restore initialization occurred and was successful then
	// tell the peerDB to initialize the sandbox with all the peers
	// previously received from networkdb.  But only do this after
	// unlocking the network.  Otherwise we could deadlock on the
	// peerDB channel while peerDB is waiting for the network lock.
	var doInitPeerDB bool
	defer func() {
		n.Unlock()
		if doInitPeerDB {
			go n.driver.initSandboxPeerDB(n.id)
		}
	}()

	if !n.sboxInit {
		n.initErr = n.initSandbox(restore)
		doInitPeerDB = n.initErr == nil && !restore
		// If there was an error, we cannot recover it
		n.sboxInit = true
	}

	if n.initErr != nil {
		return fmt.Errorf("network sandbox join failed: %v", n.initErr)
	}

	subnetErr := s.initErr
	if !s.sboxInit {
		subnetErr = n.initSubnetSandbox(s, restore)
		// We can recover from these errors, but not on restore
		if restore || subnetErr == nil {
			s.initErr = subnetErr
			s.sboxInit = true
		}
	}
	if subnetErr != nil {
		return fmt.Errorf("subnet sandbox join failed for %q: %v", s.subnetIP.String(), subnetErr)
	}

	if incJoinCount {
		n.joinCnt++
	}

	return nil
}
   364  
   365  func (n *network) leaveSandbox() {
   366  	n.Lock()
   367  	defer n.Unlock()
   368  	n.joinCnt--
   369  	if n.joinCnt != 0 {
   370  		return
   371  	}
   372  
   373  	n.destroySandbox()
   374  
   375  	n.sboxInit = false
   376  	n.initErr = nil
   377  	for _, s := range n.subnets {
   378  		s.sboxInit = false
   379  		s.initErr = nil
   380  	}
   381  }
   382  
   383  // to be called while holding network lock
   384  func (n *network) destroySandbox() {
   385  	if n.sbox != nil {
   386  		for _, iface := range n.sbox.Info().Interfaces() {
   387  			if err := iface.Remove(); err != nil {
   388  				logrus.Debugf("Remove interface %s failed: %v", iface.SrcName(), err)
   389  			}
   390  		}
   391  
   392  		for _, s := range n.subnets {
   393  			if hostMode {
   394  				if err := removeFilters(n.id[:12], s.brName); err != nil {
   395  					logrus.Warnf("Could not remove overlay filters: %v", err)
   396  				}
   397  			}
   398  
   399  			if s.vxlanName != "" {
   400  				err := deleteInterface(s.vxlanName)
   401  				if err != nil {
   402  					logrus.Warnf("could not cleanup sandbox properly: %v", err)
   403  				}
   404  			}
   405  		}
   406  
   407  		if hostMode {
   408  			if err := removeNetworkChain(n.id[:12]); err != nil {
   409  				logrus.Warnf("could not remove network chain: %v", err)
   410  			}
   411  		}
   412  
   413  		// Close the netlink socket, this will also release the watchMiss goroutine that is using it
   414  		if n.nlSocket != nil {
   415  			n.nlSocket.Close()
   416  			n.nlSocket = nil
   417  		}
   418  
   419  		n.sbox.Destroy()
   420  		n.sbox = nil
   421  	}
   422  }
   423  
   424  func populateVNITbl() {
   425  	filepath.WalkDir(filepath.Dir(osl.GenerateKey("walk")),
   426  		// NOTE(cpuguy83): The linter picked up on the fact that this walk function was not using this error argument
   427  		// That seems wrong... however I'm not familiar with this code or if that error matters
   428  		func(path string, _ os.DirEntry, _ error) error {
   429  			_, fname := filepath.Split(path)
   430  
   431  			if len(strings.Split(fname, "-")) <= 1 {
   432  				return nil
   433  			}
   434  
   435  			n, err := netns.GetFromPath(path)
   436  			if err != nil {
   437  				logrus.Errorf("Could not open namespace path %s during vni population: %v", path, err)
   438  				return nil
   439  			}
   440  			defer n.Close()
   441  
   442  			nlh, err := netlink.NewHandleAt(n, unix.NETLINK_ROUTE)
   443  			if err != nil {
   444  				logrus.Errorf("Could not open netlink handle during vni population for ns %s: %v", path, err)
   445  				return nil
   446  			}
   447  			defer nlh.Close()
   448  
   449  			err = nlh.SetSocketTimeout(soTimeout)
   450  			if err != nil {
   451  				logrus.Warnf("Failed to set the timeout on the netlink handle sockets for vni table population: %v", err)
   452  			}
   453  
   454  			links, err := nlh.LinkList()
   455  			if err != nil {
   456  				logrus.Errorf("Failed to list interfaces during vni population for ns %s: %v", path, err)
   457  				return nil
   458  			}
   459  
   460  			for _, l := range links {
   461  				if l.Type() == "vxlan" {
   462  					vniTbl[uint32(l.(*netlink.Vxlan).VxlanId)] = path
   463  				}
   464  			}
   465  
   466  			return nil
   467  		})
   468  }
   469  
   470  func networkOnceInit() {
   471  	populateVNITbl()
   472  
   473  	if os.Getenv("_OVERLAY_HOST_MODE") != "" {
   474  		hostMode = true
   475  		return
   476  	}
   477  
   478  	err := createVxlan("testvxlan", 1, 0)
   479  	if err != nil {
   480  		logrus.Errorf("Failed to create testvxlan interface: %v", err)
   481  		return
   482  	}
   483  
   484  	defer deleteInterface("testvxlan")
   485  
   486  	path := "/proc/self/ns/net"
   487  	hNs, err := netns.GetFromPath(path)
   488  	if err != nil {
   489  		logrus.Errorf("Failed to get network namespace from path %s while setting host mode: %v", path, err)
   490  		return
   491  	}
   492  	defer hNs.Close()
   493  
   494  	nlh := ns.NlHandle()
   495  
   496  	iface, err := nlh.LinkByName("testvxlan")
   497  	if err != nil {
   498  		logrus.Errorf("Failed to get link testvxlan while setting host mode: %v", err)
   499  		return
   500  	}
   501  
   502  	// If we are not able to move the vxlan interface to a namespace
   503  	// then fallback to host mode
   504  	if err := nlh.LinkSetNsFd(iface, int(hNs)); err != nil {
   505  		hostMode = true
   506  	}
   507  }
   508  
   509  func (n *network) generateVxlanName(s *subnet) string {
   510  	id := n.id
   511  	if len(n.id) > 5 {
   512  		id = n.id[:5]
   513  	}
   514  
   515  	return fmt.Sprintf("vx-%06x-%v", s.vni, id)
   516  }
   517  
   518  func (n *network) generateBridgeName(s *subnet) string {
   519  	id := n.id
   520  	if len(n.id) > 5 {
   521  		id = n.id[:5]
   522  	}
   523  
   524  	return n.getBridgeNamePrefix(s) + "-" + id
   525  }
   526  
// getBridgeNamePrefix returns "ov-" followed by the subnet's vni formatted as
// at least six zero-padded hex digits.
func (n *network) getBridgeNamePrefix(s *subnet) string {
	return fmt.Sprintf("ov-%06x", s.vni)
}
   530  
   531  func checkOverlap(nw *net.IPNet) error {
   532  	var nameservers []string
   533  
   534  	if rc, err := resolvconf.Get(); err == nil {
   535  		nameservers = resolvconf.GetNameserversAsCIDR(rc.Content)
   536  	}
   537  
   538  	if err := netutils.CheckNameserverOverlaps(nameservers, nw); err != nil {
   539  		return fmt.Errorf("overlay subnet %s failed check with nameserver: %v: %v", nw.String(), nameservers, err)
   540  	}
   541  
   542  	if err := netutils.CheckRouteOverlaps(nw); err != nil {
   543  		return fmt.Errorf("overlay subnet %s failed check with host route table: %v", nw.String(), err)
   544  	}
   545  
   546  	return nil
   547  }
   548  
   549  func (n *network) restoreSubnetSandbox(s *subnet, brName, vxlanName string) error {
   550  	// restore overlay osl sandbox
   551  	ifaces := map[string][]osl.IfaceOption{
   552  		brName + "+br": {
   553  			n.sbox.InterfaceOptions().Address(s.gwIP),
   554  			n.sbox.InterfaceOptions().Bridge(true),
   555  		},
   556  	}
   557  	if err := n.sbox.Restore(ifaces, nil, nil, nil); err != nil {
   558  		return err
   559  	}
   560  
   561  	ifaces = map[string][]osl.IfaceOption{
   562  		vxlanName + "+vxlan": {
   563  			n.sbox.InterfaceOptions().Master(brName),
   564  		},
   565  	}
   566  	return n.sbox.Restore(ifaces, nil, nil, nil)
   567  }
   568  
// setupSubnetSandbox creates the bridge and the vxlan device for subnet s and
// adds them to the network sandbox.
//
// In host mode, stale bridge and vxlan interfaces are deleted first and the
// subnet is checked for overlap; filters are added at the end. Otherwise, any
// namespace that vniTbl records as still using the subnet's vni is torn down
// first, and after the devices are in place the "set-default-vlan" reexec
// command is run for the bridge (its failure is only logged).
//
// If adding the vxlan device to the sandbox fails, the bridge is removed from
// the sandbox and the vxlan interface is deleted before the error is
// returned.
func (n *network) setupSubnetSandbox(s *subnet, brName, vxlanName string) error {
	if hostMode {
		// Try to delete stale bridge interface if it exists
		if err := deleteInterface(brName); err != nil {
			deleteInterfaceBySubnet(n.getBridgeNamePrefix(s), s)
		}
		// Try to delete the vxlan interface by vni if already present
		deleteVxlanByVNI("", s.vni)

		if err := checkOverlap(s.subnetIP); err != nil {
			return err
		}
	}

	if !hostMode {
		// Check whether this subnet's vni is being used in some
		// other namespace by looking at vniTbl that we just
		// populated in the once init. If a hit is found then
		// it must be a stale namespace from a previous
		// life. Destroy it completely and reclaim resources.
		networkMu.Lock()
		path, ok := vniTbl[s.vni]
		networkMu.Unlock()

		if ok {
			deleteVxlanByVNI(path, s.vni)
			if err := unix.Unmount(path, unix.MNT_FORCE); err != nil {
				logrus.Errorf("unmount of %s failed: %v", path, err)
			}
			os.Remove(path)

			networkMu.Lock()
			delete(vniTbl, s.vni)
			networkMu.Unlock()
		}
	}

	// create a bridge and vxlan device for this subnet and move it to the sandbox
	sbox := n.sbox

	if err := sbox.AddInterface(brName, "br",
		sbox.InterfaceOptions().Address(s.gwIP),
		sbox.InterfaceOptions().Bridge(true)); err != nil {
		return fmt.Errorf("bridge creation in sandbox failed for subnet %q: %v", s.subnetIP.String(), err)
	}

	err := createVxlan(vxlanName, s.vni, n.maxMTU())
	if err != nil {
		return err
	}

	if err := sbox.AddInterface(vxlanName, "vxlan",
		sbox.InterfaceOptions().Master(brName)); err != nil {
		// If adding vxlan device to the overlay namespace fails, remove the bridge interface we
		// already added to the namespace. This allows the caller to try the setup again.
		for _, iface := range sbox.Info().Interfaces() {
			if iface.SrcName() == brName {
				if ierr := iface.Remove(); ierr != nil {
					logrus.Errorf("removing bridge failed from ov ns %v failed, %v", n.sbox.Key(), ierr)
				}
			}
		}

		// Also, delete the vxlan interface. Since a global vni id is associated
		// with the vxlan interface, an orphaned vxlan interface will result in
		// failure of vxlan device creation if the vni is assigned to some other
		// network.
		if deleteErr := deleteInterface(vxlanName); deleteErr != nil {
			logrus.Warnf("could not delete vxlan interface, %s, error %v, after config error, %v", vxlanName, deleteErr, err)
		}
		return fmt.Errorf("vxlan interface creation failed for subnet %q: %v", s.subnetIP.String(), err)
	}

	if !hostMode {
		// Use the in-sandbox name of the last bridge interface reported by
		// the sandbox.
		var name string
		for _, i := range sbox.Info().Interfaces() {
			if i.Bridge() {
				name = i.DstName()
			}
		}
		cmd := &exec.Cmd{
			Path:   reexec.Self(),
			Args:   []string{"set-default-vlan", sbox.Key(), name},
			Stdout: os.Stdout,
			Stderr: os.Stderr,
		}
		if err := cmd.Run(); err != nil {
			// not a fatal error
			logrus.Errorf("reexec to set bridge default vlan failed %v", err)
		}
	}

	if hostMode {
		if err := addFilters(n.id[:12], brName); err != nil {
			return err
		}
	}

	return nil
}
   669  
   670  // Must be called with the network lock
   671  func (n *network) initSubnetSandbox(s *subnet, restore bool) error {
   672  	brName := n.generateBridgeName(s)
   673  	vxlanName := n.generateVxlanName(s)
   674  
   675  	if restore {
   676  		if err := n.restoreSubnetSandbox(s, brName, vxlanName); err != nil {
   677  			return err
   678  		}
   679  	} else {
   680  		if err := n.setupSubnetSandbox(s, brName, vxlanName); err != nil {
   681  			return err
   682  		}
   683  	}
   684  
   685  	s.vxlanName = vxlanName
   686  	s.brName = brName
   687  
   688  	return nil
   689  }
   690  
   691  func (n *network) cleanupStaleSandboxes() {
   692  	filepath.WalkDir(filepath.Dir(osl.GenerateKey("walk")),
   693  		func(path string, _ os.DirEntry, _ error) error {
   694  			_, fname := filepath.Split(path)
   695  
   696  			pList := strings.Split(fname, "-")
   697  			if len(pList) <= 1 {
   698  				return nil
   699  			}
   700  
   701  			pattern := pList[1]
   702  			if strings.Contains(n.id, pattern) {
   703  				// Delete all vnis
   704  				deleteVxlanByVNI(path, 0)
   705  				unix.Unmount(path, unix.MNT_DETACH)
   706  				os.Remove(path)
   707  
   708  				// Now that we have destroyed this
   709  				// sandbox, remove all references to
   710  				// it in vniTbl so that we don't
   711  				// inadvertently destroy the sandbox
   712  				// created in this life.
   713  				networkMu.Lock()
   714  				for vni, tblPath := range vniTbl {
   715  					if tblPath == path {
   716  						delete(vniTbl, vni)
   717  					}
   718  				}
   719  				networkMu.Unlock()
   720  			}
   721  
   722  			return nil
   723  		})
   724  }
   725  
   726  func (n *network) initSandbox(restore bool) error {
   727  	n.initEpoch++
   728  
   729  	if !restore {
   730  		if hostMode {
   731  			if err := addNetworkChain(n.id[:12]); err != nil {
   732  				return err
   733  			}
   734  		}
   735  
   736  		// If there are any stale sandboxes related to this network
   737  		// from previous daemon life clean it up here
   738  		n.cleanupStaleSandboxes()
   739  	}
   740  
   741  	// In the restore case network sandbox already exist; but we don't know
   742  	// what epoch number it was created with. It has to be retrieved by
   743  	// searching the net namespaces.
   744  	var key string
   745  	if restore {
   746  		key = osl.GenerateKey("-" + n.id)
   747  	} else {
   748  		key = osl.GenerateKey(fmt.Sprintf("%d-", n.initEpoch) + n.id)
   749  	}
   750  
   751  	sbox, err := osl.NewSandbox(key, !hostMode, restore)
   752  	if err != nil {
   753  		return fmt.Errorf("could not get network sandbox (oper %t): %v", restore, err)
   754  	}
   755  
   756  	// this is needed to let the peerAdd configure the sandbox
   757  	n.sbox = sbox
   758  
   759  	// If we are in swarm mode, we don't need anymore the watchMiss routine.
   760  	// This will save 1 thread and 1 netlink socket per network
   761  	if !n.driver.isSerfAlive() {
   762  		return nil
   763  	}
   764  
   765  	var nlSock *nl.NetlinkSocket
   766  	sbox.InvokeFunc(func() {
   767  		nlSock, err = nl.Subscribe(unix.NETLINK_ROUTE, unix.RTNLGRP_NEIGH)
   768  		if err != nil {
   769  			return
   770  		}
   771  		// set the receive timeout to not remain stuck on the RecvFrom if the fd gets closed
   772  		tv := unix.NsecToTimeval(soTimeout.Nanoseconds())
   773  		err = nlSock.SetReceiveTimeout(&tv)
   774  	})
   775  	n.nlSocket = nlSock
   776  
   777  	if err == nil {
   778  		go n.watchMiss(nlSock, key)
   779  	} else {
   780  		logrus.Errorf("failed to subscribe to neighbor group netlink messages for overlay network %s in sbox %s: %v",
   781  			n.id, sbox.Key(), err)
   782  	}
   783  
   784  	return nil
   785  }
   786  
   787  func (n *network) watchMiss(nlSock *nl.NetlinkSocket, nsPath string) {
   788  	// With the new version of the netlink library the deserialize function makes
   789  	// requests about the interface of the netlink message. This can succeed only
   790  	// if this go routine is in the target namespace.
   791  	origNs, err := netns.Get()
   792  	if err != nil {
   793  		logrus.WithError(err).Error("failed to get the initial network namespace")
   794  		return
   795  	}
   796  	defer origNs.Close()
   797  	newNs, err := netns.GetFromPath(nsPath)
   798  	if err != nil {
   799  		logrus.WithError(err).Errorf("failed to get the namespace %s", nsPath)
   800  		return
   801  	}
   802  	defer newNs.Close()
   803  
   804  	runtime.LockOSThread()
   805  	if err = netns.Set(newNs); err != nil {
   806  		logrus.WithError(err).Errorf("failed to enter the namespace %s", nsPath)
   807  		runtime.UnlockOSThread()
   808  		return
   809  	}
   810  	defer func() {
   811  		if err := netns.Set(origNs); err != nil {
   812  			logrus.WithError(err).Error("failed to restore the thread's initial network namespace")
   813  			// The error is only fatal for the current thread. Keep this
   814  			// goroutine locked to the thread to make the runtime replace it
   815  			// with a clean thread once this goroutine terminates.
   816  		} else {
   817  			runtime.UnlockOSThread()
   818  		}
   819  	}()
   820  	for {
   821  		msgs, _, err := nlSock.Receive()
   822  		if err != nil {
   823  			n.Lock()
   824  			nlFd := nlSock.GetFd()
   825  			n.Unlock()
   826  			if nlFd == -1 {
   827  				// The netlink socket got closed, simply exit to not leak this goroutine
   828  				return
   829  			}
   830  			// When the receive timeout expires the receive will return EAGAIN
   831  			if err == unix.EAGAIN {
   832  				// we continue here to avoid spam for timeouts
   833  				continue
   834  			}
   835  			logrus.Errorf("Failed to receive from netlink: %v ", err)
   836  			continue
   837  		}
   838  
   839  		for _, msg := range msgs {
   840  			if msg.Header.Type != unix.RTM_GETNEIGH && msg.Header.Type != unix.RTM_NEWNEIGH {
   841  				continue
   842  			}
   843  
   844  			neigh, err := netlink.NeighDeserialize(msg.Data)
   845  			if err != nil {
   846  				logrus.Errorf("Failed to deserialize netlink ndmsg: %v", err)
   847  				continue
   848  			}
   849  
   850  			var (
   851  				ip             net.IP
   852  				mac            net.HardwareAddr
   853  				l2Miss, l3Miss bool
   854  			)
   855  			if neigh.IP.To4() != nil {
   856  				ip = neigh.IP
   857  				l3Miss = true
   858  			} else if neigh.HardwareAddr != nil {
   859  				mac = []byte(neigh.HardwareAddr)
   860  				ip = net.IP(mac[2:])
   861  				l2Miss = true
   862  			} else {
   863  				continue
   864  			}
   865  
   866  			// Not any of the network's subnets. Ignore.
   867  			if !n.contains(ip) {
   868  				continue
   869  			}
   870  
   871  			if neigh.State&(netlink.NUD_STALE|netlink.NUD_INCOMPLETE) == 0 {
   872  				continue
   873  			}
   874  
   875  			logrus.Debugf("miss notification: dest IP %v, dest MAC %v", ip, mac)
   876  			mac, IPmask, vtep, err := n.driver.resolvePeer(n.id, ip)
   877  			if err != nil {
   878  				logrus.Errorf("could not resolve peer %q: %v", ip, err)
   879  				continue
   880  			}
   881  			n.driver.peerAdd(n.id, "dummy", ip, IPmask, mac, vtep, l2Miss, l3Miss, false)
   882  		}
   883  	}
   884  }
   885  
   886  // Restore a network from the store to the driver if it is present.
   887  // Must be called with the driver locked!
   888  func (d *driver) restoreNetworkFromStore(nid string) *network {
   889  	n := d.getNetworkFromStore(nid)
   890  	if n != nil {
   891  		n.driver = d
   892  		n.endpoints = endpointTable{}
   893  		d.networks[nid] = n
   894  	}
   895  	return n
   896  }
   897  
   898  func (d *driver) network(nid string) *network {
   899  	d.Lock()
   900  	n, ok := d.networks[nid]
   901  	if !ok {
   902  		n = d.restoreNetworkFromStore(nid)
   903  	}
   904  	d.Unlock()
   905  
   906  	return n
   907  }
   908  
   909  func (d *driver) getNetworkFromStore(nid string) *network {
   910  	if d.store == nil {
   911  		return nil
   912  	}
   913  
   914  	n := &network{id: nid}
   915  	if err := d.store.GetObject(datastore.Key(n.Key()...), n); err != nil {
   916  		return nil
   917  	}
   918  
   919  	return n
   920  }
   921  
   922  func (n *network) sandbox() osl.Sandbox {
   923  	n.Lock()
   924  	defer n.Unlock()
   925  	return n.sbox
   926  }
   927  
   928  func (n *network) vxlanID(s *subnet) uint32 {
   929  	n.Lock()
   930  	defer n.Unlock()
   931  	return s.vni
   932  }
   933  
   934  func (n *network) setVxlanID(s *subnet, vni uint32) {
   935  	n.Lock()
   936  	s.vni = vni
   937  	n.Unlock()
   938  }
   939  
// Key returns the store key components for this network: "overlay",
// "network" and the network id.
func (n *network) Key() []string {
	return []string{"overlay", "network", n.id}
}
   943  
// KeyPrefix returns the store key components shared by all overlay networks:
// "overlay" and "network".
func (n *network) KeyPrefix() []string {
	return []string{"overlay", "network"}
}
   947  
   948  func (n *network) Value() []byte {
   949  	m := map[string]interface{}{}
   950  
   951  	netJSON := []*subnetJSON{}
   952  
   953  	for _, s := range n.subnets {
   954  		sj := &subnetJSON{
   955  			SubnetIP: s.subnetIP.String(),
   956  			GwIP:     s.gwIP.String(),
   957  			Vni:      s.vni,
   958  		}
   959  		netJSON = append(netJSON, sj)
   960  	}
   961  
   962  	m["secure"] = n.secure
   963  	m["subnets"] = netJSON
   964  	m["mtu"] = n.mtu
   965  	b, err := json.Marshal(m)
   966  	if err != nil {
   967  		return []byte{}
   968  	}
   969  
   970  	return b
   971  }
   972  
// Index returns the index last recorded through SetIndex.
func (n *network) Index() uint64 {
	return n.dbIndex
}
   976  
   977  func (n *network) SetIndex(index uint64) {
   978  	n.dbIndex = index
   979  	n.dbExists = true
   980  }
   981  
// Exists reports the dbExists flag, which SetIndex sets to true.
func (n *network) Exists() bool {
	return n.dbExists
}
   985  
// Skip always returns false.
// NOTE(review): presumably part of the datastore object interface, telling
// the store not to skip persisting this object — confirm against datastore.
func (n *network) Skip() bool {
	return false
}
   989  
   990  func (n *network) SetValue(value []byte) error {
   991  	var (
   992  		m       map[string]interface{}
   993  		newNet  bool
   994  		isMap   = true
   995  		netJSON = []*subnetJSON{}
   996  	)
   997  
   998  	if err := json.Unmarshal(value, &m); err != nil {
   999  		err := json.Unmarshal(value, &netJSON)
  1000  		if err != nil {
  1001  			return err
  1002  		}
  1003  		isMap = false
  1004  	}
  1005  
  1006  	if len(n.subnets) == 0 {
  1007  		newNet = true
  1008  	}
  1009  
  1010  	if isMap {
  1011  		if val, ok := m["secure"]; ok {
  1012  			n.secure = val.(bool)
  1013  		}
  1014  		if val, ok := m["mtu"]; ok {
  1015  			n.mtu = int(val.(float64))
  1016  		}
  1017  		bytes, err := json.Marshal(m["subnets"])
  1018  		if err != nil {
  1019  			return err
  1020  		}
  1021  		if err := json.Unmarshal(bytes, &netJSON); err != nil {
  1022  			return err
  1023  		}
  1024  	}
  1025  
  1026  	for _, sj := range netJSON {
  1027  		subnetIPstr := sj.SubnetIP
  1028  		gwIPstr := sj.GwIP
  1029  		vni := sj.Vni
  1030  
  1031  		subnetIP, _ := types.ParseCIDR(subnetIPstr)
  1032  		gwIP, _ := types.ParseCIDR(gwIPstr)
  1033  
  1034  		if newNet {
  1035  			s := &subnet{
  1036  				subnetIP: subnetIP,
  1037  				gwIP:     gwIP,
  1038  				vni:      vni,
  1039  			}
  1040  			n.subnets = append(n.subnets, s)
  1041  		} else {
  1042  			sNet := n.getMatchingSubnet(subnetIP)
  1043  			if sNet != nil {
  1044  				sNet.vni = vni
  1045  			}
  1046  		}
  1047  	}
  1048  	return nil
  1049  }
  1050  
// DataScope returns datastore.GlobalScope for every overlay network.
func (n *network) DataScope() string {
	return datastore.GlobalScope
}
  1054  
  1055  func (n *network) writeToStore() error {
  1056  	if n.driver.store == nil {
  1057  		return nil
  1058  	}
  1059  
  1060  	return n.driver.store.PutObjectAtomic(n)
  1061  }
  1062  
  1063  func (n *network) releaseVxlanID() ([]uint32, error) {
  1064  	n.Lock()
  1065  	nSubnets := len(n.subnets)
  1066  	n.Unlock()
  1067  	if nSubnets == 0 {
  1068  		return nil, nil
  1069  	}
  1070  
  1071  	if n.driver.store != nil {
  1072  		if err := n.driver.store.DeleteObjectAtomic(n); err != nil {
  1073  			if err == datastore.ErrKeyModified || err == datastore.ErrKeyNotFound {
  1074  				// In both the above cases we can safely assume that the key has been removed by some other
  1075  				// instance and so simply get out of here
  1076  				return nil, nil
  1077  			}
  1078  
  1079  			return nil, fmt.Errorf("failed to delete network to vxlan id map: %v", err)
  1080  		}
  1081  	}
  1082  	var vnis []uint32
  1083  	n.Lock()
  1084  	for _, s := range n.subnets {
  1085  		if n.driver.vxlanIdm != nil {
  1086  			vnis = append(vnis, s.vni)
  1087  		}
  1088  		s.vni = 0
  1089  	}
  1090  	n.Unlock()
  1091  
  1092  	for _, vni := range vnis {
  1093  		n.driver.vxlanIdm.Release(uint64(vni))
  1094  	}
  1095  
  1096  	return vnis, nil
  1097  }
  1098  
  1099  func (n *network) obtainVxlanID(s *subnet) error {
  1100  	// return if the subnet already has a vxlan id assigned
  1101  	if n.vxlanID(s) != 0 {
  1102  		return nil
  1103  	}
  1104  
  1105  	if n.driver.store == nil {
  1106  		return fmt.Errorf("no valid vxlan id and no datastore configured, cannot obtain vxlan id")
  1107  	}
  1108  
  1109  	for {
  1110  		if err := n.driver.store.GetObject(datastore.Key(n.Key()...), n); err != nil {
  1111  			return fmt.Errorf("getting network %q from datastore failed %v", n.id, err)
  1112  		}
  1113  
  1114  		if n.vxlanID(s) == 0 {
  1115  			vxlanID, err := n.driver.vxlanIdm.GetID(true)
  1116  			if err != nil {
  1117  				return fmt.Errorf("failed to allocate vxlan id: %v", err)
  1118  			}
  1119  
  1120  			n.setVxlanID(s, uint32(vxlanID))
  1121  			if err := n.writeToStore(); err != nil {
  1122  				n.driver.vxlanIdm.Release(uint64(n.vxlanID(s)))
  1123  				n.setVxlanID(s, 0)
  1124  				if err == datastore.ErrKeyModified {
  1125  					continue
  1126  				}
  1127  				return fmt.Errorf("network %q failed to update data store: %v", n.id, err)
  1128  			}
  1129  			return nil
  1130  		}
  1131  		return nil
  1132  	}
  1133  }
  1134  
  1135  // contains return true if the passed ip belongs to one the network's
  1136  // subnets
  1137  func (n *network) contains(ip net.IP) bool {
  1138  	for _, s := range n.subnets {
  1139  		if s.subnetIP.Contains(ip) {
  1140  			return true
  1141  		}
  1142  	}
  1143  
  1144  	return false
  1145  }
  1146  
  1147  // getSubnetforIP returns the subnet to which the given IP belongs
  1148  func (n *network) getSubnetforIP(ip *net.IPNet) *subnet {
  1149  	for _, s := range n.subnets {
  1150  		// first check if the mask lengths are the same
  1151  		i, _ := s.subnetIP.Mask.Size()
  1152  		j, _ := ip.Mask.Size()
  1153  		if i != j {
  1154  			continue
  1155  		}
  1156  		if s.subnetIP.Contains(ip.IP) {
  1157  			return s
  1158  		}
  1159  	}
  1160  	return nil
  1161  }
  1162  
  1163  // getMatchingSubnet return the network's subnet that matches the input
  1164  func (n *network) getMatchingSubnet(ip *net.IPNet) *subnet {
  1165  	if ip == nil {
  1166  		return nil
  1167  	}
  1168  	for _, s := range n.subnets {
  1169  		// first check if the mask lengths are the same
  1170  		i, _ := s.subnetIP.Mask.Size()
  1171  		j, _ := ip.Mask.Size()
  1172  		if i != j {
  1173  			continue
  1174  		}
  1175  		if s.subnetIP.IP.Equal(ip.IP) {
  1176  			return s
  1177  		}
  1178  	}
  1179  	return nil
  1180  }