github.com/Prakhar-Agarwal-byte/moby@v0.0.0-20231027092010-a14e3e8ab87e/libnetwork/drivers/overlay/ov_network.go (about)

     1  //go:build linux
     2  
     3  package overlay
     4  
     5  import (
     6  	"context"
     7  	"errors"
     8  	"fmt"
     9  	"net"
    10  	"os"
    11  	"path/filepath"
    12  	"runtime"
    13  	"strconv"
    14  	"strings"
    15  	"sync"
    16  
    17  	"github.com/Prakhar-Agarwal-byte/moby/libnetwork/driverapi"
    18  	"github.com/Prakhar-Agarwal-byte/moby/libnetwork/drivers/overlay/overlayutils"
    19  	"github.com/Prakhar-Agarwal-byte/moby/libnetwork/netlabel"
    20  	"github.com/Prakhar-Agarwal-byte/moby/libnetwork/ns"
    21  	"github.com/Prakhar-Agarwal-byte/moby/libnetwork/osl"
    22  	"github.com/Prakhar-Agarwal-byte/moby/libnetwork/types"
    23  	"github.com/containerd/log"
    24  	"github.com/vishvananda/netlink"
    25  	"github.com/vishvananda/netns"
    26  	"golang.org/x/sys/unix"
    27  )
    28  
    29  var (
    30  	networkOnce sync.Once
    31  	networkMu   sync.Mutex
    32  	vniTbl      = make(map[uint32]string)
    33  )
    34  
// networkTable maps a string key to the *network it identifies.
// NOTE(review): the key is presumably the network ID — confirm against the
// users of this type.
type networkTable map[string]*network
    36  
// subnet holds the per-subnet state of an overlay network: its addressing,
// its VXLAN ID and the names of the devices created for it.
type subnet struct {
	// sboxInit is set by joinSandbox once initSubnetSandbox has succeeded and
	// is cleared again by leaveSandbox.
	sboxInit bool
	// vxlanName is the vxlan device name recorded by initSubnetSandbox.
	vxlanName string
	// brName is the bridge device name recorded by initSubnetSandbox.
	brName string
	// vni is the VXLAN ID assigned to this subnet in CreateNetwork.
	vni uint32
	// initErr is the error joinSandbox reports for a subnet that is already
	// marked as initialized; leaveSandbox resets it to nil.
	initErr error
	// subnetIP is the IPv4 pool of the subnet (IPAMData.Pool).
	subnetIP *net.IPNet
	// gwIP is the gateway address of the subnet (IPAMData.Gateway); it is
	// assigned to the subnet's bridge in setupSubnetSandbox.
	gwIP *net.IPNet
}
    46  
// network is the driver's in-memory representation of one overlay network.
// The embedded mutex protects the sandbox-related fields; see joinSandbox,
// leaveSandbox and sandbox.
type network struct {
	// id is the network ID passed to CreateNetwork.
	id string
	// sbox is the network's sandbox, created by initSandbox and released by
	// destroySandbox. It is nil while no sandbox exists.
	sbox *osl.Namespace
	// endpoints holds the endpoints of this network.
	endpoints endpointTable
	// driver is the owning overlay driver.
	driver *driver
	// joinCnt counts sandbox joins; leaveSandbox destroys the sandbox when it
	// drops to zero.
	joinCnt int
	// sboxInit records that initSandbox has been attempted (successfully or
	// not) since the last leaveSandbox reset.
	sboxInit bool
	// initEpoch is incremented on every initSandbox call and is used as a
	// prefix of the sandbox key.
	initEpoch int
	// initErr is the result of the last initSandbox attempt.
	initErr error
	// subnets lists the subnets of the network, one per IPv4 pool.
	subnets []*subnet
	// secure is true when the secure option was supplied at creation time.
	secure bool
	// mtu is the value parsed from the driver MTU option (0 when not set).
	mtu int
	sync.Mutex
}
    61  
// init pins the goroutine that runs package initialization (and later
// main()) to the initial OS thread.
func init() {
	// Lock main() to the initial thread to exclude the goroutines executing
	// func setDefaultVLAN() from being scheduled onto that thread. Changes to
	// the network namespace of the initial thread alter /proc/self/ns/net,
	// which would break any code which (incorrectly) assumes that this file
	// is a handle to the network namespace of the thread it is currently
	// executing on.
	runtime.LockOSThread()
}
    71  
// NetworkAllocate is not supported by this driver; it always returns a
// not-implemented error and a nil option map.
func (d *driver) NetworkAllocate(id string, option map[string]string, ipV4Data, ipV6Data []driverapi.IPAMData) (map[string]string, error) {
	return nil, types.NotImplementedErrorf("not implemented")
}
    75  
// NetworkFree is not supported by this driver; it always returns a
// not-implemented error.
func (d *driver) NetworkFree(id string) error {
	return types.NotImplementedErrorf("not implemented")
}
    79  
    80  func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo driverapi.NetworkInfo, ipV4Data, ipV6Data []driverapi.IPAMData) error {
    81  	if id == "" {
    82  		return fmt.Errorf("invalid network id")
    83  	}
    84  	if len(ipV4Data) == 0 || ipV4Data[0].Pool.String() == "0.0.0.0/0" {
    85  		return types.InvalidParameterErrorf("ipv4 pool is empty")
    86  	}
    87  
    88  	// Since we perform lazy configuration make sure we try
    89  	// configuring the driver when we enter CreateNetwork
    90  	if err := d.configure(); err != nil {
    91  		return err
    92  	}
    93  
    94  	n := &network{
    95  		id:        id,
    96  		driver:    d,
    97  		endpoints: endpointTable{},
    98  		subnets:   []*subnet{},
    99  	}
   100  
   101  	vnis := make([]uint32, 0, len(ipV4Data))
   102  	gval, ok := option[netlabel.GenericData]
   103  	if !ok {
   104  		return fmt.Errorf("option %s is missing", netlabel.GenericData)
   105  	}
   106  
   107  	optMap := gval.(map[string]string)
   108  	vnisOpt, ok := optMap[netlabel.OverlayVxlanIDList]
   109  	if !ok {
   110  		return errors.New("no VNI provided")
   111  	}
   112  	log.G(context.TODO()).Debugf("overlay: Received vxlan IDs: %s", vnisOpt)
   113  	var err error
   114  	vnis, err = overlayutils.AppendVNIList(vnis, vnisOpt)
   115  	if err != nil {
   116  		return err
   117  	}
   118  
   119  	if _, ok := optMap[secureOption]; ok {
   120  		n.secure = true
   121  	}
   122  	if val, ok := optMap[netlabel.DriverMTU]; ok {
   123  		var err error
   124  		if n.mtu, err = strconv.Atoi(val); err != nil {
   125  			return fmt.Errorf("failed to parse %v: %v", val, err)
   126  		}
   127  		if n.mtu < 0 {
   128  			return fmt.Errorf("invalid MTU value: %v", n.mtu)
   129  		}
   130  	}
   131  
   132  	if len(vnis) == 0 {
   133  		return errors.New("no VNI provided")
   134  	} else if len(vnis) < len(ipV4Data) {
   135  		return fmt.Errorf("insufficient vnis(%d) passed to overlay", len(vnis))
   136  	}
   137  
   138  	for i, ipd := range ipV4Data {
   139  		s := &subnet{
   140  			subnetIP: ipd.Pool,
   141  			gwIP:     ipd.Gateway,
   142  			vni:      vnis[i],
   143  		}
   144  
   145  		n.subnets = append(n.subnets, s)
   146  	}
   147  
   148  	d.Lock()
   149  	defer d.Unlock()
   150  	if d.networks[n.id] != nil {
   151  		return fmt.Errorf("attempt to create overlay network %v that already exists", n.id)
   152  	}
   153  
   154  	// Make sure no rule is on the way from any stale secure network
   155  	if !n.secure {
   156  		for _, vni := range vnis {
   157  			programMangle(vni, false)
   158  			programInput(vni, false)
   159  		}
   160  	}
   161  
   162  	if nInfo != nil {
   163  		if err := nInfo.TableEventRegister(ovPeerTable, driverapi.EndpointObject); err != nil {
   164  			// XXX Undo writeToStore?  No method to so.  Why?
   165  			return err
   166  		}
   167  	}
   168  
   169  	d.networks[id] = n
   170  
   171  	return nil
   172  }
   173  
   174  func (d *driver) DeleteNetwork(nid string) error {
   175  	if nid == "" {
   176  		return fmt.Errorf("invalid network id")
   177  	}
   178  
   179  	// Make sure driver resources are initialized before proceeding
   180  	if err := d.configure(); err != nil {
   181  		return err
   182  	}
   183  
   184  	d.Lock()
   185  	// Only perform a peer flush operation (if required) AFTER unlocking
   186  	// the driver lock to avoid deadlocking w/ the peerDB.
   187  	var doPeerFlush bool
   188  	defer func() {
   189  		d.Unlock()
   190  		if doPeerFlush {
   191  			d.peerFlush(nid)
   192  		}
   193  	}()
   194  
   195  	// This is similar to d.network(), but we need to keep holding the lock
   196  	// until we are done removing this network.
   197  	n := d.networks[nid]
   198  	if n == nil {
   199  		return fmt.Errorf("could not find network with id %s", nid)
   200  	}
   201  
   202  	for _, ep := range n.endpoints {
   203  		if ep.ifName != "" {
   204  			if link, err := ns.NlHandle().LinkByName(ep.ifName); err == nil {
   205  				if err := ns.NlHandle().LinkDel(link); err != nil {
   206  					log.G(context.TODO()).WithError(err).Warnf("Failed to delete interface (%s)'s link on endpoint (%s) delete", ep.ifName, ep.id)
   207  				}
   208  			}
   209  		}
   210  	}
   211  
   212  	doPeerFlush = true
   213  	delete(d.networks, nid)
   214  
   215  	if n.secure {
   216  		for _, s := range n.subnets {
   217  			if err := programMangle(s.vni, false); err != nil {
   218  				log.G(context.TODO()).WithFields(log.Fields{
   219  					"error":      err,
   220  					"network_id": n.id,
   221  					"subnet":     s.subnetIP,
   222  				}).Warn("Failed to clean up iptables rules during overlay network deletion")
   223  			}
   224  			if err := programInput(s.vni, false); err != nil {
   225  				log.G(context.TODO()).WithFields(log.Fields{
   226  					"error":      err,
   227  					"network_id": n.id,
   228  					"subnet":     s.subnetIP,
   229  				}).Warn("Failed to clean up iptables rules during overlay network deletion")
   230  			}
   231  		}
   232  	}
   233  
   234  	return nil
   235  }
   236  
// ProgramExternalConnectivity is a no-op for this driver and always returns
// nil.
func (d *driver) ProgramExternalConnectivity(nid, eid string, options map[string]interface{}) error {
	return nil
}
   240  
// RevokeExternalConnectivity is a no-op for this driver and always returns
// nil.
func (d *driver) RevokeExternalConnectivity(nid, eid string) error {
	return nil
}
   244  
   245  func (n *network) joinSandbox(s *subnet, incJoinCount bool) error {
   246  	// If there is a race between two go routines here only one will win
   247  	// the other will wait.
   248  	networkOnce.Do(populateVNITbl)
   249  
   250  	n.Lock()
   251  	// If initialization was successful then tell the peerDB to initialize the
   252  	// sandbox with all the peers previously received from networkdb. But only
   253  	// do this after unlocking the network. Otherwise we could deadlock with
   254  	// on the peerDB channel while peerDB is waiting for the network lock.
   255  	var doInitPeerDB bool
   256  	defer func() {
   257  		n.Unlock()
   258  		if doInitPeerDB {
   259  			go n.driver.initSandboxPeerDB(n.id)
   260  		}
   261  	}()
   262  
   263  	if !n.sboxInit {
   264  		n.initErr = n.initSandbox()
   265  		doInitPeerDB = n.initErr == nil
   266  		// If there was an error, we cannot recover it
   267  		n.sboxInit = true
   268  	}
   269  
   270  	if n.initErr != nil {
   271  		return fmt.Errorf("network sandbox join failed: %v", n.initErr)
   272  	}
   273  
   274  	subnetErr := s.initErr
   275  	if !s.sboxInit {
   276  		subnetErr = n.initSubnetSandbox(s)
   277  		// We can recover from these errors
   278  		if subnetErr == nil {
   279  			s.initErr = subnetErr
   280  			s.sboxInit = true
   281  		}
   282  	}
   283  	if subnetErr != nil {
   284  		return fmt.Errorf("subnet sandbox join failed for %q: %v", s.subnetIP.String(), subnetErr)
   285  	}
   286  
   287  	if incJoinCount {
   288  		n.joinCnt++
   289  	}
   290  
   291  	return nil
   292  }
   293  
   294  func (n *network) leaveSandbox() {
   295  	n.Lock()
   296  	defer n.Unlock()
   297  	n.joinCnt--
   298  	if n.joinCnt != 0 {
   299  		return
   300  	}
   301  
   302  	n.destroySandbox()
   303  
   304  	n.sboxInit = false
   305  	n.initErr = nil
   306  	for _, s := range n.subnets {
   307  		s.sboxInit = false
   308  		s.initErr = nil
   309  	}
   310  }
   311  
   312  // to be called while holding network lock
   313  func (n *network) destroySandbox() {
   314  	if n.sbox != nil {
   315  		for _, iface := range n.sbox.Interfaces() {
   316  			if err := iface.Remove(); err != nil {
   317  				log.G(context.TODO()).Debugf("Remove interface %s failed: %v", iface.SrcName(), err)
   318  			}
   319  		}
   320  
   321  		for _, s := range n.subnets {
   322  			if s.vxlanName != "" {
   323  				err := deleteInterface(s.vxlanName)
   324  				if err != nil {
   325  					log.G(context.TODO()).Warnf("could not cleanup sandbox properly: %v", err)
   326  				}
   327  			}
   328  		}
   329  
   330  		n.sbox.Destroy()
   331  		n.sbox = nil
   332  	}
   333  }
   334  
   335  func populateVNITbl() {
   336  	filepath.WalkDir(filepath.Dir(osl.GenerateKey("walk")),
   337  		// NOTE(cpuguy83): The linter picked up on the fact that this walk function was not using this error argument
   338  		// That seems wrong... however I'm not familiar with this code or if that error matters
   339  		func(path string, _ os.DirEntry, _ error) error {
   340  			_, fname := filepath.Split(path)
   341  
   342  			if len(strings.Split(fname, "-")) <= 1 {
   343  				return nil
   344  			}
   345  
   346  			n, err := netns.GetFromPath(path)
   347  			if err != nil {
   348  				log.G(context.TODO()).Errorf("Could not open namespace path %s during vni population: %v", path, err)
   349  				return nil
   350  			}
   351  			defer n.Close()
   352  
   353  			nlh, err := netlink.NewHandleAt(n, unix.NETLINK_ROUTE)
   354  			if err != nil {
   355  				log.G(context.TODO()).Errorf("Could not open netlink handle during vni population for ns %s: %v", path, err)
   356  				return nil
   357  			}
   358  			defer nlh.Close()
   359  
   360  			err = nlh.SetSocketTimeout(soTimeout)
   361  			if err != nil {
   362  				log.G(context.TODO()).Warnf("Failed to set the timeout on the netlink handle sockets for vni table population: %v", err)
   363  			}
   364  
   365  			links, err := nlh.LinkList()
   366  			if err != nil {
   367  				log.G(context.TODO()).Errorf("Failed to list interfaces during vni population for ns %s: %v", path, err)
   368  				return nil
   369  			}
   370  
   371  			for _, l := range links {
   372  				if l.Type() == "vxlan" {
   373  					vniTbl[uint32(l.(*netlink.Vxlan).VxlanId)] = path
   374  				}
   375  			}
   376  
   377  			return nil
   378  		})
   379  }
   380  
   381  func (n *network) generateVxlanName(s *subnet) string {
   382  	id := n.id
   383  	if len(n.id) > 5 {
   384  		id = n.id[:5]
   385  	}
   386  
   387  	return fmt.Sprintf("vx-%06x-%v", s.vni, id)
   388  }
   389  
   390  func (n *network) generateBridgeName(s *subnet) string {
   391  	id := n.id
   392  	if len(n.id) > 5 {
   393  		id = n.id[:5]
   394  	}
   395  
   396  	return n.getBridgeNamePrefix(s) + "-" + id
   397  }
   398  
// getBridgeNamePrefix returns the prefix of the bridge name for subnet s:
// "ov-" followed by the VNI formatted as at least six zero-padded hex digits.
func (n *network) getBridgeNamePrefix(s *subnet) string {
	return fmt.Sprintf("ov-%06x", s.vni)
}
   402  
// setupSubnetSandbox creates the bridge brName and the vxlan device vxlanName
// for subnet s and adds them to the network's sandbox, with the vxlan device
// attached to the bridge.
//
// A namespace recorded in vniTbl for the subnet's VNI is torn down first. When
// adding the vxlan device to the sandbox fails, the bridge is removed from the
// sandbox and the vxlan device is deleted before the error is returned.
// A failure to set the bridge's default VLAN is logged but not returned.
func (n *network) setupSubnetSandbox(s *subnet, brName, vxlanName string) error {
	// Check whether this subnet's vni is being used in some
	// other namespace by looking at vniTbl that we just
	// populated in the once init. If a hit is found then
	// it must be a stale namespace from a previous
	// life. Destroy it completely and reclaim its resources.
	networkMu.Lock()
	path, ok := vniTbl[s.vni]
	networkMu.Unlock()

	if ok {
		deleteVxlanByVNI(path, s.vni)
		if err := unix.Unmount(path, unix.MNT_FORCE); err != nil {
			log.G(context.TODO()).Errorf("unmount of %s failed: %v", path, err)
		}
		os.Remove(path)

		networkMu.Lock()
		delete(vniTbl, s.vni)
		networkMu.Unlock()
	}

	// create a bridge and vxlan device for this subnet and move it to the sandbox
	sbox := n.sbox

	// The bridge carries the subnet's gateway address.
	if err := sbox.AddInterface(brName, "br", osl.WithIPv4Address(s.gwIP), osl.WithIsBridge(true)); err != nil {
		return fmt.Errorf("bridge creation in sandbox failed for subnet %q: %v", s.subnetIP.String(), err)
	}

	err := createVxlan(vxlanName, s.vni, n.maxMTU())
	if err != nil {
		return err
	}

	if err := sbox.AddInterface(vxlanName, "vxlan", osl.WithMaster(brName)); err != nil {
		// If adding vxlan device to the overlay namespace fails, remove the bridge interface we
		// already added to the namespace. This allows the caller to try the setup again.
		for _, iface := range sbox.Interfaces() {
			if iface.SrcName() == brName {
				if ierr := iface.Remove(); ierr != nil {
					log.G(context.TODO()).Errorf("removing bridge failed from ov ns %v failed, %v", n.sbox.Key(), ierr)
				}
			}
		}

		// Also, delete the vxlan interface. Since a global vni id is associated
		// with the vxlan interface, an orphaned vxlan interface will result in
		// failure of vxlan device creation if the vni is assigned to some other
		// network.
		if deleteErr := deleteInterface(vxlanName); deleteErr != nil {
			log.G(context.TODO()).Warnf("could not delete vxlan interface, %s, error %v, after config error, %v", vxlanName, deleteErr, err)
		}
		return fmt.Errorf("vxlan interface creation failed for subnet %q: %v", s.subnetIP.String(), err)
	}

	if err := setDefaultVLAN(sbox); err != nil {
		// not a fatal error
		log.G(context.TODO()).WithError(err).Error("set bridge default vlan failed")
	}
	return nil
}
   464  
// setDefaultVLAN writes "0\n" to the bridge/default_pvid sysfs attribute of a
// bridge interface in namespace ns. When ns reports several bridge
// interfaces, the last one returned by ns.Interfaces() is used.
//
// It returns the error from ns.InvokeFunc if that fails, otherwise the first
// error hit inside the invoked function (unshare, mount or write), or nil.
func setDefaultVLAN(ns *osl.Namespace) error {
	var brName string
	for _, i := range ns.Interfaces() {
		if i.Bridge() {
			brName = i.DstName()
		}
	}

	// IFLA_BR_VLAN_DEFAULT_PVID was added in Linux v4.4 (see torvalds/linux@0f963b7), so we can't use netlink for
	// setting this until Docker drops support for CentOS/RHEL 7 (kernel 3.10, eol date: 2024-06-30).
	// innerErr carries a failure out of the callback, which has no return value.
	var innerErr error
	err := ns.InvokeFunc(func() {
		// Contrary to what the sysfs(5) man page says, the entries of /sys/class/net
		// represent the networking devices visible in the network namespace of the
		// process which mounted the sysfs filesystem, irrespective of the network
		// namespace of the process accessing the directory. Remount sysfs in order to
		// see the network devices in sbox's network namespace, making sure the mount
		// doesn't propagate back.
		//
		// The Linux implementation of (osl.Sandbox).InvokeFunc() runs the function in a
		// dedicated goroutine. The effects of unshare(CLONE_NEWNS) on a thread cannot
		// be reverted so the thread needs to be terminated once the goroutine is
		// finished.
		runtime.LockOSThread()
		if err := unix.Unshare(unix.CLONE_NEWNS); err != nil {
			innerErr = os.NewSyscallError("unshare", err)
			return
		}
		// Mark the whole mount tree as slave (recursively) so that the sysfs
		// mount below does not propagate back.
		if err := unix.Mount("", "/", "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil {
			innerErr = &os.PathError{Op: "mount", Path: "/", Err: err}
			return
		}
		if err := unix.Mount("sysfs", "/sys", "sysfs", 0, ""); err != nil {
			innerErr = &os.PathError{Op: "mount", Path: "/sys", Err: err}
			return
		}

		path := filepath.Join("/sys/class/net", brName, "bridge/default_pvid")
		data := []byte{'0', '\n'}

		if err := os.WriteFile(path, data, 0o644); err != nil {
			innerErr = fmt.Errorf("failed to enable default vlan on bridge %s: %w", brName, err)
			return
		}
	})
	if err != nil {
		return err
	}
	return innerErr
}
   515  
   516  // Must be called with the network lock
   517  func (n *network) initSubnetSandbox(s *subnet) error {
   518  	brName := n.generateBridgeName(s)
   519  	vxlanName := n.generateVxlanName(s)
   520  
   521  	// Program iptables rules for mandatory encryption of the secure
   522  	// network, or clean up leftover rules for a stale secure network which
   523  	// was previously assigned the same VNI.
   524  	if err := programMangle(s.vni, n.secure); err != nil {
   525  		return err
   526  	}
   527  	if err := programInput(s.vni, n.secure); err != nil {
   528  		if n.secure {
   529  			return multierror.Append(err, programMangle(s.vni, false))
   530  		}
   531  	}
   532  
   533  	if err := n.setupSubnetSandbox(s, brName, vxlanName); err != nil {
   534  		return err
   535  	}
   536  
   537  	s.vxlanName = vxlanName
   538  	s.brName = brName
   539  
   540  	return nil
   541  }
   542  
   543  func (n *network) cleanupStaleSandboxes() {
   544  	filepath.WalkDir(filepath.Dir(osl.GenerateKey("walk")),
   545  		func(path string, _ os.DirEntry, _ error) error {
   546  			_, fname := filepath.Split(path)
   547  
   548  			pList := strings.Split(fname, "-")
   549  			if len(pList) <= 1 {
   550  				return nil
   551  			}
   552  
   553  			pattern := pList[1]
   554  			if strings.Contains(n.id, pattern) {
   555  				// Delete all vnis
   556  				deleteVxlanByVNI(path, 0)
   557  				unix.Unmount(path, unix.MNT_DETACH)
   558  				os.Remove(path)
   559  
   560  				// Now that we have destroyed this
   561  				// sandbox, remove all references to
   562  				// it in vniTbl so that we don't
   563  				// inadvertently destroy the sandbox
   564  				// created in this life.
   565  				networkMu.Lock()
   566  				for vni, tblPath := range vniTbl {
   567  					if tblPath == path {
   568  						delete(vniTbl, vni)
   569  					}
   570  				}
   571  				networkMu.Unlock()
   572  			}
   573  
   574  			return nil
   575  		})
   576  }
   577  
   578  func (n *network) initSandbox() error {
   579  	n.initEpoch++
   580  
   581  	// If there are any stale sandboxes related to this network
   582  	// from previous daemon life clean it up here
   583  	n.cleanupStaleSandboxes()
   584  
   585  	key := osl.GenerateKey(fmt.Sprintf("%d-", n.initEpoch) + n.id)
   586  	sbox, err := osl.NewSandbox(key, true, false)
   587  	if err != nil {
   588  		return fmt.Errorf("could not get network sandbox: %v", err)
   589  	}
   590  
   591  	// this is needed to let the peerAdd configure the sandbox
   592  	n.sbox = sbox
   593  
   594  	return nil
   595  }
   596  
   597  func (d *driver) network(nid string) *network {
   598  	d.Lock()
   599  	n := d.networks[nid]
   600  	d.Unlock()
   601  
   602  	return n
   603  }
   604  
   605  func (n *network) sandbox() *osl.Namespace {
   606  	n.Lock()
   607  	defer n.Unlock()
   608  	return n.sbox
   609  }
   610  
   611  // getSubnetforIP returns the subnet to which the given IP belongs
   612  func (n *network) getSubnetforIP(ip *net.IPNet) *subnet {
   613  	for _, s := range n.subnets {
   614  		// first check if the mask lengths are the same
   615  		i, _ := s.subnetIP.Mask.Size()
   616  		j, _ := ip.Mask.Size()
   617  		if i != j {
   618  			continue
   619  		}
   620  		if s.subnetIP.Contains(ip.IP) {
   621  			return s
   622  		}
   623  	}
   624  	return nil
   625  }