github.com/rish1988/moby@v25.0.2+incompatible/libnetwork/drivers/overlay/peerdb.go (about)

     1  // FIXME(thaJeztah): remove once we are a module; the go:build directive prevents go from downgrading language version to go1.16:
     2  //go:build go1.19 && linux
     3  
     4  package overlay
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"net"
    10  	"sync"
    11  	"syscall"
    12  
    13  	"github.com/containerd/log"
    14  	"github.com/docker/docker/libnetwork/internal/setmatrix"
    15  	"github.com/docker/docker/libnetwork/osl"
    16  )
    17  
// ovPeerTable is the name of the overlay peer table. It is not referenced in
// this file; presumably it names the table the peer records are exchanged
// through elsewhere in the package - TODO confirm against the callers.
const ovPeerTable = "overlay_peer_table"
    19  
// peerKey identifies a peer by its <IP, MAC> pair. The textual form produced
// by String is what the peer database uses as the key of a peer set, and Scan
// parses that form back.
type peerKey struct {
	// peerIP is the IP address of the peer.
	peerIP net.IP
	// peerMac is the MAC address of the peer.
	peerMac net.HardwareAddr
}
    24  
// peerEntry is the in-memory description of one peer record. It is converted
// to a peerEntryDB (see MarshalDB) before being stored in the peer database.
type peerEntry struct {
	// eid is the ID passed as "eid" to peerAdd/peerDelete (presumably the
	// endpoint ID - verify against callers).
	eid string
	// vtep is the address used as the destination of the bridge fdb entry
	// programmed for the peer MAC (see peerAddOp).
	vtep net.IP
	// peerIPMask is the network mask that goes with the peer IP.
	peerIPMask net.IPMask
	// isLocal marks peers for which peerAddOp performs no sandbox programming.
	isLocal bool
}
    31  
    32  func (p *peerEntry) MarshalDB() peerEntryDB {
    33  	ones, bits := p.peerIPMask.Size()
    34  	return peerEntryDB{
    35  		eid:            p.eid,
    36  		vtep:           p.vtep.String(),
    37  		peerIPMaskOnes: ones,
    38  		peerIPMaskBits: bits,
    39  		isLocal:        p.isLocal,
    40  	}
    41  }
    42  
// peerEntryDB is the structure saved into the set (SetMatrix). Because of the
// set implementation, the inserted value has to be hashable, so the []byte
// based fields of peerEntry are converted into a string and two ints.
type peerEntryDB struct {
	// eid is copied verbatim from peerEntry.eid.
	eid string
	// vtep is the String() form of peerEntry.vtep.
	vtep string
	// peerIPMaskOnes is the number of leading ones of peerEntry.peerIPMask.
	peerIPMaskOnes int
	// peerIPMaskBits is the total bit length of peerEntry.peerIPMask.
	peerIPMaskBits int
	// isLocal is copied verbatim from peerEntry.isLocal.
	isLocal bool
}
    53  
    54  func (p *peerEntryDB) UnMarshalDB() peerEntry {
    55  	return peerEntry{
    56  		eid:        p.eid,
    57  		vtep:       net.ParseIP(p.vtep),
    58  		peerIPMask: net.CIDRMask(p.peerIPMaskOnes, p.peerIPMaskBits),
    59  		isLocal:    p.isLocal,
    60  	}
    61  }
    62  
// peerMap holds the peer records of a single network.
type peerMap struct {
	// set of peerEntry, note the values have to be objects and not pointers to maintain the proper equality checks.
	// The sets are keyed by the String() form of a peerKey (see peerDbAdd).
	mp setmatrix.SetMatrix[peerEntryDB]
	// Mutex is held by peerDbAdd, peerDbDelete and peerDbNetworkWalk while
	// they access mp.
	sync.Mutex
}
    68  
// peerNetworkMap groups the per-network peer maps.
type peerNetworkMap struct {
	// mp maps to the peerMap of one network. The functions in this file index
	// the driver's peer database by network ID (nid), not by peerKey.
	mp map[string]*peerMap
	// Mutex guards mp itself; each peerMap carries its own lock for its content.
	sync.Mutex
}
    74  
    75  func (pKey peerKey) String() string {
    76  	return fmt.Sprintf("%s %s", pKey.peerIP, pKey.peerMac)
    77  }
    78  
    79  func (pKey *peerKey) Scan(state fmt.ScanState, verb rune) error {
    80  	ipB, err := state.Token(true, nil)
    81  	if err != nil {
    82  		return err
    83  	}
    84  
    85  	pKey.peerIP = net.ParseIP(string(ipB))
    86  
    87  	macB, err := state.Token(true, nil)
    88  	if err != nil {
    89  		return err
    90  	}
    91  
    92  	pKey.peerMac, err = net.ParseMAC(string(macB))
    93  	return err
    94  }
    95  
    96  func (d *driver) peerDbWalk(f func(string, *peerKey, *peerEntry) bool) error {
    97  	d.peerDb.Lock()
    98  	nids := []string{}
    99  	for nid := range d.peerDb.mp {
   100  		nids = append(nids, nid)
   101  	}
   102  	d.peerDb.Unlock()
   103  
   104  	for _, nid := range nids {
   105  		d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool {
   106  			return f(nid, pKey, pEntry)
   107  		})
   108  	}
   109  	return nil
   110  }
   111  
   112  func (d *driver) peerDbNetworkWalk(nid string, f func(*peerKey, *peerEntry) bool) error {
   113  	d.peerDb.Lock()
   114  	pMap, ok := d.peerDb.mp[nid]
   115  	d.peerDb.Unlock()
   116  
   117  	if !ok {
   118  		return nil
   119  	}
   120  
   121  	mp := map[string]peerEntry{}
   122  	pMap.Lock()
   123  	for _, pKeyStr := range pMap.mp.Keys() {
   124  		entryDBList, ok := pMap.mp.Get(pKeyStr)
   125  		if ok {
   126  			peerEntryDB := entryDBList[0]
   127  			mp[pKeyStr] = peerEntryDB.UnMarshalDB()
   128  		}
   129  	}
   130  	pMap.Unlock()
   131  
   132  	for pKeyStr, pEntry := range mp {
   133  		var pKey peerKey
   134  		pEntry := pEntry
   135  		if _, err := fmt.Sscan(pKeyStr, &pKey); err != nil {
   136  			log.G(context.TODO()).Warnf("Peer key scan on network %s failed: %v", nid, err)
   137  		}
   138  		if f(&pKey, &pEntry) {
   139  			return nil
   140  		}
   141  	}
   142  
   143  	return nil
   144  }
   145  
   146  func (d *driver) peerDbSearch(nid string, peerIP net.IP) (*peerKey, *peerEntry, error) {
   147  	var pKeyMatched *peerKey
   148  	var pEntryMatched *peerEntry
   149  	err := d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool {
   150  		if pKey.peerIP.Equal(peerIP) {
   151  			pKeyMatched = pKey
   152  			pEntryMatched = pEntry
   153  			return true
   154  		}
   155  
   156  		return false
   157  	})
   158  	if err != nil {
   159  		return nil, nil, fmt.Errorf("peerdb search for peer ip %q failed: %v", peerIP, err)
   160  	}
   161  
   162  	if pKeyMatched == nil || pEntryMatched == nil {
   163  		return nil, nil, fmt.Errorf("peer ip %q not found in peerdb", peerIP)
   164  	}
   165  
   166  	return pKeyMatched, pEntryMatched, nil
   167  }
   168  
   169  func (d *driver) peerDbAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, isLocal bool) (bool, int) {
   170  	d.peerDb.Lock()
   171  	pMap, ok := d.peerDb.mp[nid]
   172  	if !ok {
   173  		pMap = &peerMap{}
   174  		d.peerDb.mp[nid] = pMap
   175  	}
   176  	d.peerDb.Unlock()
   177  
   178  	pKey := peerKey{
   179  		peerIP:  peerIP,
   180  		peerMac: peerMac,
   181  	}
   182  
   183  	pEntry := peerEntry{
   184  		eid:        eid,
   185  		vtep:       vtep,
   186  		peerIPMask: peerIPMask,
   187  		isLocal:    isLocal,
   188  	}
   189  
   190  	pMap.Lock()
   191  	defer pMap.Unlock()
   192  	b, i := pMap.mp.Insert(pKey.String(), pEntry.MarshalDB())
   193  	if i != 1 {
   194  		// Transient case, there is more than one endpoint that is using the same IP,MAC pair
   195  		s, _ := pMap.mp.String(pKey.String())
   196  		log.G(context.TODO()).Warnf("peerDbAdd transient condition - Key:%s cardinality:%d db state:%s", pKey.String(), i, s)
   197  	}
   198  	return b, i
   199  }
   200  
   201  func (d *driver) peerDbDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, isLocal bool) (bool, int) {
   202  	d.peerDb.Lock()
   203  	pMap, ok := d.peerDb.mp[nid]
   204  	if !ok {
   205  		d.peerDb.Unlock()
   206  		return false, 0
   207  	}
   208  	d.peerDb.Unlock()
   209  
   210  	pKey := peerKey{
   211  		peerIP:  peerIP,
   212  		peerMac: peerMac,
   213  	}
   214  
   215  	pEntry := peerEntry{
   216  		eid:        eid,
   217  		vtep:       vtep,
   218  		peerIPMask: peerIPMask,
   219  		isLocal:    isLocal,
   220  	}
   221  
   222  	pMap.Lock()
   223  	defer pMap.Unlock()
   224  	b, i := pMap.mp.Remove(pKey.String(), pEntry.MarshalDB())
   225  	if i != 0 {
   226  		// Transient case, there is more than one endpoint that is using the same IP,MAC pair
   227  		s, _ := pMap.mp.String(pKey.String())
   228  		log.G(context.TODO()).Warnf("peerDbDelete transient condition - Key:%s cardinality:%d db state:%s", pKey.String(), i, s)
   229  	}
   230  	return b, i
   231  }
   232  
   233  // The overlay uses a lazy initialization approach, this means that when a network is created
   234  // and the driver registered the overlay does not allocate resources till the moment that a
   235  // sandbox is actually created.
   236  // At the moment of this call, that happens when a sandbox is initialized, is possible that
   237  // networkDB has already delivered some events of peers already available on remote nodes,
   238  // these peers are saved into the peerDB and this function is used to properly configure
   239  // the network sandbox with all those peers that got previously notified.
   240  // Note also that this method sends a single message on the channel and the go routine on the
   241  // other side, will atomically loop on the whole table of peers and will program their state
   242  // in one single atomic operation. This is fundamental to guarantee consistency, and avoid that
   243  // new peerAdd or peerDelete gets reordered during the sandbox init.
   244  func (d *driver) initSandboxPeerDB(nid string) {
   245  	d.peerOpMu.Lock()
   246  	defer d.peerOpMu.Unlock()
   247  	if err := d.peerInitOp(nid); err != nil {
   248  		log.G(context.TODO()).WithError(err).Warn("Peer init operation failed")
   249  	}
   250  }
   251  
   252  func (d *driver) peerInitOp(nid string) error {
   253  	return d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool {
   254  		// Local entries do not need to be added
   255  		if pEntry.isLocal {
   256  			return false
   257  		}
   258  
   259  		d.peerAddOp(nid, pEntry.eid, pKey.peerIP, pEntry.peerIPMask, pKey.peerMac, pEntry.vtep, false, false, false, pEntry.isLocal)
   260  		// return false to loop on all entries
   261  		return false
   262  	})
   263  }
   264  
   265  func (d *driver) peerAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, l2Miss, l3Miss, localPeer bool) {
   266  	d.peerOpMu.Lock()
   267  	defer d.peerOpMu.Unlock()
   268  	err := d.peerAddOp(nid, eid, peerIP, peerIPMask, peerMac, vtep, l2Miss, l3Miss, true, localPeer)
   269  	if err != nil {
   270  		log.G(context.TODO()).WithError(err).Warn("Peer add operation failed")
   271  	}
   272  }
   273  
   274  func (d *driver) peerAddOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, l2Miss, l3Miss, updateDB, localPeer bool) error {
   275  	if err := validateID(nid, eid); err != nil {
   276  		return err
   277  	}
   278  
   279  	var dbEntries int
   280  	var inserted bool
   281  	if updateDB {
   282  		inserted, dbEntries = d.peerDbAdd(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer)
   283  		if !inserted {
   284  			log.G(context.TODO()).Warnf("Entry already present in db: nid:%s eid:%s peerIP:%v peerMac:%v isLocal:%t vtep:%v",
   285  				nid, eid, peerIP, peerMac, localPeer, vtep)
   286  		}
   287  	}
   288  
   289  	// Local peers do not need any further configuration
   290  	if localPeer {
   291  		return nil
   292  	}
   293  
   294  	n := d.network(nid)
   295  	if n == nil {
   296  		return nil
   297  	}
   298  
   299  	sbox := n.sandbox()
   300  	if sbox == nil {
   301  		// We are hitting this case for all the events that are arriving before that the sandbox
   302  		// is being created. The peer got already added into the database and the sanbox init will
   303  		// call the peerDbUpdateSandbox that will configure all these peers from the database
   304  		return nil
   305  	}
   306  
   307  	IP := &net.IPNet{
   308  		IP:   peerIP,
   309  		Mask: peerIPMask,
   310  	}
   311  
   312  	s := n.getSubnetforIP(IP)
   313  	if s == nil {
   314  		return fmt.Errorf("couldn't find the subnet %q in network %q", IP.String(), n.id)
   315  	}
   316  
   317  	if err := n.joinSandbox(s, false); err != nil {
   318  		return fmt.Errorf("subnet sandbox join failed for %q: %v", s.subnetIP.String(), err)
   319  	}
   320  
   321  	if err := d.checkEncryption(nid, vtep, false, true); err != nil {
   322  		log.G(context.TODO()).Warn(err)
   323  	}
   324  
   325  	// Add neighbor entry for the peer IP
   326  	if err := sbox.AddNeighbor(peerIP, peerMac, l3Miss, osl.WithLinkName(s.vxlanName)); err != nil {
   327  		if _, ok := err.(osl.NeighborSearchError); ok && dbEntries > 1 {
   328  			// We are in the transient case so only the first configuration is programmed into the kernel
   329  			// Upon deletion if the active configuration is deleted the next one from the database will be restored
   330  			// Note we are skipping also the next configuration
   331  			return nil
   332  		}
   333  		return fmt.Errorf("could not add neighbor entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err)
   334  	}
   335  
   336  	// Add fdb entry to the bridge for the peer mac
   337  	if err := sbox.AddNeighbor(vtep, peerMac, l2Miss, osl.WithLinkName(s.vxlanName), osl.WithFamily(syscall.AF_BRIDGE)); err != nil {
   338  		return fmt.Errorf("could not add fdb entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err)
   339  	}
   340  
   341  	return nil
   342  }
   343  
   344  func (d *driver) peerDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, localPeer bool) {
   345  	d.peerOpMu.Lock()
   346  	defer d.peerOpMu.Unlock()
   347  	err := d.peerDeleteOp(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer)
   348  	if err != nil {
   349  		log.G(context.TODO()).WithError(err).Warn("Peer delete operation failed")
   350  	}
   351  }
   352  
   353  func (d *driver) peerDeleteOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, localPeer bool) error {
   354  	if err := validateID(nid, eid); err != nil {
   355  		return err
   356  	}
   357  
   358  	deleted, dbEntries := d.peerDbDelete(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer)
   359  	if !deleted {
   360  		log.G(context.TODO()).Warnf("Entry was not in db: nid:%s eid:%s peerIP:%v peerMac:%v isLocal:%t vtep:%v",
   361  			nid, eid, peerIP, peerMac, localPeer, vtep)
   362  	}
   363  
   364  	n := d.network(nid)
   365  	if n == nil {
   366  		return nil
   367  	}
   368  
   369  	sbox := n.sandbox()
   370  	if sbox == nil {
   371  		return nil
   372  	}
   373  
   374  	if err := d.checkEncryption(nid, vtep, localPeer, false); err != nil {
   375  		log.G(context.TODO()).Warn(err)
   376  	}
   377  
   378  	// Local peers do not have any local configuration to delete
   379  	if !localPeer {
   380  		// Remove fdb entry to the bridge for the peer mac
   381  		if err := sbox.DeleteNeighbor(vtep, peerMac); err != nil {
   382  			if _, ok := err.(osl.NeighborSearchError); ok && dbEntries > 0 {
   383  				// We fall in here if there is a transient state and if the neighbor that is being deleted
   384  				// was never been configured into the kernel (we allow only 1 configuration at the time per <ip,mac> mapping)
   385  				return nil
   386  			}
   387  			return fmt.Errorf("could not delete fdb entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err)
   388  		}
   389  
   390  		// Delete neighbor entry for the peer IP
   391  		if err := sbox.DeleteNeighbor(peerIP, peerMac); err != nil {
   392  			return fmt.Errorf("could not delete neighbor entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err)
   393  		}
   394  	}
   395  
   396  	if dbEntries == 0 {
   397  		return nil
   398  	}
   399  
   400  	// If there is still an entry into the database and the deletion went through without errors means that there is now no
   401  	// configuration active in the kernel.
   402  	// Restore one configuration for the <ip,mac> directly from the database, note that is guaranteed that there is one
   403  	peerKey, peerEntry, err := d.peerDbSearch(nid, peerIP)
   404  	if err != nil {
   405  		log.G(context.TODO()).Errorf("peerDeleteOp unable to restore a configuration for nid:%s ip:%v mac:%v err:%s", nid, peerIP, peerMac, err)
   406  		return err
   407  	}
   408  	return d.peerAddOp(nid, peerEntry.eid, peerIP, peerEntry.peerIPMask, peerKey.peerMac, peerEntry.vtep, false, false, false, peerEntry.isLocal)
   409  }
   410  
   411  func (d *driver) peerFlush(nid string) {
   412  	d.peerOpMu.Lock()
   413  	defer d.peerOpMu.Unlock()
   414  	if err := d.peerFlushOp(nid); err != nil {
   415  		log.G(context.TODO()).WithError(err).Warn("Peer flush operation failed")
   416  	}
   417  }
   418  
   419  func (d *driver) peerFlushOp(nid string) error {
   420  	d.peerDb.Lock()
   421  	defer d.peerDb.Unlock()
   422  	_, ok := d.peerDb.mp[nid]
   423  	if !ok {
   424  		return fmt.Errorf("Unable to find the peerDB for nid:%s", nid)
   425  	}
   426  	delete(d.peerDb.mp, nid)
   427  	return nil
   428  }
   429  
   430  func (d *driver) peerDBUpdateSelf() {
   431  	d.peerDbWalk(func(nid string, pkey *peerKey, pEntry *peerEntry) bool {
   432  		if pEntry.isLocal {
   433  			pEntry.vtep = d.advertiseAddress
   434  		}
   435  		return false
   436  	})
   437  }