github.com/rawahars/moby@v24.0.4+incompatible/libnetwork/drivers/overlay/peerdb.go (about)

     1  //go:build linux
     2  // +build linux
     3  
     4  package overlay
     5  
     6  import (
     7  	"fmt"
     8  	"net"
     9  	"sync"
    10  	"syscall"
    11  
    12  	"github.com/docker/docker/libnetwork/internal/setmatrix"
    13  	"github.com/docker/docker/libnetwork/osl"
    14  	"github.com/sirupsen/logrus"
    15  )
    16  
// ovPeerTable is a table-name string for overlay peers. It is not referenced
// anywhere in the portion of the file visible here.
// NOTE(review): presumably the networkDB table the driver watches for peer
// events - confirm against the rest of the package.
const ovPeerTable = "overlay_peer_table"
    18  
// peerKey identifies a peer by its <IP, MAC> pair. Its String() form is what
// peerDbAdd/peerDbDelete use as the key of the per-network peer set, and Scan
// parses that string form back into a peerKey.
type peerKey struct {
	peerIP  net.IP
	peerMac net.HardwareAddr
}
    23  
    24  type peerEntry struct {
    25  	eid        string
    26  	vtep       net.IP
    27  	peerIPMask net.IPMask
    28  	isLocal    bool
    29  }
    30  
    31  func (p *peerEntry) MarshalDB() peerEntryDB {
    32  	ones, bits := p.peerIPMask.Size()
    33  	return peerEntryDB{
    34  		eid:            p.eid,
    35  		vtep:           p.vtep.String(),
    36  		peerIPMaskOnes: ones,
    37  		peerIPMaskBits: bits,
    38  		isLocal:        p.isLocal,
    39  	}
    40  }
    41  
    42  // This the structure saved into the set (SetMatrix), due to the implementation of it
    43  // the value inserted in the set has to be Hashable so the []byte had to be converted into
    44  // strings
    45  type peerEntryDB struct {
    46  	eid            string
    47  	vtep           string
    48  	peerIPMaskOnes int
    49  	peerIPMaskBits int
    50  	isLocal        bool
    51  }
    52  
    53  func (p *peerEntryDB) UnMarshalDB() peerEntry {
    54  	return peerEntry{
    55  		eid:        p.eid,
    56  		vtep:       net.ParseIP(p.vtep),
    57  		peerIPMask: net.CIDRMask(p.peerIPMaskOnes, p.peerIPMaskBits),
    58  		isLocal:    p.isLocal,
    59  	}
    60  }
    61  
// peerMap holds the peer entries of a single network. Callers in this file take
// the embedded mutex around every access to mp.
type peerMap struct {
	// Set of peerEntryDB values stored under the string form of a peerKey.
	// The values have to be objects and not pointers so that the set's
	// equality checks compare contents.
	mp setmatrix.SetMatrix[peerEntryDB]
	sync.Mutex
}
    67  
// peerNetworkMap is the top-level peer database: one peerMap per network.
// Callers in this file take the embedded mutex around every access to mp.
type peerNetworkMap struct {
	// Map indexed by network ID (nid); each value is that network's peerMap.
	mp map[string]*peerMap
	sync.Mutex
}
    73  
    74  func (pKey peerKey) String() string {
    75  	return fmt.Sprintf("%s %s", pKey.peerIP, pKey.peerMac)
    76  }
    77  
    78  func (pKey *peerKey) Scan(state fmt.ScanState, verb rune) error {
    79  	ipB, err := state.Token(true, nil)
    80  	if err != nil {
    81  		return err
    82  	}
    83  
    84  	pKey.peerIP = net.ParseIP(string(ipB))
    85  
    86  	macB, err := state.Token(true, nil)
    87  	if err != nil {
    88  		return err
    89  	}
    90  
    91  	pKey.peerMac, err = net.ParseMAC(string(macB))
    92  	return err
    93  }
    94  
    95  func (d *driver) peerDbWalk(f func(string, *peerKey, *peerEntry) bool) error {
    96  	d.peerDb.Lock()
    97  	nids := []string{}
    98  	for nid := range d.peerDb.mp {
    99  		nids = append(nids, nid)
   100  	}
   101  	d.peerDb.Unlock()
   102  
   103  	for _, nid := range nids {
   104  		d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool {
   105  			return f(nid, pKey, pEntry)
   106  		})
   107  	}
   108  	return nil
   109  }
   110  
   111  func (d *driver) peerDbNetworkWalk(nid string, f func(*peerKey, *peerEntry) bool) error {
   112  	d.peerDb.Lock()
   113  	pMap, ok := d.peerDb.mp[nid]
   114  	d.peerDb.Unlock()
   115  
   116  	if !ok {
   117  		return nil
   118  	}
   119  
   120  	mp := map[string]peerEntry{}
   121  	pMap.Lock()
   122  	for _, pKeyStr := range pMap.mp.Keys() {
   123  		entryDBList, ok := pMap.mp.Get(pKeyStr)
   124  		if ok {
   125  			peerEntryDB := entryDBList[0]
   126  			mp[pKeyStr] = peerEntryDB.UnMarshalDB()
   127  		}
   128  	}
   129  	pMap.Unlock()
   130  
   131  	for pKeyStr, pEntry := range mp {
   132  		var pKey peerKey
   133  		pEntry := pEntry
   134  		if _, err := fmt.Sscan(pKeyStr, &pKey); err != nil {
   135  			logrus.Warnf("Peer key scan on network %s failed: %v", nid, err)
   136  		}
   137  		if f(&pKey, &pEntry) {
   138  			return nil
   139  		}
   140  	}
   141  
   142  	return nil
   143  }
   144  
   145  func (d *driver) peerDbSearch(nid string, peerIP net.IP) (*peerKey, *peerEntry, error) {
   146  	var pKeyMatched *peerKey
   147  	var pEntryMatched *peerEntry
   148  	err := d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool {
   149  		if pKey.peerIP.Equal(peerIP) {
   150  			pKeyMatched = pKey
   151  			pEntryMatched = pEntry
   152  			return true
   153  		}
   154  
   155  		return false
   156  	})
   157  
   158  	if err != nil {
   159  		return nil, nil, fmt.Errorf("peerdb search for peer ip %q failed: %v", peerIP, err)
   160  	}
   161  
   162  	if pKeyMatched == nil || pEntryMatched == nil {
   163  		return nil, nil, fmt.Errorf("peer ip %q not found in peerdb", peerIP)
   164  	}
   165  
   166  	return pKeyMatched, pEntryMatched, nil
   167  }
   168  
   169  func (d *driver) peerDbAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, isLocal bool) (bool, int) {
   170  	d.peerDb.Lock()
   171  	pMap, ok := d.peerDb.mp[nid]
   172  	if !ok {
   173  		pMap = &peerMap{}
   174  		d.peerDb.mp[nid] = pMap
   175  	}
   176  	d.peerDb.Unlock()
   177  
   178  	pKey := peerKey{
   179  		peerIP:  peerIP,
   180  		peerMac: peerMac,
   181  	}
   182  
   183  	pEntry := peerEntry{
   184  		eid:        eid,
   185  		vtep:       vtep,
   186  		peerIPMask: peerIPMask,
   187  		isLocal:    isLocal,
   188  	}
   189  
   190  	pMap.Lock()
   191  	defer pMap.Unlock()
   192  	b, i := pMap.mp.Insert(pKey.String(), pEntry.MarshalDB())
   193  	if i != 1 {
   194  		// Transient case, there is more than one endpoint that is using the same IP,MAC pair
   195  		s, _ := pMap.mp.String(pKey.String())
   196  		logrus.Warnf("peerDbAdd transient condition - Key:%s cardinality:%d db state:%s", pKey.String(), i, s)
   197  	}
   198  	return b, i
   199  }
   200  
   201  func (d *driver) peerDbDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, isLocal bool) (bool, int) {
   202  	d.peerDb.Lock()
   203  	pMap, ok := d.peerDb.mp[nid]
   204  	if !ok {
   205  		d.peerDb.Unlock()
   206  		return false, 0
   207  	}
   208  	d.peerDb.Unlock()
   209  
   210  	pKey := peerKey{
   211  		peerIP:  peerIP,
   212  		peerMac: peerMac,
   213  	}
   214  
   215  	pEntry := peerEntry{
   216  		eid:        eid,
   217  		vtep:       vtep,
   218  		peerIPMask: peerIPMask,
   219  		isLocal:    isLocal,
   220  	}
   221  
   222  	pMap.Lock()
   223  	defer pMap.Unlock()
   224  	b, i := pMap.mp.Remove(pKey.String(), pEntry.MarshalDB())
   225  	if i != 0 {
   226  		// Transient case, there is more than one endpoint that is using the same IP,MAC pair
   227  		s, _ := pMap.mp.String(pKey.String())
   228  		logrus.Warnf("peerDbDelete transient condition - Key:%s cardinality:%d db state:%s", pKey.String(), i, s)
   229  	}
   230  	return b, i
   231  }
   232  
   233  // The overlay uses a lazy initialization approach, this means that when a network is created
   234  // and the driver registered the overlay does not allocate resources till the moment that a
   235  // sandbox is actually created.
   236  // At the moment of this call, that happens when a sandbox is initialized, is possible that
   237  // networkDB has already delivered some events of peers already available on remote nodes,
   238  // these peers are saved into the peerDB and this function is used to properly configure
   239  // the network sandbox with all those peers that got previously notified.
   240  // Note also that this method sends a single message on the channel and the go routine on the
   241  // other side, will atomically loop on the whole table of peers and will program their state
   242  // in one single atomic operation. This is fundamental to guarantee consistency, and avoid that
   243  // new peerAdd or peerDelete gets reordered during the sandbox init.
   244  func (d *driver) initSandboxPeerDB(nid string) {
   245  	d.peerOpMu.Lock()
   246  	defer d.peerOpMu.Unlock()
   247  	if err := d.peerInitOp(nid); err != nil {
   248  		logrus.WithError(err).Warn("Peer init operation failed")
   249  	}
   250  }
   251  
   252  func (d *driver) peerInitOp(nid string) error {
   253  	return d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool {
   254  		// Local entries do not need to be added
   255  		if pEntry.isLocal {
   256  			return false
   257  		}
   258  
   259  		d.peerAddOp(nid, pEntry.eid, pKey.peerIP, pEntry.peerIPMask, pKey.peerMac, pEntry.vtep, false, false, false, pEntry.isLocal)
   260  		// return false to loop on all entries
   261  		return false
   262  	})
   263  }
   264  
   265  func (d *driver) peerAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask,
   266  	peerMac net.HardwareAddr, vtep net.IP, l2Miss, l3Miss, localPeer bool) {
   267  	d.peerOpMu.Lock()
   268  	defer d.peerOpMu.Unlock()
   269  	err := d.peerAddOp(nid, eid, peerIP, peerIPMask, peerMac, vtep, l2Miss, l3Miss, true, localPeer)
   270  	if err != nil {
   271  		logrus.WithError(err).Warn("Peer add operation failed")
   272  	}
   273  }
   274  
   275  func (d *driver) peerAddOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, l2Miss, l3Miss, updateDB, localPeer bool) error {
   276  	if err := validateID(nid, eid); err != nil {
   277  		return err
   278  	}
   279  
   280  	var dbEntries int
   281  	var inserted bool
   282  	if updateDB {
   283  		inserted, dbEntries = d.peerDbAdd(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer)
   284  		if !inserted {
   285  			logrus.Warnf("Entry already present in db: nid:%s eid:%s peerIP:%v peerMac:%v isLocal:%t vtep:%v",
   286  				nid, eid, peerIP, peerMac, localPeer, vtep)
   287  		}
   288  	}
   289  
   290  	// Local peers do not need any further configuration
   291  	if localPeer {
   292  		return nil
   293  	}
   294  
   295  	n := d.network(nid)
   296  	if n == nil {
   297  		return nil
   298  	}
   299  
   300  	sbox := n.sandbox()
   301  	if sbox == nil {
   302  		// We are hitting this case for all the events that are arriving before that the sandbox
   303  		// is being created. The peer got already added into the database and the sanbox init will
   304  		// call the peerDbUpdateSandbox that will configure all these peers from the database
   305  		return nil
   306  	}
   307  
   308  	IP := &net.IPNet{
   309  		IP:   peerIP,
   310  		Mask: peerIPMask,
   311  	}
   312  
   313  	s := n.getSubnetforIP(IP)
   314  	if s == nil {
   315  		return fmt.Errorf("couldn't find the subnet %q in network %q", IP.String(), n.id)
   316  	}
   317  
   318  	if err := n.joinSandbox(s, false); err != nil {
   319  		return fmt.Errorf("subnet sandbox join failed for %q: %v", s.subnetIP.String(), err)
   320  	}
   321  
   322  	if err := d.checkEncryption(nid, vtep, false, true); err != nil {
   323  		logrus.Warn(err)
   324  	}
   325  
   326  	// Add neighbor entry for the peer IP
   327  	if err := sbox.AddNeighbor(peerIP, peerMac, l3Miss, sbox.NeighborOptions().LinkName(s.vxlanName)); err != nil {
   328  		if _, ok := err.(osl.NeighborSearchError); ok && dbEntries > 1 {
   329  			// We are in the transient case so only the first configuration is programmed into the kernel
   330  			// Upon deletion if the active configuration is deleted the next one from the database will be restored
   331  			// Note we are skipping also the next configuration
   332  			return nil
   333  		}
   334  		return fmt.Errorf("could not add neighbor entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err)
   335  	}
   336  
   337  	// Add fdb entry to the bridge for the peer mac
   338  	if err := sbox.AddNeighbor(vtep, peerMac, l2Miss, sbox.NeighborOptions().LinkName(s.vxlanName),
   339  		sbox.NeighborOptions().Family(syscall.AF_BRIDGE)); err != nil {
   340  		return fmt.Errorf("could not add fdb entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err)
   341  	}
   342  
   343  	return nil
   344  }
   345  
   346  func (d *driver) peerDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMask,
   347  	peerMac net.HardwareAddr, vtep net.IP, localPeer bool) {
   348  	d.peerOpMu.Lock()
   349  	defer d.peerOpMu.Unlock()
   350  	err := d.peerDeleteOp(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer)
   351  	if err != nil {
   352  		logrus.WithError(err).Warn("Peer delete operation failed")
   353  	}
   354  }
   355  
   356  func (d *driver) peerDeleteOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, localPeer bool) error {
   357  	if err := validateID(nid, eid); err != nil {
   358  		return err
   359  	}
   360  
   361  	deleted, dbEntries := d.peerDbDelete(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer)
   362  	if !deleted {
   363  		logrus.Warnf("Entry was not in db: nid:%s eid:%s peerIP:%v peerMac:%v isLocal:%t vtep:%v",
   364  			nid, eid, peerIP, peerMac, localPeer, vtep)
   365  	}
   366  
   367  	n := d.network(nid)
   368  	if n == nil {
   369  		return nil
   370  	}
   371  
   372  	sbox := n.sandbox()
   373  	if sbox == nil {
   374  		return nil
   375  	}
   376  
   377  	if err := d.checkEncryption(nid, vtep, localPeer, false); err != nil {
   378  		logrus.Warn(err)
   379  	}
   380  
   381  	// Local peers do not have any local configuration to delete
   382  	if !localPeer {
   383  		// Remove fdb entry to the bridge for the peer mac
   384  		if err := sbox.DeleteNeighbor(vtep, peerMac, true); err != nil {
   385  			if _, ok := err.(osl.NeighborSearchError); ok && dbEntries > 0 {
   386  				// We fall in here if there is a transient state and if the neighbor that is being deleted
   387  				// was never been configured into the kernel (we allow only 1 configuration at the time per <ip,mac> mapping)
   388  				return nil
   389  			}
   390  			return fmt.Errorf("could not delete fdb entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err)
   391  		}
   392  
   393  		// Delete neighbor entry for the peer IP
   394  		if err := sbox.DeleteNeighbor(peerIP, peerMac, true); err != nil {
   395  			return fmt.Errorf("could not delete neighbor entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err)
   396  		}
   397  	}
   398  
   399  	if dbEntries == 0 {
   400  		return nil
   401  	}
   402  
   403  	// If there is still an entry into the database and the deletion went through without errors means that there is now no
   404  	// configuration active in the kernel.
   405  	// Restore one configuration for the <ip,mac> directly from the database, note that is guaranteed that there is one
   406  	peerKey, peerEntry, err := d.peerDbSearch(nid, peerIP)
   407  	if err != nil {
   408  		logrus.Errorf("peerDeleteOp unable to restore a configuration for nid:%s ip:%v mac:%v err:%s", nid, peerIP, peerMac, err)
   409  		return err
   410  	}
   411  	return d.peerAddOp(nid, peerEntry.eid, peerIP, peerEntry.peerIPMask, peerKey.peerMac, peerEntry.vtep, false, false, false, peerEntry.isLocal)
   412  }
   413  
   414  func (d *driver) peerFlush(nid string) {
   415  	d.peerOpMu.Lock()
   416  	defer d.peerOpMu.Unlock()
   417  	if err := d.peerFlushOp(nid); err != nil {
   418  		logrus.WithError(err).Warn("Peer flush operation failed")
   419  	}
   420  }
   421  
   422  func (d *driver) peerFlushOp(nid string) error {
   423  	d.peerDb.Lock()
   424  	defer d.peerDb.Unlock()
   425  	_, ok := d.peerDb.mp[nid]
   426  	if !ok {
   427  		return fmt.Errorf("Unable to find the peerDB for nid:%s", nid)
   428  	}
   429  	delete(d.peerDb.mp, nid)
   430  	return nil
   431  }
   432  
   433  func (d *driver) peerDBUpdateSelf() {
   434  	d.peerDbWalk(func(nid string, pkey *peerKey, pEntry *peerEntry) bool {
   435  		if pEntry.isLocal {
   436  			pEntry.vtep = net.ParseIP(d.advertiseAddress)
   437  		}
   438  		return false
   439  	})
   440  }