github.com/Prakhar-Agarwal-byte/moby@v0.0.0-20231027092010-a14e3e8ab87e/libnetwork/drivers/overlay/peerdb.go (about)

     1  //go:build linux
     2  
     3  package overlay
     4  
import (
	"context"
	"errors"
	"fmt"
	"net"
	"sync"
	"syscall"

	"github.com/Prakhar-Agarwal-byte/moby/libnetwork/internal/setmatrix"
	"github.com/Prakhar-Agarwal-byte/moby/libnetwork/osl"
	"github.com/containerd/log"
)
    16  
// ovPeerTable is the name of the overlay peer table.
// NOTE(review): not referenced in the visible part of this file; presumably the
// NetworkDB table the driver watches for peer records — confirm at the use site.
const ovPeerTable = "overlay_peer_table"
    18  
// peerKey identifies a peer by its IP and MAC address pair. Its String form
// is what the per-network peerMap uses as the lookup key, and Scan parses that
// form back into a peerKey.
type peerKey struct {
	peerIP  net.IP
	peerMac net.HardwareAddr
}
    23  
// peerEntry is the in-memory description of a peer that is stored (in its
// peerEntryDB form) under a peerKey.
type peerEntry struct {
	eid        string     // endpoint ID supplied by the caller
	vtep       net.IP     // NOTE(review): presumably the VXLAN tunnel endpoint address of the hosting node — confirm
	peerIPMask net.IPMask // mask that accompanies the peer IP
	isLocal    bool       // value of the caller's localPeer flag
}
    30  
    31  func (p *peerEntry) MarshalDB() peerEntryDB {
    32  	ones, bits := p.peerIPMask.Size()
    33  	return peerEntryDB{
    34  		eid:            p.eid,
    35  		vtep:           p.vtep.String(),
    36  		peerIPMaskOnes: ones,
    37  		peerIPMaskBits: bits,
    38  		isLocal:        p.isLocal,
    39  	}
    40  }
    41  
// peerEntryDB is the structure saved into the set (SetMatrix). Because of how
// the set is implemented, the inserted value has to be hashable, so the []byte
// based fields of peerEntry are converted into strings and integers.
type peerEntryDB struct {
	eid            string
	vtep           string
	peerIPMaskOnes int
	peerIPMaskBits int
	isLocal        bool
}
    52  
    53  func (p *peerEntryDB) UnMarshalDB() peerEntry {
    54  	return peerEntry{
    55  		eid:        p.eid,
    56  		vtep:       net.ParseIP(p.vtep),
    57  		peerIPMask: net.CIDRMask(p.peerIPMaskOnes, p.peerIPMaskBits),
    58  		isLocal:    p.isLocal,
    59  	}
    60  }
    61  
// peerMap holds the peers of a single network. The embedded mutex is taken
// around every access to mp in this file.
type peerMap struct {
	// mp maps a peerKey string to the set of peerEntryDB values registered under it;
	// note the values have to be objects and not pointers to maintain the proper equality checks
	mp setmatrix.SetMatrix[peerEntryDB]
	sync.Mutex
}
    67  
// peerNetworkMap is the driver-wide peer database. The embedded mutex is taken
// around every access to mp in this file.
type peerNetworkMap struct {
	// mp maps a network ID (nid) to the peerMap holding that network's peers
	mp map[string]*peerMap
	sync.Mutex
}
    73  
    74  func (pKey peerKey) String() string {
    75  	return fmt.Sprintf("%s %s", pKey.peerIP, pKey.peerMac)
    76  }
    77  
    78  func (pKey *peerKey) Scan(state fmt.ScanState, verb rune) error {
    79  	ipB, err := state.Token(true, nil)
    80  	if err != nil {
    81  		return err
    82  	}
    83  
    84  	pKey.peerIP = net.ParseIP(string(ipB))
    85  
    86  	macB, err := state.Token(true, nil)
    87  	if err != nil {
    88  		return err
    89  	}
    90  
    91  	pKey.peerMac, err = net.ParseMAC(string(macB))
    92  	return err
    93  }
    94  
    95  func (d *driver) peerDbWalk(f func(string, *peerKey, *peerEntry) bool) error {
    96  	d.peerDb.Lock()
    97  	nids := []string{}
    98  	for nid := range d.peerDb.mp {
    99  		nids = append(nids, nid)
   100  	}
   101  	d.peerDb.Unlock()
   102  
   103  	for _, nid := range nids {
   104  		d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool {
   105  			return f(nid, pKey, pEntry)
   106  		})
   107  	}
   108  	return nil
   109  }
   110  
   111  func (d *driver) peerDbNetworkWalk(nid string, f func(*peerKey, *peerEntry) bool) error {
   112  	d.peerDb.Lock()
   113  	pMap, ok := d.peerDb.mp[nid]
   114  	d.peerDb.Unlock()
   115  
   116  	if !ok {
   117  		return nil
   118  	}
   119  
   120  	mp := map[string]peerEntry{}
   121  	pMap.Lock()
   122  	for _, pKeyStr := range pMap.mp.Keys() {
   123  		entryDBList, ok := pMap.mp.Get(pKeyStr)
   124  		if ok {
   125  			peerEntryDB := entryDBList[0]
   126  			mp[pKeyStr] = peerEntryDB.UnMarshalDB()
   127  		}
   128  	}
   129  	pMap.Unlock()
   130  
   131  	for pKeyStr, pEntry := range mp {
   132  		var pKey peerKey
   133  		pEntry := pEntry
   134  		if _, err := fmt.Sscan(pKeyStr, &pKey); err != nil {
   135  			log.G(context.TODO()).Warnf("Peer key scan on network %s failed: %v", nid, err)
   136  		}
   137  		if f(&pKey, &pEntry) {
   138  			return nil
   139  		}
   140  	}
   141  
   142  	return nil
   143  }
   144  
   145  func (d *driver) peerDbSearch(nid string, peerIP net.IP) (*peerKey, *peerEntry, error) {
   146  	var pKeyMatched *peerKey
   147  	var pEntryMatched *peerEntry
   148  	err := d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool {
   149  		if pKey.peerIP.Equal(peerIP) {
   150  			pKeyMatched = pKey
   151  			pEntryMatched = pEntry
   152  			return true
   153  		}
   154  
   155  		return false
   156  	})
   157  	if err != nil {
   158  		return nil, nil, fmt.Errorf("peerdb search for peer ip %q failed: %v", peerIP, err)
   159  	}
   160  
   161  	if pKeyMatched == nil || pEntryMatched == nil {
   162  		return nil, nil, fmt.Errorf("peer ip %q not found in peerdb", peerIP)
   163  	}
   164  
   165  	return pKeyMatched, pEntryMatched, nil
   166  }
   167  
   168  func (d *driver) peerDbAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, isLocal bool) (bool, int) {
   169  	d.peerDb.Lock()
   170  	pMap, ok := d.peerDb.mp[nid]
   171  	if !ok {
   172  		pMap = &peerMap{}
   173  		d.peerDb.mp[nid] = pMap
   174  	}
   175  	d.peerDb.Unlock()
   176  
   177  	pKey := peerKey{
   178  		peerIP:  peerIP,
   179  		peerMac: peerMac,
   180  	}
   181  
   182  	pEntry := peerEntry{
   183  		eid:        eid,
   184  		vtep:       vtep,
   185  		peerIPMask: peerIPMask,
   186  		isLocal:    isLocal,
   187  	}
   188  
   189  	pMap.Lock()
   190  	defer pMap.Unlock()
   191  	b, i := pMap.mp.Insert(pKey.String(), pEntry.MarshalDB())
   192  	if i != 1 {
   193  		// Transient case, there is more than one endpoint that is using the same IP,MAC pair
   194  		s, _ := pMap.mp.String(pKey.String())
   195  		log.G(context.TODO()).Warnf("peerDbAdd transient condition - Key:%s cardinality:%d db state:%s", pKey.String(), i, s)
   196  	}
   197  	return b, i
   198  }
   199  
   200  func (d *driver) peerDbDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, isLocal bool) (bool, int) {
   201  	d.peerDb.Lock()
   202  	pMap, ok := d.peerDb.mp[nid]
   203  	if !ok {
   204  		d.peerDb.Unlock()
   205  		return false, 0
   206  	}
   207  	d.peerDb.Unlock()
   208  
   209  	pKey := peerKey{
   210  		peerIP:  peerIP,
   211  		peerMac: peerMac,
   212  	}
   213  
   214  	pEntry := peerEntry{
   215  		eid:        eid,
   216  		vtep:       vtep,
   217  		peerIPMask: peerIPMask,
   218  		isLocal:    isLocal,
   219  	}
   220  
   221  	pMap.Lock()
   222  	defer pMap.Unlock()
   223  	b, i := pMap.mp.Remove(pKey.String(), pEntry.MarshalDB())
   224  	if i != 0 {
   225  		// Transient case, there is more than one endpoint that is using the same IP,MAC pair
   226  		s, _ := pMap.mp.String(pKey.String())
   227  		log.G(context.TODO()).Warnf("peerDbDelete transient condition - Key:%s cardinality:%d db state:%s", pKey.String(), i, s)
   228  	}
   229  	return b, i
   230  }
   231  
   232  // The overlay uses a lazy initialization approach, this means that when a network is created
   233  // and the driver registered the overlay does not allocate resources till the moment that a
   234  // sandbox is actually created.
   235  // At the moment of this call, that happens when a sandbox is initialized, is possible that
   236  // networkDB has already delivered some events of peers already available on remote nodes,
   237  // these peers are saved into the peerDB and this function is used to properly configure
   238  // the network sandbox with all those peers that got previously notified.
   239  // Note also that this method sends a single message on the channel and the go routine on the
   240  // other side, will atomically loop on the whole table of peers and will program their state
   241  // in one single atomic operation. This is fundamental to guarantee consistency, and avoid that
   242  // new peerAdd or peerDelete gets reordered during the sandbox init.
   243  func (d *driver) initSandboxPeerDB(nid string) {
   244  	d.peerOpMu.Lock()
   245  	defer d.peerOpMu.Unlock()
   246  	if err := d.peerInitOp(nid); err != nil {
   247  		log.G(context.TODO()).WithError(err).Warn("Peer init operation failed")
   248  	}
   249  }
   250  
   251  func (d *driver) peerInitOp(nid string) error {
   252  	return d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool {
   253  		// Local entries do not need to be added
   254  		if pEntry.isLocal {
   255  			return false
   256  		}
   257  
   258  		d.peerAddOp(nid, pEntry.eid, pKey.peerIP, pEntry.peerIPMask, pKey.peerMac, pEntry.vtep, false, false, false, pEntry.isLocal)
   259  		// return false to loop on all entries
   260  		return false
   261  	})
   262  }
   263  
   264  func (d *driver) peerAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, l2Miss, l3Miss, localPeer bool) {
   265  	d.peerOpMu.Lock()
   266  	defer d.peerOpMu.Unlock()
   267  	err := d.peerAddOp(nid, eid, peerIP, peerIPMask, peerMac, vtep, l2Miss, l3Miss, true, localPeer)
   268  	if err != nil {
   269  		log.G(context.TODO()).WithError(err).Warn("Peer add operation failed")
   270  	}
   271  }
   272  
   273  func (d *driver) peerAddOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, l2Miss, l3Miss, updateDB, localPeer bool) error {
   274  	if err := validateID(nid, eid); err != nil {
   275  		return err
   276  	}
   277  
   278  	var dbEntries int
   279  	var inserted bool
   280  	if updateDB {
   281  		inserted, dbEntries = d.peerDbAdd(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer)
   282  		if !inserted {
   283  			log.G(context.TODO()).Warnf("Entry already present in db: nid:%s eid:%s peerIP:%v peerMac:%v isLocal:%t vtep:%v",
   284  				nid, eid, peerIP, peerMac, localPeer, vtep)
   285  		}
   286  	}
   287  
   288  	// Local peers do not need any further configuration
   289  	if localPeer {
   290  		return nil
   291  	}
   292  
   293  	n := d.network(nid)
   294  	if n == nil {
   295  		return nil
   296  	}
   297  
   298  	sbox := n.sandbox()
   299  	if sbox == nil {
   300  		// We are hitting this case for all the events that are arriving before that the sandbox
   301  		// is being created. The peer got already added into the database and the sanbox init will
   302  		// call the peerDbUpdateSandbox that will configure all these peers from the database
   303  		return nil
   304  	}
   305  
   306  	IP := &net.IPNet{
   307  		IP:   peerIP,
   308  		Mask: peerIPMask,
   309  	}
   310  
   311  	s := n.getSubnetforIP(IP)
   312  	if s == nil {
   313  		return fmt.Errorf("couldn't find the subnet %q in network %q", IP.String(), n.id)
   314  	}
   315  
   316  	if err := n.joinSandbox(s, false); err != nil {
   317  		return fmt.Errorf("subnet sandbox join failed for %q: %v", s.subnetIP.String(), err)
   318  	}
   319  
   320  	if err := d.checkEncryption(nid, vtep, false, true); err != nil {
   321  		log.G(context.TODO()).Warn(err)
   322  	}
   323  
   324  	// Add neighbor entry for the peer IP
   325  	if err := sbox.AddNeighbor(peerIP, peerMac, l3Miss, osl.WithLinkName(s.vxlanName)); err != nil {
   326  		if _, ok := err.(osl.NeighborSearchError); ok && dbEntries > 1 {
   327  			// We are in the transient case so only the first configuration is programmed into the kernel
   328  			// Upon deletion if the active configuration is deleted the next one from the database will be restored
   329  			// Note we are skipping also the next configuration
   330  			return nil
   331  		}
   332  		return fmt.Errorf("could not add neighbor entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err)
   333  	}
   334  
   335  	// Add fdb entry to the bridge for the peer mac
   336  	if err := sbox.AddNeighbor(vtep, peerMac, l2Miss, osl.WithLinkName(s.vxlanName), osl.WithFamily(syscall.AF_BRIDGE)); err != nil {
   337  		return fmt.Errorf("could not add fdb entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err)
   338  	}
   339  
   340  	return nil
   341  }
   342  
   343  func (d *driver) peerDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, localPeer bool) {
   344  	d.peerOpMu.Lock()
   345  	defer d.peerOpMu.Unlock()
   346  	err := d.peerDeleteOp(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer)
   347  	if err != nil {
   348  		log.G(context.TODO()).WithError(err).Warn("Peer delete operation failed")
   349  	}
   350  }
   351  
   352  func (d *driver) peerDeleteOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, localPeer bool) error {
   353  	if err := validateID(nid, eid); err != nil {
   354  		return err
   355  	}
   356  
   357  	deleted, dbEntries := d.peerDbDelete(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer)
   358  	if !deleted {
   359  		log.G(context.TODO()).Warnf("Entry was not in db: nid:%s eid:%s peerIP:%v peerMac:%v isLocal:%t vtep:%v",
   360  			nid, eid, peerIP, peerMac, localPeer, vtep)
   361  	}
   362  
   363  	n := d.network(nid)
   364  	if n == nil {
   365  		return nil
   366  	}
   367  
   368  	sbox := n.sandbox()
   369  	if sbox == nil {
   370  		return nil
   371  	}
   372  
   373  	if err := d.checkEncryption(nid, vtep, localPeer, false); err != nil {
   374  		log.G(context.TODO()).Warn(err)
   375  	}
   376  
   377  	// Local peers do not have any local configuration to delete
   378  	if !localPeer {
   379  		// Remove fdb entry to the bridge for the peer mac
   380  		if err := sbox.DeleteNeighbor(vtep, peerMac); err != nil {
   381  			if _, ok := err.(osl.NeighborSearchError); ok && dbEntries > 0 {
   382  				// We fall in here if there is a transient state and if the neighbor that is being deleted
   383  				// was never been configured into the kernel (we allow only 1 configuration at the time per <ip,mac> mapping)
   384  				return nil
   385  			}
   386  			return fmt.Errorf("could not delete fdb entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err)
   387  		}
   388  
   389  		// Delete neighbor entry for the peer IP
   390  		if err := sbox.DeleteNeighbor(peerIP, peerMac); err != nil {
   391  			return fmt.Errorf("could not delete neighbor entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err)
   392  		}
   393  	}
   394  
   395  	if dbEntries == 0 {
   396  		return nil
   397  	}
   398  
   399  	// If there is still an entry into the database and the deletion went through without errors means that there is now no
   400  	// configuration active in the kernel.
   401  	// Restore one configuration for the <ip,mac> directly from the database, note that is guaranteed that there is one
   402  	peerKey, peerEntry, err := d.peerDbSearch(nid, peerIP)
   403  	if err != nil {
   404  		log.G(context.TODO()).Errorf("peerDeleteOp unable to restore a configuration for nid:%s ip:%v mac:%v err:%s", nid, peerIP, peerMac, err)
   405  		return err
   406  	}
   407  	return d.peerAddOp(nid, peerEntry.eid, peerIP, peerEntry.peerIPMask, peerKey.peerMac, peerEntry.vtep, false, false, false, peerEntry.isLocal)
   408  }
   409  
   410  func (d *driver) peerFlush(nid string) {
   411  	d.peerOpMu.Lock()
   412  	defer d.peerOpMu.Unlock()
   413  	if err := d.peerFlushOp(nid); err != nil {
   414  		log.G(context.TODO()).WithError(err).Warn("Peer flush operation failed")
   415  	}
   416  }
   417  
   418  func (d *driver) peerFlushOp(nid string) error {
   419  	d.peerDb.Lock()
   420  	defer d.peerDb.Unlock()
   421  	_, ok := d.peerDb.mp[nid]
   422  	if !ok {
   423  		return fmt.Errorf("Unable to find the peerDB for nid:%s", nid)
   424  	}
   425  	delete(d.peerDb.mp, nid)
   426  	return nil
   427  }
   428  
   429  func (d *driver) peerDBUpdateSelf() {
   430  	d.peerDbWalk(func(nid string, pkey *peerKey, pEntry *peerEntry) bool {
   431  		if pEntry.isLocal {
   432  			pEntry.vtep = net.ParseIP(d.advertiseAddress)
   433  		}
   434  		return false
   435  	})
   436  }