github.com/adityamillind98/moby@v23.0.0-rc.4+incompatible/libnetwork/drivers/overlay/peerdb.go (about)

     1  //go:build linux
     2  // +build linux
     3  
     4  package overlay
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"net"
    10  	"sync"
    11  	"syscall"
    12  
    13  	"github.com/docker/docker/libnetwork/internal/caller"
    14  	"github.com/docker/docker/libnetwork/internal/setmatrix"
    15  	"github.com/docker/docker/libnetwork/osl"
    16  	"github.com/sirupsen/logrus"
    17  )
    18  
    19  const ovPeerTable = "overlay_peer_table"
    20  
    21  type peerKey struct {
    22  	peerIP  net.IP
    23  	peerMac net.HardwareAddr
    24  }
    25  
    26  type peerEntry struct {
    27  	eid        string
    28  	vtep       net.IP
    29  	peerIPMask net.IPMask
    30  	isLocal    bool
    31  }
    32  
    33  func (p *peerEntry) MarshalDB() peerEntryDB {
    34  	ones, bits := p.peerIPMask.Size()
    35  	return peerEntryDB{
    36  		eid:            p.eid,
    37  		vtep:           p.vtep.String(),
    38  		peerIPMaskOnes: ones,
    39  		peerIPMaskBits: bits,
    40  		isLocal:        p.isLocal,
    41  	}
    42  }
    43  
    44  // This the structure saved into the set (SetMatrix), due to the implementation of it
    45  // the value inserted in the set has to be Hashable so the []byte had to be converted into
    46  // strings
    47  type peerEntryDB struct {
    48  	eid            string
    49  	vtep           string
    50  	peerIPMaskOnes int
    51  	peerIPMaskBits int
    52  	isLocal        bool
    53  }
    54  
    55  func (p *peerEntryDB) UnMarshalDB() peerEntry {
    56  	return peerEntry{
    57  		eid:        p.eid,
    58  		vtep:       net.ParseIP(p.vtep),
    59  		peerIPMask: net.CIDRMask(p.peerIPMaskOnes, p.peerIPMaskBits),
    60  		isLocal:    p.isLocal,
    61  	}
    62  }
    63  
    64  type peerMap struct {
    65  	// set of peerEntry, note they have to be objects and not pointers to maintain the proper equality checks
    66  	mp setmatrix.SetMatrix
    67  	sync.Mutex
    68  }
    69  
    70  type peerNetworkMap struct {
    71  	// map with key peerKey
    72  	mp map[string]*peerMap
    73  	sync.Mutex
    74  }
    75  
    76  func (pKey peerKey) String() string {
    77  	return fmt.Sprintf("%s %s", pKey.peerIP, pKey.peerMac)
    78  }
    79  
    80  func (pKey *peerKey) Scan(state fmt.ScanState, verb rune) error {
    81  	ipB, err := state.Token(true, nil)
    82  	if err != nil {
    83  		return err
    84  	}
    85  
    86  	pKey.peerIP = net.ParseIP(string(ipB))
    87  
    88  	macB, err := state.Token(true, nil)
    89  	if err != nil {
    90  		return err
    91  	}
    92  
    93  	pKey.peerMac, err = net.ParseMAC(string(macB))
    94  	return err
    95  }
    96  
    97  func (d *driver) peerDbWalk(f func(string, *peerKey, *peerEntry) bool) error {
    98  	d.peerDb.Lock()
    99  	nids := []string{}
   100  	for nid := range d.peerDb.mp {
   101  		nids = append(nids, nid)
   102  	}
   103  	d.peerDb.Unlock()
   104  
   105  	for _, nid := range nids {
   106  		d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool {
   107  			return f(nid, pKey, pEntry)
   108  		})
   109  	}
   110  	return nil
   111  }
   112  
   113  func (d *driver) peerDbNetworkWalk(nid string, f func(*peerKey, *peerEntry) bool) error {
   114  	d.peerDb.Lock()
   115  	pMap, ok := d.peerDb.mp[nid]
   116  	d.peerDb.Unlock()
   117  
   118  	if !ok {
   119  		return nil
   120  	}
   121  
   122  	mp := map[string]peerEntry{}
   123  	pMap.Lock()
   124  	for _, pKeyStr := range pMap.mp.Keys() {
   125  		entryDBList, ok := pMap.mp.Get(pKeyStr)
   126  		if ok {
   127  			peerEntryDB := entryDBList[0].(peerEntryDB)
   128  			mp[pKeyStr] = peerEntryDB.UnMarshalDB()
   129  		}
   130  	}
   131  	pMap.Unlock()
   132  
   133  	for pKeyStr, pEntry := range mp {
   134  		var pKey peerKey
   135  		pEntry := pEntry
   136  		if _, err := fmt.Sscan(pKeyStr, &pKey); err != nil {
   137  			logrus.Warnf("Peer key scan on network %s failed: %v", nid, err)
   138  		}
   139  		if f(&pKey, &pEntry) {
   140  			return nil
   141  		}
   142  	}
   143  
   144  	return nil
   145  }
   146  
   147  func (d *driver) peerDbSearch(nid string, peerIP net.IP) (*peerKey, *peerEntry, error) {
   148  	var pKeyMatched *peerKey
   149  	var pEntryMatched *peerEntry
   150  	err := d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool {
   151  		if pKey.peerIP.Equal(peerIP) {
   152  			pKeyMatched = pKey
   153  			pEntryMatched = pEntry
   154  			return true
   155  		}
   156  
   157  		return false
   158  	})
   159  
   160  	if err != nil {
   161  		return nil, nil, fmt.Errorf("peerdb search for peer ip %q failed: %v", peerIP, err)
   162  	}
   163  
   164  	if pKeyMatched == nil || pEntryMatched == nil {
   165  		return nil, nil, fmt.Errorf("peer ip %q not found in peerdb", peerIP)
   166  	}
   167  
   168  	return pKeyMatched, pEntryMatched, nil
   169  }
   170  
   171  func (d *driver) peerDbAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, isLocal bool) (bool, int) {
   172  	d.peerDb.Lock()
   173  	pMap, ok := d.peerDb.mp[nid]
   174  	if !ok {
   175  		d.peerDb.mp[nid] = &peerMap{
   176  			mp: setmatrix.NewSetMatrix(),
   177  		}
   178  
   179  		pMap = d.peerDb.mp[nid]
   180  	}
   181  	d.peerDb.Unlock()
   182  
   183  	pKey := peerKey{
   184  		peerIP:  peerIP,
   185  		peerMac: peerMac,
   186  	}
   187  
   188  	pEntry := peerEntry{
   189  		eid:        eid,
   190  		vtep:       vtep,
   191  		peerIPMask: peerIPMask,
   192  		isLocal:    isLocal,
   193  	}
   194  
   195  	pMap.Lock()
   196  	defer pMap.Unlock()
   197  	b, i := pMap.mp.Insert(pKey.String(), pEntry.MarshalDB())
   198  	if i != 1 {
   199  		// Transient case, there is more than one endpoint that is using the same IP,MAC pair
   200  		s, _ := pMap.mp.String(pKey.String())
   201  		logrus.Warnf("peerDbAdd transient condition - Key:%s cardinality:%d db state:%s", pKey.String(), i, s)
   202  	}
   203  	return b, i
   204  }
   205  
   206  func (d *driver) peerDbDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, isLocal bool) (bool, int) {
   207  	d.peerDb.Lock()
   208  	pMap, ok := d.peerDb.mp[nid]
   209  	if !ok {
   210  		d.peerDb.Unlock()
   211  		return false, 0
   212  	}
   213  	d.peerDb.Unlock()
   214  
   215  	pKey := peerKey{
   216  		peerIP:  peerIP,
   217  		peerMac: peerMac,
   218  	}
   219  
   220  	pEntry := peerEntry{
   221  		eid:        eid,
   222  		vtep:       vtep,
   223  		peerIPMask: peerIPMask,
   224  		isLocal:    isLocal,
   225  	}
   226  
   227  	pMap.Lock()
   228  	defer pMap.Unlock()
   229  	b, i := pMap.mp.Remove(pKey.String(), pEntry.MarshalDB())
   230  	if i != 0 {
   231  		// Transient case, there is more than one endpoint that is using the same IP,MAC pair
   232  		s, _ := pMap.mp.String(pKey.String())
   233  		logrus.Warnf("peerDbDelete transient condition - Key:%s cardinality:%d db state:%s", pKey.String(), i, s)
   234  	}
   235  	return b, i
   236  }
   237  
   238  // The overlay uses a lazy initialization approach, this means that when a network is created
   239  // and the driver registered the overlay does not allocate resources till the moment that a
   240  // sandbox is actually created.
   241  // At the moment of this call, that happens when a sandbox is initialized, is possible that
   242  // networkDB has already delivered some events of peers already available on remote nodes,
   243  // these peers are saved into the peerDB and this function is used to properly configure
   244  // the network sandbox with all those peers that got previously notified.
   245  // Note also that this method sends a single message on the channel and the go routine on the
   246  // other side, will atomically loop on the whole table of peers and will program their state
   247  // in one single atomic operation. This is fundamental to guarantee consistency, and avoid that
   248  // new peerAdd or peerDelete gets reordered during the sandbox init.
   249  func (d *driver) initSandboxPeerDB(nid string) {
   250  	d.peerInit(nid)
   251  }
   252  
   253  type peerOperationType int32
   254  
   255  const (
   256  	peerOperationINIT peerOperationType = iota
   257  	peerOperationADD
   258  	peerOperationDELETE
   259  	peerOperationFLUSH
   260  )
   261  
   262  type peerOperation struct {
   263  	opType     peerOperationType
   264  	networkID  string
   265  	endpointID string
   266  	peerIP     net.IP
   267  	peerIPMask net.IPMask
   268  	peerMac    net.HardwareAddr
   269  	vtepIP     net.IP
   270  	l2Miss     bool
   271  	l3Miss     bool
   272  	localPeer  bool
   273  	callerName string
   274  }
   275  
   276  func (d *driver) peerOpRoutine(ctx context.Context, ch chan *peerOperation) {
   277  	var err error
   278  	for {
   279  		select {
   280  		case <-ctx.Done():
   281  			return
   282  		case op := <-ch:
   283  			switch op.opType {
   284  			case peerOperationINIT:
   285  				err = d.peerInitOp(op.networkID)
   286  			case peerOperationADD:
   287  				err = d.peerAddOp(op.networkID, op.endpointID, op.peerIP, op.peerIPMask, op.peerMac, op.vtepIP, op.l2Miss, op.l3Miss, true, op.localPeer)
   288  			case peerOperationDELETE:
   289  				err = d.peerDeleteOp(op.networkID, op.endpointID, op.peerIP, op.peerIPMask, op.peerMac, op.vtepIP, op.localPeer)
   290  			case peerOperationFLUSH:
   291  				err = d.peerFlushOp(op.networkID)
   292  			}
   293  			if err != nil {
   294  				logrus.Warnf("Peer operation failed:%s op:%v", err, op)
   295  			}
   296  		}
   297  	}
   298  }
   299  
   300  func (d *driver) peerInit(nid string) {
   301  	callerName := caller.Name(1)
   302  	d.peerOpCh <- &peerOperation{
   303  		opType:     peerOperationINIT,
   304  		networkID:  nid,
   305  		callerName: callerName,
   306  	}
   307  }
   308  
   309  func (d *driver) peerInitOp(nid string) error {
   310  	return d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool {
   311  		// Local entries do not need to be added
   312  		if pEntry.isLocal {
   313  			return false
   314  		}
   315  
   316  		d.peerAddOp(nid, pEntry.eid, pKey.peerIP, pEntry.peerIPMask, pKey.peerMac, pEntry.vtep, false, false, false, pEntry.isLocal)
   317  		// return false to loop on all entries
   318  		return false
   319  	})
   320  }
   321  
   322  func (d *driver) peerAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask,
   323  	peerMac net.HardwareAddr, vtep net.IP, l2Miss, l3Miss, localPeer bool) {
   324  	d.peerOpCh <- &peerOperation{
   325  		opType:     peerOperationADD,
   326  		networkID:  nid,
   327  		endpointID: eid,
   328  		peerIP:     peerIP,
   329  		peerIPMask: peerIPMask,
   330  		peerMac:    peerMac,
   331  		vtepIP:     vtep,
   332  		l2Miss:     l2Miss,
   333  		l3Miss:     l3Miss,
   334  		localPeer:  localPeer,
   335  		callerName: caller.Name(1),
   336  	}
   337  }
   338  
   339  func (d *driver) peerAddOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, l2Miss, l3Miss, updateDB, localPeer bool) error {
   340  	if err := validateID(nid, eid); err != nil {
   341  		return err
   342  	}
   343  
   344  	var dbEntries int
   345  	var inserted bool
   346  	if updateDB {
   347  		inserted, dbEntries = d.peerDbAdd(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer)
   348  		if !inserted {
   349  			logrus.Warnf("Entry already present in db: nid:%s eid:%s peerIP:%v peerMac:%v isLocal:%t vtep:%v",
   350  				nid, eid, peerIP, peerMac, localPeer, vtep)
   351  		}
   352  	}
   353  
   354  	// Local peers do not need any further configuration
   355  	if localPeer {
   356  		return nil
   357  	}
   358  
   359  	n := d.network(nid)
   360  	if n == nil {
   361  		return nil
   362  	}
   363  
   364  	sbox := n.sandbox()
   365  	if sbox == nil {
   366  		// We are hitting this case for all the events that are arriving before that the sandbox
   367  		// is being created. The peer got already added into the database and the sanbox init will
   368  		// call the peerDbUpdateSandbox that will configure all these peers from the database
   369  		return nil
   370  	}
   371  
   372  	IP := &net.IPNet{
   373  		IP:   peerIP,
   374  		Mask: peerIPMask,
   375  	}
   376  
   377  	s := n.getSubnetforIP(IP)
   378  	if s == nil {
   379  		return fmt.Errorf("couldn't find the subnet %q in network %q", IP.String(), n.id)
   380  	}
   381  
   382  	if err := n.obtainVxlanID(s); err != nil {
   383  		return fmt.Errorf("couldn't get vxlan id for %q: %v", s.subnetIP.String(), err)
   384  	}
   385  
   386  	if err := n.joinSandbox(s, false, false); err != nil {
   387  		return fmt.Errorf("subnet sandbox join failed for %q: %v", s.subnetIP.String(), err)
   388  	}
   389  
   390  	if err := d.checkEncryption(nid, vtep, n.vxlanID(s), false, true); err != nil {
   391  		logrus.Warn(err)
   392  	}
   393  
   394  	// Add neighbor entry for the peer IP
   395  	if err := sbox.AddNeighbor(peerIP, peerMac, l3Miss, sbox.NeighborOptions().LinkName(s.vxlanName)); err != nil {
   396  		if _, ok := err.(osl.NeighborSearchError); ok && dbEntries > 1 {
   397  			// We are in the transient case so only the first configuration is programmed into the kernel
   398  			// Upon deletion if the active configuration is deleted the next one from the database will be restored
   399  			// Note we are skipping also the next configuration
   400  			return nil
   401  		}
   402  		return fmt.Errorf("could not add neighbor entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err)
   403  	}
   404  
   405  	// Add fdb entry to the bridge for the peer mac
   406  	if err := sbox.AddNeighbor(vtep, peerMac, l2Miss, sbox.NeighborOptions().LinkName(s.vxlanName),
   407  		sbox.NeighborOptions().Family(syscall.AF_BRIDGE)); err != nil {
   408  		return fmt.Errorf("could not add fdb entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err)
   409  	}
   410  
   411  	return nil
   412  }
   413  
   414  func (d *driver) peerDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMask,
   415  	peerMac net.HardwareAddr, vtep net.IP, localPeer bool) {
   416  	d.peerOpCh <- &peerOperation{
   417  		opType:     peerOperationDELETE,
   418  		networkID:  nid,
   419  		endpointID: eid,
   420  		peerIP:     peerIP,
   421  		peerIPMask: peerIPMask,
   422  		peerMac:    peerMac,
   423  		vtepIP:     vtep,
   424  		callerName: caller.Name(1),
   425  		localPeer:  localPeer,
   426  	}
   427  }
   428  
   429  func (d *driver) peerDeleteOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, localPeer bool) error {
   430  	if err := validateID(nid, eid); err != nil {
   431  		return err
   432  	}
   433  
   434  	deleted, dbEntries := d.peerDbDelete(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer)
   435  	if !deleted {
   436  		logrus.Warnf("Entry was not in db: nid:%s eid:%s peerIP:%v peerMac:%v isLocal:%t vtep:%v",
   437  			nid, eid, peerIP, peerMac, localPeer, vtep)
   438  	}
   439  
   440  	n := d.network(nid)
   441  	if n == nil {
   442  		return nil
   443  	}
   444  
   445  	sbox := n.sandbox()
   446  	if sbox == nil {
   447  		return nil
   448  	}
   449  
   450  	if err := d.checkEncryption(nid, vtep, 0, localPeer, false); err != nil {
   451  		logrus.Warn(err)
   452  	}
   453  
   454  	// Local peers do not have any local configuration to delete
   455  	if !localPeer {
   456  		// Remove fdb entry to the bridge for the peer mac
   457  		if err := sbox.DeleteNeighbor(vtep, peerMac, true); err != nil {
   458  			if _, ok := err.(osl.NeighborSearchError); ok && dbEntries > 0 {
   459  				// We fall in here if there is a transient state and if the neighbor that is being deleted
   460  				// was never been configured into the kernel (we allow only 1 configuration at the time per <ip,mac> mapping)
   461  				return nil
   462  			}
   463  			return fmt.Errorf("could not delete fdb entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err)
   464  		}
   465  
   466  		// Delete neighbor entry for the peer IP
   467  		if err := sbox.DeleteNeighbor(peerIP, peerMac, true); err != nil {
   468  			return fmt.Errorf("could not delete neighbor entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err)
   469  		}
   470  	}
   471  
   472  	if dbEntries == 0 {
   473  		return nil
   474  	}
   475  
   476  	// If there is still an entry into the database and the deletion went through without errors means that there is now no
   477  	// configuration active in the kernel.
   478  	// Restore one configuration for the <ip,mac> directly from the database, note that is guaranteed that there is one
   479  	peerKey, peerEntry, err := d.peerDbSearch(nid, peerIP)
   480  	if err != nil {
   481  		logrus.Errorf("peerDeleteOp unable to restore a configuration for nid:%s ip:%v mac:%v err:%s", nid, peerIP, peerMac, err)
   482  		return err
   483  	}
   484  	return d.peerAddOp(nid, peerEntry.eid, peerIP, peerEntry.peerIPMask, peerKey.peerMac, peerEntry.vtep, false, false, false, peerEntry.isLocal)
   485  }
   486  
   487  func (d *driver) peerFlush(nid string) {
   488  	d.peerOpCh <- &peerOperation{
   489  		opType:     peerOperationFLUSH,
   490  		networkID:  nid,
   491  		callerName: caller.Name(1),
   492  	}
   493  }
   494  
   495  func (d *driver) peerFlushOp(nid string) error {
   496  	d.peerDb.Lock()
   497  	defer d.peerDb.Unlock()
   498  	_, ok := d.peerDb.mp[nid]
   499  	if !ok {
   500  		return fmt.Errorf("Unable to find the peerDB for nid:%s", nid)
   501  	}
   502  	delete(d.peerDb.mp, nid)
   503  	return nil
   504  }
   505  
   506  func (d *driver) pushLocalDb() {
   507  	d.peerDbWalk(func(nid string, pKey *peerKey, pEntry *peerEntry) bool {
   508  		if pEntry.isLocal {
   509  			d.pushLocalEndpointEvent("join", nid, pEntry.eid)
   510  		}
   511  		return false
   512  	})
   513  }
   514  
   515  func (d *driver) peerDBUpdateSelf() {
   516  	d.peerDbWalk(func(nid string, pkey *peerKey, pEntry *peerEntry) bool {
   517  		if pEntry.isLocal {
   518  			pEntry.vtep = net.ParseIP(d.advertiseAddress)
   519  		}
   520  		return false
   521  	})
   522  }