github.com/docker/engine@v22.0.0-20211208180946-d456264580cf+incompatible/libnetwork/drivers/overlay/peerdb.go (about)

     1  //go:build linux
     2  // +build linux
     3  
     4  package overlay
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"net"
    10  	"sync"
    11  	"syscall"
    12  
    13  	"github.com/docker/docker/libnetwork/internal/caller"
    14  	"github.com/docker/docker/libnetwork/internal/setmatrix"
    15  	"github.com/docker/docker/libnetwork/osl"
    16  	"github.com/sirupsen/logrus"
    17  )
    18  
// ovPeerTable holds the table name string "overlay_peer_table". It is not
// referenced in the visible part of this file.
// NOTE(review): presumably the name of the table used to exchange overlay
// peer records between nodes — confirm against the other driver files.
const ovPeerTable = "overlay_peer_table"
    20  
// peerKey identifies a peer by its <IP, MAC> pair. Its String form is what
// peerDbAdd and peerDbDelete use as the key into the per-network SetMatrix,
// and Scan parses that form back into a peerKey.
type peerKey struct {
	// peerIP is the IP address of the peer.
	peerIP net.IP
	// peerMac is the MAC address of the peer.
	peerMac net.HardwareAddr
}
    25  
    26  type peerEntry struct {
    27  	eid        string
    28  	vtep       net.IP
    29  	peerIPMask net.IPMask
    30  	isLocal    bool
    31  }
    32  
    33  func (p *peerEntry) MarshalDB() peerEntryDB {
    34  	ones, bits := p.peerIPMask.Size()
    35  	return peerEntryDB{
    36  		eid:            p.eid,
    37  		vtep:           p.vtep.String(),
    38  		peerIPMaskOnes: ones,
    39  		peerIPMaskBits: bits,
    40  		isLocal:        p.isLocal,
    41  	}
    42  }
    43  
    44  // This the structure saved into the set (SetMatrix), due to the implementation of it
    45  // the value inserted in the set has to be Hashable so the []byte had to be converted into
    46  // strings
    47  type peerEntryDB struct {
    48  	eid            string
    49  	vtep           string
    50  	peerIPMaskOnes int
    51  	peerIPMaskBits int
    52  	isLocal        bool
    53  }
    54  
    55  func (p *peerEntryDB) UnMarshalDB() peerEntry {
    56  	return peerEntry{
    57  		eid:        p.eid,
    58  		vtep:       net.ParseIP(p.vtep),
    59  		peerIPMask: net.CIDRMask(p.peerIPMaskOnes, p.peerIPMaskBits),
    60  		isLocal:    p.isLocal,
    61  	}
    62  }
    63  
// peerMap is the peer database of a single network. The embedded mutex is
// taken by the functions in this file around every access to mp.
type peerMap struct {
	// set of peerEntry, note they have to be objects and not pointers to maintain the proper equality checks.
	// In this file the set is indexed by peerKey.String() and stores peerEntryDB values.
	mp setmatrix.SetMatrix
	sync.Mutex
}
    69  
// peerNetworkMap groups the per-network peer databases. The embedded mutex
// guards mp.
type peerNetworkMap struct {
	// map from a network ID to the peerMap of that network (the peerMap itself
	// is what is keyed by peerKey).
	// NOTE(review): the previous comment said "map with key peerKey", but every
	// access in this file indexes it by nid.
	mp map[string]*peerMap
	sync.Mutex
}
    75  
    76  func (pKey peerKey) String() string {
    77  	return fmt.Sprintf("%s %s", pKey.peerIP, pKey.peerMac)
    78  }
    79  
    80  func (pKey *peerKey) Scan(state fmt.ScanState, verb rune) error {
    81  	ipB, err := state.Token(true, nil)
    82  	if err != nil {
    83  		return err
    84  	}
    85  
    86  	pKey.peerIP = net.ParseIP(string(ipB))
    87  
    88  	macB, err := state.Token(true, nil)
    89  	if err != nil {
    90  		return err
    91  	}
    92  
    93  	pKey.peerMac, err = net.ParseMAC(string(macB))
    94  	return err
    95  }
    96  
    97  func (d *driver) peerDbWalk(f func(string, *peerKey, *peerEntry) bool) error {
    98  	d.peerDb.Lock()
    99  	nids := []string{}
   100  	for nid := range d.peerDb.mp {
   101  		nids = append(nids, nid)
   102  	}
   103  	d.peerDb.Unlock()
   104  
   105  	for _, nid := range nids {
   106  		d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool {
   107  			return f(nid, pKey, pEntry)
   108  		})
   109  	}
   110  	return nil
   111  }
   112  
   113  func (d *driver) peerDbNetworkWalk(nid string, f func(*peerKey, *peerEntry) bool) error {
   114  	d.peerDb.Lock()
   115  	pMap, ok := d.peerDb.mp[nid]
   116  	d.peerDb.Unlock()
   117  
   118  	if !ok {
   119  		return nil
   120  	}
   121  
   122  	mp := map[string]peerEntry{}
   123  	pMap.Lock()
   124  	for _, pKeyStr := range pMap.mp.Keys() {
   125  		entryDBList, ok := pMap.mp.Get(pKeyStr)
   126  		if ok {
   127  			peerEntryDB := entryDBList[0].(peerEntryDB)
   128  			mp[pKeyStr] = peerEntryDB.UnMarshalDB()
   129  		}
   130  	}
   131  	pMap.Unlock()
   132  
   133  	for pKeyStr, pEntry := range mp {
   134  		var pKey peerKey
   135  		pEntry := pEntry
   136  		if _, err := fmt.Sscan(pKeyStr, &pKey); err != nil {
   137  			logrus.Warnf("Peer key scan on network %s failed: %v", nid, err)
   138  		}
   139  		if f(&pKey, &pEntry) {
   140  			return nil
   141  		}
   142  	}
   143  
   144  	return nil
   145  }
   146  
   147  func (d *driver) peerDbSearch(nid string, peerIP net.IP) (*peerKey, *peerEntry, error) {
   148  	var pKeyMatched *peerKey
   149  	var pEntryMatched *peerEntry
   150  	err := d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool {
   151  		if pKey.peerIP.Equal(peerIP) {
   152  			pKeyMatched = pKey
   153  			pEntryMatched = pEntry
   154  			return true
   155  		}
   156  
   157  		return false
   158  	})
   159  
   160  	if err != nil {
   161  		return nil, nil, fmt.Errorf("peerdb search for peer ip %q failed: %v", peerIP, err)
   162  	}
   163  
   164  	if pKeyMatched == nil || pEntryMatched == nil {
   165  		return nil, nil, fmt.Errorf("peer ip %q not found in peerdb", peerIP)
   166  	}
   167  
   168  	return pKeyMatched, pEntryMatched, nil
   169  }
   170  
   171  func (d *driver) peerDbAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask,
   172  	peerMac net.HardwareAddr, vtep net.IP, isLocal bool) (bool, int) {
   173  
   174  	d.peerDb.Lock()
   175  	pMap, ok := d.peerDb.mp[nid]
   176  	if !ok {
   177  		d.peerDb.mp[nid] = &peerMap{
   178  			mp: setmatrix.NewSetMatrix(),
   179  		}
   180  
   181  		pMap = d.peerDb.mp[nid]
   182  	}
   183  	d.peerDb.Unlock()
   184  
   185  	pKey := peerKey{
   186  		peerIP:  peerIP,
   187  		peerMac: peerMac,
   188  	}
   189  
   190  	pEntry := peerEntry{
   191  		eid:        eid,
   192  		vtep:       vtep,
   193  		peerIPMask: peerIPMask,
   194  		isLocal:    isLocal,
   195  	}
   196  
   197  	pMap.Lock()
   198  	defer pMap.Unlock()
   199  	b, i := pMap.mp.Insert(pKey.String(), pEntry.MarshalDB())
   200  	if i != 1 {
   201  		// Transient case, there is more than one endpoint that is using the same IP,MAC pair
   202  		s, _ := pMap.mp.String(pKey.String())
   203  		logrus.Warnf("peerDbAdd transient condition - Key:%s cardinality:%d db state:%s", pKey.String(), i, s)
   204  	}
   205  	return b, i
   206  }
   207  
   208  func (d *driver) peerDbDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMask,
   209  	peerMac net.HardwareAddr, vtep net.IP, isLocal bool) (bool, int) {
   210  
   211  	d.peerDb.Lock()
   212  	pMap, ok := d.peerDb.mp[nid]
   213  	if !ok {
   214  		d.peerDb.Unlock()
   215  		return false, 0
   216  	}
   217  	d.peerDb.Unlock()
   218  
   219  	pKey := peerKey{
   220  		peerIP:  peerIP,
   221  		peerMac: peerMac,
   222  	}
   223  
   224  	pEntry := peerEntry{
   225  		eid:        eid,
   226  		vtep:       vtep,
   227  		peerIPMask: peerIPMask,
   228  		isLocal:    isLocal,
   229  	}
   230  
   231  	pMap.Lock()
   232  	defer pMap.Unlock()
   233  	b, i := pMap.mp.Remove(pKey.String(), pEntry.MarshalDB())
   234  	if i != 0 {
   235  		// Transient case, there is more than one endpoint that is using the same IP,MAC pair
   236  		s, _ := pMap.mp.String(pKey.String())
   237  		logrus.Warnf("peerDbDelete transient condition - Key:%s cardinality:%d db state:%s", pKey.String(), i, s)
   238  	}
   239  	return b, i
   240  }
   241  
// initSandboxPeerDB programs into the sandbox of network nid all the peers
// that are already present in the peer database.
//
// The overlay uses a lazy initialization approach: when a network is created
// and the driver is registered, the overlay does not allocate resources until
// a sandbox is actually created.
// At the moment of this call, which happens when a sandbox is initialized, it
// is possible that networkDB has already delivered events for peers that are
// available on remote nodes. Those peers are saved in the peerDB, and this
// function is used to properly configure the network sandbox with all the
// peers that were previously notified.
// Note also that this method sends a single message on the channel; the
// goroutine on the other side loops over the whole table of peers and
// programs their state in one single atomic operation. This is fundamental to
// guarantee consistency and to avoid that a new peerAdd or peerDelete gets
// reordered during the sandbox init.
func (d *driver) initSandboxPeerDB(nid string) {
	d.peerInit(nid)
}
   256  
// peerOperationType selects which handler peerOpRoutine runs for a queued
// peerOperation.
type peerOperationType int32

const (
	// peerOperationINIT is dispatched to peerInitOp.
	peerOperationINIT peerOperationType = iota
	// peerOperationADD is dispatched to peerAddOp.
	peerOperationADD
	// peerOperationDELETE is dispatched to peerDeleteOp.
	peerOperationDELETE
	// peerOperationFLUSH is dispatched to peerFlushOp.
	peerOperationFLUSH
)
   265  
// peerOperation is the message sent on the driver's peer operation channel
// and consumed by peerOpRoutine. Which fields are meaningful depends on
// opType: INIT and FLUSH only set networkID (and callerName), while ADD and
// DELETE also carry the description of the peer.
type peerOperation struct {
	opType     peerOperationType // handler to run
	networkID  string            // network the operation applies to
	endpointID string            // endpoint owning the peer (ADD/DELETE)
	peerIP     net.IP            // peer IP address (ADD/DELETE)
	peerIPMask net.IPMask        // mask of the peer IP address (ADD/DELETE)
	peerMac    net.HardwareAddr  // peer MAC address (ADD/DELETE)
	vtepIP     net.IP            // VTEP address passed to the handler (ADD/DELETE)
	l2Miss     bool              // forwarded to peerAddOp (ADD only)
	l3Miss     bool              // forwarded to peerAddOp (ADD only)
	localPeer  bool              // whether the peer is local to this node (ADD/DELETE)
	callerName string            // result of caller.Name(1) at enqueue time; not read in this file
}
   279  
   280  func (d *driver) peerOpRoutine(ctx context.Context, ch chan *peerOperation) {
   281  	var err error
   282  	for {
   283  		select {
   284  		case <-ctx.Done():
   285  			return
   286  		case op := <-ch:
   287  			switch op.opType {
   288  			case peerOperationINIT:
   289  				err = d.peerInitOp(op.networkID)
   290  			case peerOperationADD:
   291  				err = d.peerAddOp(op.networkID, op.endpointID, op.peerIP, op.peerIPMask, op.peerMac, op.vtepIP, op.l2Miss, op.l3Miss, true, op.localPeer)
   292  			case peerOperationDELETE:
   293  				err = d.peerDeleteOp(op.networkID, op.endpointID, op.peerIP, op.peerIPMask, op.peerMac, op.vtepIP, op.localPeer)
   294  			case peerOperationFLUSH:
   295  				err = d.peerFlushOp(op.networkID)
   296  			}
   297  			if err != nil {
   298  				logrus.Warnf("Peer operation failed:%s op:%v", err, op)
   299  			}
   300  		}
   301  	}
   302  }
   303  
   304  func (d *driver) peerInit(nid string) {
   305  	callerName := caller.Name(1)
   306  	d.peerOpCh <- &peerOperation{
   307  		opType:     peerOperationINIT,
   308  		networkID:  nid,
   309  		callerName: callerName,
   310  	}
   311  }
   312  
   313  func (d *driver) peerInitOp(nid string) error {
   314  	return d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool {
   315  		// Local entries do not need to be added
   316  		if pEntry.isLocal {
   317  			return false
   318  		}
   319  
   320  		d.peerAddOp(nid, pEntry.eid, pKey.peerIP, pEntry.peerIPMask, pKey.peerMac, pEntry.vtep, false, false, false, pEntry.isLocal)
   321  		// return false to loop on all entries
   322  		return false
   323  	})
   324  }
   325  
   326  func (d *driver) peerAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask,
   327  	peerMac net.HardwareAddr, vtep net.IP, l2Miss, l3Miss, localPeer bool) {
   328  	d.peerOpCh <- &peerOperation{
   329  		opType:     peerOperationADD,
   330  		networkID:  nid,
   331  		endpointID: eid,
   332  		peerIP:     peerIP,
   333  		peerIPMask: peerIPMask,
   334  		peerMac:    peerMac,
   335  		vtepIP:     vtep,
   336  		l2Miss:     l2Miss,
   337  		l3Miss:     l3Miss,
   338  		localPeer:  localPeer,
   339  		callerName: caller.Name(1),
   340  	}
   341  }
   342  
// peerAddOp adds a peer and, for remote peers, programs it into the sandbox
// of network nid.
//
// When updateDB is true the peer is first recorded in the peer database.
// Local peers need nothing more. For remote peers the function returns nil
// without doing anything else when the network or its sandbox does not exist
// yet; otherwise it resolves the subnet of the peer IP, obtains the vxlan ID,
// joins the subnet sandbox, checks encryption (a failure there is only
// logged) and finally adds the neighbor entry for the peer IP and the fdb
// entry for the peer MAC. l3Miss and l2Miss are forwarded to the respective
// AddNeighbor calls.
//
// An error is returned when the IDs are invalid, the subnet cannot be found,
// or any of the vxlan/sandbox/neighbor steps fails.
func (d *driver) peerAddOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask,
	peerMac net.HardwareAddr, vtep net.IP, l2Miss, l3Miss, updateDB, localPeer bool) error {

	if err := validateID(nid, eid); err != nil {
		return err
	}

	// dbEntries stays 0 when the database is not updated by this call.
	var dbEntries int
	var inserted bool
	if updateDB {
		inserted, dbEntries = d.peerDbAdd(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer)
		if !inserted {
			logrus.Warnf("Entry already present in db: nid:%s eid:%s peerIP:%v peerMac:%v isLocal:%t vtep:%v",
				nid, eid, peerIP, peerMac, localPeer, vtep)
		}
	}

	// Local peers do not need any further configuration
	if localPeer {
		return nil
	}

	n := d.network(nid)
	if n == nil {
		return nil
	}

	sbox := n.sandbox()
	if sbox == nil {
		// We are hitting this case for all the events that arrive before the sandbox
		// is created. The peer has already been added to the database, and the sandbox init
		// (see initSandboxPeerDB) will configure all these peers from the database
		return nil
	}

	IP := &net.IPNet{
		IP:   peerIP,
		Mask: peerIPMask,
	}

	s := n.getSubnetforIP(IP)
	if s == nil {
		return fmt.Errorf("couldn't find the subnet %q in network %q", IP.String(), n.id)
	}

	if err := n.obtainVxlanID(s); err != nil {
		return fmt.Errorf("couldn't get vxlan id for %q: %v", s.subnetIP.String(), err)
	}

	if err := n.joinSandbox(s, false, false); err != nil {
		return fmt.Errorf("subnet sandbox join failed for %q: %v", s.subnetIP.String(), err)
	}

	// An encryption check failure is reported but does not abort the operation.
	if err := d.checkEncryption(nid, vtep, n.vxlanID(s), false, true); err != nil {
		logrus.Warn(err)
	}

	// Add neighbor entry for the peer IP
	if err := sbox.AddNeighbor(peerIP, peerMac, l3Miss, sbox.NeighborOptions().LinkName(s.vxlanName)); err != nil {
		if _, ok := err.(osl.NeighborSearchError); ok && dbEntries > 1 {
			// We are in the transient case so only the first configuration is programmed into the kernel
			// Upon deletion if the active configuration is deleted the next one from the database will be restored
			// Note that the fdb entry below is skipped as well
			return nil
		}
		return fmt.Errorf("could not add neighbor entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err)
	}

	// Add fdb entry to the bridge for the peer mac
	if err := sbox.AddNeighbor(vtep, peerMac, l2Miss, sbox.NeighborOptions().LinkName(s.vxlanName),
		sbox.NeighborOptions().Family(syscall.AF_BRIDGE)); err != nil {
		return fmt.Errorf("could not add fdb entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err)
	}

	return nil
}
   419  
   420  func (d *driver) peerDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMask,
   421  	peerMac net.HardwareAddr, vtep net.IP, localPeer bool) {
   422  	d.peerOpCh <- &peerOperation{
   423  		opType:     peerOperationDELETE,
   424  		networkID:  nid,
   425  		endpointID: eid,
   426  		peerIP:     peerIP,
   427  		peerIPMask: peerIPMask,
   428  		peerMac:    peerMac,
   429  		vtepIP:     vtep,
   430  		callerName: caller.Name(1),
   431  		localPeer:  localPeer,
   432  	}
   433  }
   434  
// peerDeleteOp removes a peer from the peer database and, for remote peers,
// removes its fdb and neighbor entries from the sandbox of network nid.
//
// The database entry is always removed first (a missing entry is only
// logged). Nothing else is done when the network or its sandbox does not
// exist. If, after the removal, other entries are still stored under the same
// <IP, MAC> key, one configuration for the peer IP is looked up in the
// database and programmed again through peerAddOp (with updateDB false).
//
// An error is returned when the IDs are invalid, when a sandbox entry cannot
// be deleted, or when the restore step fails.
func (d *driver) peerDeleteOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask,
	peerMac net.HardwareAddr, vtep net.IP, localPeer bool) error {

	if err := validateID(nid, eid); err != nil {
		return err
	}

	// dbEntries is the number of entries left under the <IP, MAC> key.
	deleted, dbEntries := d.peerDbDelete(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer)
	if !deleted {
		logrus.Warnf("Entry was not in db: nid:%s eid:%s peerIP:%v peerMac:%v isLocal:%t vtep:%v",
			nid, eid, peerIP, peerMac, localPeer, vtep)
	}

	n := d.network(nid)
	if n == nil {
		return nil
	}

	sbox := n.sandbox()
	if sbox == nil {
		return nil
	}

	// An encryption check failure is reported but does not abort the operation.
	if err := d.checkEncryption(nid, vtep, 0, localPeer, false); err != nil {
		logrus.Warn(err)
	}

	// Local peers do not have any local configuration to delete
	if !localPeer {
		// Remove fdb entry to the bridge for the peer mac
		if err := sbox.DeleteNeighbor(vtep, peerMac, true); err != nil {
			if _, ok := err.(osl.NeighborSearchError); ok && dbEntries > 0 {
				// We fall in here if there is a transient state and if the neighbor that is being deleted
				// was never configured into the kernel (we allow only 1 configuration at a time per <ip,mac> mapping)
				return nil
			}
			return fmt.Errorf("could not delete fdb entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err)
		}

		// Delete neighbor entry for the peer IP
		if err := sbox.DeleteNeighbor(peerIP, peerMac, true); err != nil {
			return fmt.Errorf("could not delete neighbor entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err)
		}
	}

	// No other entry shares the key: nothing to restore.
	if dbEntries == 0 {
		return nil
	}

	// If there is still an entry in the database and the deletion went through without errors, it means that there is now no
	// configuration active in the kernel.
	// Restore one configuration for the <ip,mac> directly from the database; note that it is guaranteed that there is one.
	// NOTE(review): peerDbSearch matches on the IP only, so the restored entry may carry a different MAC — confirm this is intended.
	peerKey, peerEntry, err := d.peerDbSearch(nid, peerIP)
	if err != nil {
		logrus.Errorf("peerDeleteOp unable to restore a configuration for nid:%s ip:%v mac:%v err:%s", nid, peerIP, peerMac, err)
		return err
	}
	return d.peerAddOp(nid, peerEntry.eid, peerIP, peerEntry.peerIPMask, peerKey.peerMac, peerEntry.vtep, false, false, false, peerEntry.isLocal)
}
   494  
   495  func (d *driver) peerFlush(nid string) {
   496  	d.peerOpCh <- &peerOperation{
   497  		opType:     peerOperationFLUSH,
   498  		networkID:  nid,
   499  		callerName: caller.Name(1),
   500  	}
   501  }
   502  
   503  func (d *driver) peerFlushOp(nid string) error {
   504  	d.peerDb.Lock()
   505  	defer d.peerDb.Unlock()
   506  	_, ok := d.peerDb.mp[nid]
   507  	if !ok {
   508  		return fmt.Errorf("Unable to find the peerDB for nid:%s", nid)
   509  	}
   510  	delete(d.peerDb.mp, nid)
   511  	return nil
   512  }
   513  
   514  func (d *driver) pushLocalDb() {
   515  	d.peerDbWalk(func(nid string, pKey *peerKey, pEntry *peerEntry) bool {
   516  		if pEntry.isLocal {
   517  			d.pushLocalEndpointEvent("join", nid, pEntry.eid)
   518  		}
   519  		return false
   520  	})
   521  }
   522  
   523  func (d *driver) peerDBUpdateSelf() {
   524  	d.peerDbWalk(func(nid string, pkey *peerKey, pEntry *peerEntry) bool {
   525  		if pEntry.isLocal {
   526  			pEntry.vtep = net.ParseIP(d.advertiseAddress)
   527  		}
   528  		return false
   529  	})
   530  }