github.com/jfrazelle/docker@v1.1.2-0.20210712172922-bf78e25fe508/libnetwork/drivers/overlay/peerdb.go (about)

     1  // +build linux
     2  
     3  package overlay
     4  
     5  import (
     6  	"context"
     7  	"fmt"
     8  	"net"
     9  	"sync"
    10  	"syscall"
    11  
    12  	"github.com/docker/docker/libnetwork/internal/caller"
    13  	"github.com/docker/docker/libnetwork/internal/setmatrix"
    14  	"github.com/docker/docker/libnetwork/osl"
    15  	"github.com/sirupsen/logrus"
    16  )
    17  
// ovPeerTable is the table name string "overlay_peer_table".
// NOTE(review): it is not referenced by any code in this file; presumably it names
// the table through which overlay peer records are exchanged — confirm against callers.
const ovPeerTable = "overlay_peer_table"
    19  
// peerKey identifies a peer by its IP and MAC address. Its String form is the key
// under which the peer's entries are stored in the per-network set matrix (see
// peerDbAdd), and Scan parses that form back into a peerKey.
type peerKey struct {
	peerIP  net.IP
	peerMac net.HardwareAddr
}
    24  
    25  type peerEntry struct {
    26  	eid        string
    27  	vtep       net.IP
    28  	peerIPMask net.IPMask
    29  	isLocal    bool
    30  }
    31  
    32  func (p *peerEntry) MarshalDB() peerEntryDB {
    33  	ones, bits := p.peerIPMask.Size()
    34  	return peerEntryDB{
    35  		eid:            p.eid,
    36  		vtep:           p.vtep.String(),
    37  		peerIPMaskOnes: ones,
    38  		peerIPMaskBits: bits,
    39  		isLocal:        p.isLocal,
    40  	}
    41  }
    42  
    43  // This the structure saved into the set (SetMatrix), due to the implementation of it
    44  // the value inserted in the set has to be Hashable so the []byte had to be converted into
    45  // strings
    46  type peerEntryDB struct {
    47  	eid            string
    48  	vtep           string
    49  	peerIPMaskOnes int
    50  	peerIPMaskBits int
    51  	isLocal        bool
    52  }
    53  
    54  func (p *peerEntryDB) UnMarshalDB() peerEntry {
    55  	return peerEntry{
    56  		eid:        p.eid,
    57  		vtep:       net.ParseIP(p.vtep),
    58  		peerIPMask: net.CIDRMask(p.peerIPMaskOnes, p.peerIPMaskBits),
    59  		isLocal:    p.isLocal,
    60  	}
    61  }
    62  
// peerMap holds the peers known for a single network. The embedded mutex is held
// by the code in this file around every access to mp.
type peerMap struct {
	// mp maps a peerKey string to the set of peerEntryDB values stored for that key.
	// Note the values have to be objects and not pointers to maintain the proper
	// equality checks.
	mp setmatrix.SetMatrix
	sync.Mutex
}
    68  
// peerNetworkMap is the top-level peer database: one peerMap per network. The
// embedded mutex is held by the code in this file around every access to mp.
type peerNetworkMap struct {
	// mp is keyed by network ID. Entries are created on demand by peerDbAdd and
	// removed by peerFlushOp.
	mp map[string]*peerMap
	sync.Mutex
}
    74  
    75  func (pKey peerKey) String() string {
    76  	return fmt.Sprintf("%s %s", pKey.peerIP, pKey.peerMac)
    77  }
    78  
    79  func (pKey *peerKey) Scan(state fmt.ScanState, verb rune) error {
    80  	ipB, err := state.Token(true, nil)
    81  	if err != nil {
    82  		return err
    83  	}
    84  
    85  	pKey.peerIP = net.ParseIP(string(ipB))
    86  
    87  	macB, err := state.Token(true, nil)
    88  	if err != nil {
    89  		return err
    90  	}
    91  
    92  	pKey.peerMac, err = net.ParseMAC(string(macB))
    93  	return err
    94  }
    95  
    96  func (d *driver) peerDbWalk(f func(string, *peerKey, *peerEntry) bool) error {
    97  	d.peerDb.Lock()
    98  	nids := []string{}
    99  	for nid := range d.peerDb.mp {
   100  		nids = append(nids, nid)
   101  	}
   102  	d.peerDb.Unlock()
   103  
   104  	for _, nid := range nids {
   105  		d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool {
   106  			return f(nid, pKey, pEntry)
   107  		})
   108  	}
   109  	return nil
   110  }
   111  
   112  func (d *driver) peerDbNetworkWalk(nid string, f func(*peerKey, *peerEntry) bool) error {
   113  	d.peerDb.Lock()
   114  	pMap, ok := d.peerDb.mp[nid]
   115  	d.peerDb.Unlock()
   116  
   117  	if !ok {
   118  		return nil
   119  	}
   120  
   121  	mp := map[string]peerEntry{}
   122  	pMap.Lock()
   123  	for _, pKeyStr := range pMap.mp.Keys() {
   124  		entryDBList, ok := pMap.mp.Get(pKeyStr)
   125  		if ok {
   126  			peerEntryDB := entryDBList[0].(peerEntryDB)
   127  			mp[pKeyStr] = peerEntryDB.UnMarshalDB()
   128  		}
   129  	}
   130  	pMap.Unlock()
   131  
   132  	for pKeyStr, pEntry := range mp {
   133  		var pKey peerKey
   134  		pEntry := pEntry
   135  		if _, err := fmt.Sscan(pKeyStr, &pKey); err != nil {
   136  			logrus.Warnf("Peer key scan on network %s failed: %v", nid, err)
   137  		}
   138  		if f(&pKey, &pEntry) {
   139  			return nil
   140  		}
   141  	}
   142  
   143  	return nil
   144  }
   145  
   146  func (d *driver) peerDbSearch(nid string, peerIP net.IP) (*peerKey, *peerEntry, error) {
   147  	var pKeyMatched *peerKey
   148  	var pEntryMatched *peerEntry
   149  	err := d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool {
   150  		if pKey.peerIP.Equal(peerIP) {
   151  			pKeyMatched = pKey
   152  			pEntryMatched = pEntry
   153  			return true
   154  		}
   155  
   156  		return false
   157  	})
   158  
   159  	if err != nil {
   160  		return nil, nil, fmt.Errorf("peerdb search for peer ip %q failed: %v", peerIP, err)
   161  	}
   162  
   163  	if pKeyMatched == nil || pEntryMatched == nil {
   164  		return nil, nil, fmt.Errorf("peer ip %q not found in peerdb", peerIP)
   165  	}
   166  
   167  	return pKeyMatched, pEntryMatched, nil
   168  }
   169  
   170  func (d *driver) peerDbAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask,
   171  	peerMac net.HardwareAddr, vtep net.IP, isLocal bool) (bool, int) {
   172  
   173  	d.peerDb.Lock()
   174  	pMap, ok := d.peerDb.mp[nid]
   175  	if !ok {
   176  		d.peerDb.mp[nid] = &peerMap{
   177  			mp: setmatrix.NewSetMatrix(),
   178  		}
   179  
   180  		pMap = d.peerDb.mp[nid]
   181  	}
   182  	d.peerDb.Unlock()
   183  
   184  	pKey := peerKey{
   185  		peerIP:  peerIP,
   186  		peerMac: peerMac,
   187  	}
   188  
   189  	pEntry := peerEntry{
   190  		eid:        eid,
   191  		vtep:       vtep,
   192  		peerIPMask: peerIPMask,
   193  		isLocal:    isLocal,
   194  	}
   195  
   196  	pMap.Lock()
   197  	defer pMap.Unlock()
   198  	b, i := pMap.mp.Insert(pKey.String(), pEntry.MarshalDB())
   199  	if i != 1 {
   200  		// Transient case, there is more than one endpoint that is using the same IP,MAC pair
   201  		s, _ := pMap.mp.String(pKey.String())
   202  		logrus.Warnf("peerDbAdd transient condition - Key:%s cardinality:%d db state:%s", pKey.String(), i, s)
   203  	}
   204  	return b, i
   205  }
   206  
   207  func (d *driver) peerDbDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMask,
   208  	peerMac net.HardwareAddr, vtep net.IP, isLocal bool) (bool, int) {
   209  
   210  	d.peerDb.Lock()
   211  	pMap, ok := d.peerDb.mp[nid]
   212  	if !ok {
   213  		d.peerDb.Unlock()
   214  		return false, 0
   215  	}
   216  	d.peerDb.Unlock()
   217  
   218  	pKey := peerKey{
   219  		peerIP:  peerIP,
   220  		peerMac: peerMac,
   221  	}
   222  
   223  	pEntry := peerEntry{
   224  		eid:        eid,
   225  		vtep:       vtep,
   226  		peerIPMask: peerIPMask,
   227  		isLocal:    isLocal,
   228  	}
   229  
   230  	pMap.Lock()
   231  	defer pMap.Unlock()
   232  	b, i := pMap.mp.Remove(pKey.String(), pEntry.MarshalDB())
   233  	if i != 0 {
   234  		// Transient case, there is more than one endpoint that is using the same IP,MAC pair
   235  		s, _ := pMap.mp.String(pKey.String())
   236  		logrus.Warnf("peerDbDelete transient condition - Key:%s cardinality:%d db state:%s", pKey.String(), i, s)
   237  	}
   238  	return b, i
   239  }
   240  
// initSandboxPeerDB configures the sandbox of network nid with the peers that are
// already recorded in the peerDB.
//
// The overlay uses a lazy initialization approach: when a network is created and
// the driver is registered, the overlay does not allocate resources until the
// moment a sandbox is actually created.
// At the moment of this call, which happens when a sandbox is initialized, it is
// possible that networkDB has already delivered some events about peers available
// on remote nodes. These peers are saved into the peerDB, and this function is
// used to properly configure the network sandbox with all the peers that were
// previously notified.
// Note also that this method sends a single message on the channel, and the
// goroutine on the other side will loop over the whole table of peers and program
// their state in one single atomic operation. This is fundamental to guarantee
// consistency and to prevent new peerAdd or peerDelete operations from being
// reordered during the sandbox init.
func (d *driver) initSandboxPeerDB(nid string) {
	d.peerInit(nid)
}
   255  
// peerOperationType selects which action peerOpRoutine performs for a peerOperation.
type peerOperationType int32

const (
	// peerOperationINIT is dispatched to peerInitOp.
	peerOperationINIT peerOperationType = iota
	// peerOperationADD is dispatched to peerAddOp.
	peerOperationADD
	// peerOperationDELETE is dispatched to peerDeleteOp.
	peerOperationDELETE
	// peerOperationFLUSH is dispatched to peerFlushOp.
	peerOperationFLUSH
)

// peerOperation is the message sent on the driver's peer operation channel and
// consumed by peerOpRoutine. Which fields are meaningful depends on opType: the
// INIT and FLUSH senders in this file only set networkID and callerName.
type peerOperation struct {
	opType     peerOperationType
	networkID  string
	endpointID string
	peerIP     net.IP
	peerIPMask net.IPMask
	peerMac    net.HardwareAddr
	vtepIP     net.IP
	l2Miss     bool
	l3Miss     bool
	localPeer  bool
	// callerName is filled with caller.Name(1) by the function that enqueues the
	// operation. It is not read by the code in this file other than when the whole
	// operation is printed in a log message — presumably kept for debugging.
	callerName string
}
   278  
   279  func (d *driver) peerOpRoutine(ctx context.Context, ch chan *peerOperation) {
   280  	var err error
   281  	for {
   282  		select {
   283  		case <-ctx.Done():
   284  			return
   285  		case op := <-ch:
   286  			switch op.opType {
   287  			case peerOperationINIT:
   288  				err = d.peerInitOp(op.networkID)
   289  			case peerOperationADD:
   290  				err = d.peerAddOp(op.networkID, op.endpointID, op.peerIP, op.peerIPMask, op.peerMac, op.vtepIP, op.l2Miss, op.l3Miss, true, op.localPeer)
   291  			case peerOperationDELETE:
   292  				err = d.peerDeleteOp(op.networkID, op.endpointID, op.peerIP, op.peerIPMask, op.peerMac, op.vtepIP, op.localPeer)
   293  			case peerOperationFLUSH:
   294  				err = d.peerFlushOp(op.networkID)
   295  			}
   296  			if err != nil {
   297  				logrus.Warnf("Peer operation failed:%s op:%v", err, op)
   298  			}
   299  		}
   300  	}
   301  }
   302  
   303  func (d *driver) peerInit(nid string) {
   304  	callerName := caller.Name(1)
   305  	d.peerOpCh <- &peerOperation{
   306  		opType:     peerOperationINIT,
   307  		networkID:  nid,
   308  		callerName: callerName,
   309  	}
   310  }
   311  
   312  func (d *driver) peerInitOp(nid string) error {
   313  	return d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool {
   314  		// Local entries do not need to be added
   315  		if pEntry.isLocal {
   316  			return false
   317  		}
   318  
   319  		d.peerAddOp(nid, pEntry.eid, pKey.peerIP, pEntry.peerIPMask, pKey.peerMac, pEntry.vtep, false, false, false, pEntry.isLocal)
   320  		// return false to loop on all entries
   321  		return false
   322  	})
   323  }
   324  
   325  func (d *driver) peerAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask,
   326  	peerMac net.HardwareAddr, vtep net.IP, l2Miss, l3Miss, localPeer bool) {
   327  	d.peerOpCh <- &peerOperation{
   328  		opType:     peerOperationADD,
   329  		networkID:  nid,
   330  		endpointID: eid,
   331  		peerIP:     peerIP,
   332  		peerIPMask: peerIPMask,
   333  		peerMac:    peerMac,
   334  		vtepIP:     vtep,
   335  		l2Miss:     l2Miss,
   336  		l3Miss:     l3Miss,
   337  		localPeer:  localPeer,
   338  		callerName: caller.Name(1),
   339  	}
   340  }
   341  
   342  func (d *driver) peerAddOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask,
   343  	peerMac net.HardwareAddr, vtep net.IP, l2Miss, l3Miss, updateDB, localPeer bool) error {
   344  
   345  	if err := validateID(nid, eid); err != nil {
   346  		return err
   347  	}
   348  
   349  	var dbEntries int
   350  	var inserted bool
   351  	if updateDB {
   352  		inserted, dbEntries = d.peerDbAdd(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer)
   353  		if !inserted {
   354  			logrus.Warnf("Entry already present in db: nid:%s eid:%s peerIP:%v peerMac:%v isLocal:%t vtep:%v",
   355  				nid, eid, peerIP, peerMac, localPeer, vtep)
   356  		}
   357  	}
   358  
   359  	// Local peers do not need any further configuration
   360  	if localPeer {
   361  		return nil
   362  	}
   363  
   364  	n := d.network(nid)
   365  	if n == nil {
   366  		return nil
   367  	}
   368  
   369  	sbox := n.sandbox()
   370  	if sbox == nil {
   371  		// We are hitting this case for all the events that are arriving before that the sandbox
   372  		// is being created. The peer got already added into the database and the sanbox init will
   373  		// call the peerDbUpdateSandbox that will configure all these peers from the database
   374  		return nil
   375  	}
   376  
   377  	IP := &net.IPNet{
   378  		IP:   peerIP,
   379  		Mask: peerIPMask,
   380  	}
   381  
   382  	s := n.getSubnetforIP(IP)
   383  	if s == nil {
   384  		return fmt.Errorf("couldn't find the subnet %q in network %q", IP.String(), n.id)
   385  	}
   386  
   387  	if err := n.obtainVxlanID(s); err != nil {
   388  		return fmt.Errorf("couldn't get vxlan id for %q: %v", s.subnetIP.String(), err)
   389  	}
   390  
   391  	if err := n.joinSandbox(s, false, false); err != nil {
   392  		return fmt.Errorf("subnet sandbox join failed for %q: %v", s.subnetIP.String(), err)
   393  	}
   394  
   395  	if err := d.checkEncryption(nid, vtep, n.vxlanID(s), false, true); err != nil {
   396  		logrus.Warn(err)
   397  	}
   398  
   399  	// Add neighbor entry for the peer IP
   400  	if err := sbox.AddNeighbor(peerIP, peerMac, l3Miss, sbox.NeighborOptions().LinkName(s.vxlanName)); err != nil {
   401  		if _, ok := err.(osl.NeighborSearchError); ok && dbEntries > 1 {
   402  			// We are in the transient case so only the first configuration is programmed into the kernel
   403  			// Upon deletion if the active configuration is deleted the next one from the database will be restored
   404  			// Note we are skipping also the next configuration
   405  			return nil
   406  		}
   407  		return fmt.Errorf("could not add neighbor entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err)
   408  	}
   409  
   410  	// Add fdb entry to the bridge for the peer mac
   411  	if err := sbox.AddNeighbor(vtep, peerMac, l2Miss, sbox.NeighborOptions().LinkName(s.vxlanName),
   412  		sbox.NeighborOptions().Family(syscall.AF_BRIDGE)); err != nil {
   413  		return fmt.Errorf("could not add fdb entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err)
   414  	}
   415  
   416  	return nil
   417  }
   418  
   419  func (d *driver) peerDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMask,
   420  	peerMac net.HardwareAddr, vtep net.IP, localPeer bool) {
   421  	d.peerOpCh <- &peerOperation{
   422  		opType:     peerOperationDELETE,
   423  		networkID:  nid,
   424  		endpointID: eid,
   425  		peerIP:     peerIP,
   426  		peerIPMask: peerIPMask,
   427  		peerMac:    peerMac,
   428  		vtepIP:     vtep,
   429  		callerName: caller.Name(1),
   430  		localPeer:  localPeer,
   431  	}
   432  }
   433  
   434  func (d *driver) peerDeleteOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask,
   435  	peerMac net.HardwareAddr, vtep net.IP, localPeer bool) error {
   436  
   437  	if err := validateID(nid, eid); err != nil {
   438  		return err
   439  	}
   440  
   441  	deleted, dbEntries := d.peerDbDelete(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer)
   442  	if !deleted {
   443  		logrus.Warnf("Entry was not in db: nid:%s eid:%s peerIP:%v peerMac:%v isLocal:%t vtep:%v",
   444  			nid, eid, peerIP, peerMac, localPeer, vtep)
   445  	}
   446  
   447  	n := d.network(nid)
   448  	if n == nil {
   449  		return nil
   450  	}
   451  
   452  	sbox := n.sandbox()
   453  	if sbox == nil {
   454  		return nil
   455  	}
   456  
   457  	if err := d.checkEncryption(nid, vtep, 0, localPeer, false); err != nil {
   458  		logrus.Warn(err)
   459  	}
   460  
   461  	// Local peers do not have any local configuration to delete
   462  	if !localPeer {
   463  		// Remove fdb entry to the bridge for the peer mac
   464  		if err := sbox.DeleteNeighbor(vtep, peerMac, true); err != nil {
   465  			if _, ok := err.(osl.NeighborSearchError); ok && dbEntries > 0 {
   466  				// We fall in here if there is a transient state and if the neighbor that is being deleted
   467  				// was never been configured into the kernel (we allow only 1 configuration at the time per <ip,mac> mapping)
   468  				return nil
   469  			}
   470  			return fmt.Errorf("could not delete fdb entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err)
   471  		}
   472  
   473  		// Delete neighbor entry for the peer IP
   474  		if err := sbox.DeleteNeighbor(peerIP, peerMac, true); err != nil {
   475  			return fmt.Errorf("could not delete neighbor entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err)
   476  		}
   477  	}
   478  
   479  	if dbEntries == 0 {
   480  		return nil
   481  	}
   482  
   483  	// If there is still an entry into the database and the deletion went through without errors means that there is now no
   484  	// configuration active in the kernel.
   485  	// Restore one configuration for the <ip,mac> directly from the database, note that is guaranteed that there is one
   486  	peerKey, peerEntry, err := d.peerDbSearch(nid, peerIP)
   487  	if err != nil {
   488  		logrus.Errorf("peerDeleteOp unable to restore a configuration for nid:%s ip:%v mac:%v err:%s", nid, peerIP, peerMac, err)
   489  		return err
   490  	}
   491  	return d.peerAddOp(nid, peerEntry.eid, peerIP, peerEntry.peerIPMask, peerKey.peerMac, peerEntry.vtep, false, false, false, peerEntry.isLocal)
   492  }
   493  
   494  func (d *driver) peerFlush(nid string) {
   495  	d.peerOpCh <- &peerOperation{
   496  		opType:     peerOperationFLUSH,
   497  		networkID:  nid,
   498  		callerName: caller.Name(1),
   499  	}
   500  }
   501  
   502  func (d *driver) peerFlushOp(nid string) error {
   503  	d.peerDb.Lock()
   504  	defer d.peerDb.Unlock()
   505  	_, ok := d.peerDb.mp[nid]
   506  	if !ok {
   507  		return fmt.Errorf("Unable to find the peerDB for nid:%s", nid)
   508  	}
   509  	delete(d.peerDb.mp, nid)
   510  	return nil
   511  }
   512  
   513  func (d *driver) pushLocalDb() {
   514  	d.peerDbWalk(func(nid string, pKey *peerKey, pEntry *peerEntry) bool {
   515  		if pEntry.isLocal {
   516  			d.pushLocalEndpointEvent("join", nid, pEntry.eid)
   517  		}
   518  		return false
   519  	})
   520  }
   521  
   522  func (d *driver) peerDBUpdateSelf() {
   523  	d.peerDbWalk(func(nid string, pkey *peerKey, pEntry *peerEntry) bool {
   524  		if pEntry.isLocal {
   525  			pEntry.vtep = net.ParseIP(d.advertiseAddress)
   526  		}
   527  		return false
   528  	})
   529  }