
     1  package overlay
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"net"
     7  	"sync"
     8  	"syscall"
    10  	""
    11  	""
    12  	""
    13  	""
    14  )
    // ovPeerTable is the string "overlay_peer_table". It is not referenced in the code visible
    // here; presumably it names the table that carries overlay peer records — TODO confirm at its use sites.
    16  const ovPeerTable = "overlay_peer_table"
    // peerKey identifies a peer by its IP and MAC address pair. Its String method renders it as
    // "<ip> <mac>" and its Scan method parses that form back (peerDbNetworkWalk does so with fmt.Sscan).
    18  type peerKey struct {
    19  	peerIP  net.IP // IP address of the peer
    20  	peerMac net.HardwareAddr // MAC address of the peer
    21  }
    // peerEntry is the in-memory value associated with a peerKey. MarshalDB converts it to the
    // peerEntryDB form and peerEntryDB.UnMarshalDB converts it back.
    23  type peerEntry struct {
    24  	eid        string // endpoint ID the peer belongs to
    25  	vtep       net.IP // address used as the destination of the bridge fdb entry in peerAddOp
    26  	peerIPMask net.IPMask // mask paired with the peer IP to build the net.IPNet in peerAddOp
    27  	isLocal    bool // true for local peers, which peerInitOp and peerAddOp do not program
    28  }
    30  func (p *peerEntry) MarshalDB() peerEntryDB {
    31  	ones, bits := p.peerIPMask.Size()
    32  	return peerEntryDB{
    33  		eid:            p.eid,
    34  		vtep:           p.vtep.String(),
    35  		peerIPMaskOnes: ones,
    36  		peerIPMaskBits: bits,
    37  		isLocal:        p.isLocal,
    38  	}
    39  }
    41  // peerEntryDB is the form of a peerEntry that is saved into the set (SetMatrix). Because of the
    42  // set implementation, inserted values have to be hashable, so the []byte-based fields of
    43  // peerEntry are stored here as a string (vtep) and a pair of ints (the IP mask).
    44  type peerEntryDB struct {
    45  	eid            string // copied verbatim from peerEntry.eid
    46  	vtep           string // peerEntry.vtep rendered with net.IP.String
    47  	peerIPMaskOnes int // first result of peerEntry.peerIPMask.Size()
    48  	peerIPMaskBits int // second result of peerEntry.peerIPMask.Size()
    49  	isLocal        bool // copied verbatim from peerEntry.isLocal
    50  }
    52  func (p *peerEntryDB) UnMarshalDB() peerEntry {
    53  	return peerEntry{
    54  		eid:        p.eid,
    55  		vtep:       net.ParseIP(p.vtep),
    56  		peerIPMask: net.CIDRMask(p.peerIPMaskOnes, p.peerIPMaskBits),
    57  		isLocal:    p.isLocal,
    58  	}
    59  }
    // peerMap is the per-network peer container: a set matrix guarded by the embedded mutex.
    // The functions in this file take the mutex (pMap.Lock) around every access to mp.
    61  type peerMap struct {
    62  	// set of peerEntry; note that the values have to be objects and not pointers to keep equality checks correct
    63  	mp setmatrix.SetMatrix
    64  	sync.Mutex
    65  }
    // peerNetworkMap maps a string key to a peerMap and is guarded by the embedded mutex.
    67  type peerNetworkMap struct {
    68  	// map with key peerKey. NOTE(review): the code in this file indexes a map of *peerMap by nid, so the key looks like a network ID rather than a peerKey — confirm
    69  	mp map[string]*peerMap
    70  	sync.Mutex
    71  }
    73  func (pKey peerKey) String() string {
    74  	return fmt.Sprintf("%s %s", pKey.peerIP, pKey.peerMac)
    75  }
    77  func (pKey *peerKey) Scan(state fmt.ScanState, verb rune) error {
    78  	ipB, err := state.Token(true, nil)
    79  	if err != nil {
    80  		return err
    81  	}
    83  	pKey.peerIP = net.ParseIP(string(ipB))
    85  	macB, err := state.Token(true, nil)
    86  	if err != nil {
    87  		return err
    88  	}
    90  	pKey.peerMac, err = net.ParseMAC(string(macB))
    91  	return err
    92  }
    // peerDbWalk calls f for the peers of every known network. The network IDs are collected
    // while holding d.peerDb's lock, which is released before any callback runs; each network is
    // then visited through peerDbNetworkWalk. A true return from f ends the walk of the current
    // network only: the remaining networks are still visited. The function always returns nil.
    // NOTE(review): the operand of the first range statement is missing in the reviewed copy.
    94  func (d *driver) peerDbWalk(f func(string, *peerKey, *peerEntry) bool) error {
    95  	d.peerDb.Lock()
    96  	nids := []string{}
    97  	for nid := range {
    98  		nids = append(nids, nid)
    99  	}
   100  	d.peerDb.Unlock()
   102  	for _, nid := range nids {
        		// the error returned by peerDbNetworkWalk is not checked here
   103  		d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool {
   104  			return f(nid, pKey, pEntry)
   105  		})
   106  	}
   107  	return nil
   108  }
   // peerDbNetworkWalk calls f for the peers stored for network nid until f returns true.
   // It first copies the entries into a local map while holding the peerMap lock, then invokes f
   // on that copy with no lock held, so f receives pointers to copies rather than to stored values.
   // It returns nil in every visible path, including when nid has no peerMap.
   // NOTE(review): the expressions that look up the peerMap, list its keys and fetch the values
   // are missing in the reviewed copy.
   110  func (d *driver) peerDbNetworkWalk(nid string, f func(*peerKey, *peerEntry) bool) error {
   111  	d.peerDb.Lock()
   112  	pMap, ok :=[nid]
   113  	d.peerDb.Unlock()
   115  	if !ok {
   116  		return nil
   117  	}
   119  	mp := map[string]peerEntry{}
   120  	pMap.Lock()
   121  	for _, pKeyStr := range {
   122  		entryDBList, ok :=
   123  		if ok {
        			// only the first element of the list is used for each key
   124  			peerEntryDB := entryDBList[0].(peerEntryDB)
   125  			mp[pKeyStr] = peerEntryDB.UnMarshalDB()
   126  		}
   127  	}
   128  	pMap.Unlock()
   130  	for pKeyStr, pEntry := range mp {
   131  		var pKey peerKey
        		// a scan failure is only logged; f is still called with whatever was parsed into pKey
   132  		if _, err := fmt.Sscan(pKeyStr, &pKey); err != nil {
   133  			logrus.Warnf("Peer key scan on network %s failed: %v", nid, err)
   134  		}
   135  		if f(&pKey, &pEntry) {
   136  			return nil
   137  		}
   138  	}
   140  	return nil
   141  }
   143  func (d *driver) peerDbSearch(nid string, peerIP net.IP) (*peerKey, *peerEntry, error) {
   144  	var pKeyMatched *peerKey
   145  	var pEntryMatched *peerEntry
   146  	err := d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool {
   147  		if pKey.peerIP.Equal(peerIP) {
   148  			pKeyMatched = pKey
   149  			pEntryMatched = pEntry
   150  			return true
   151  		}
   153  		return false
   154  	})
   156  	if err != nil {
   157  		return nil, nil, fmt.Errorf("peerdb search for peer ip %q failed: %v", peerIP, err)
   158  	}
   160  	if pKeyMatched == nil || pEntryMatched == nil {
   161  		return nil, nil, fmt.Errorf("peer ip %q not found in peerdb", peerIP)
   162  	}
   164  	return pKeyMatched, pEntryMatched, nil
   165  }
   // peerDbAdd records the peer (peerIP, peerMac) with the given endpoint data for network nid,
   // creating the network's peerMap (with a new set matrix) when it does not exist yet.
   // It returns the two values produced by the insert call: a bool (peerAddOp treats false as
   // "entry already present") and an int that the log message below calls the cardinality.
   // A cardinality other than 1 is logged as a transient condition.
   // NOTE(review): the map accesses and the insert/state calls are partly missing in the reviewed copy.
   167  func (d *driver) peerDbAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask,
   168  	peerMac net.HardwareAddr, vtep net.IP, isLocal bool) (bool, int) {
   170  	d.peerDb.Lock()
   171  	pMap, ok :=[nid]
   172  	if !ok {
   173[nid] = &peerMap{
   174  			mp: setmatrix.NewSetMatrix(),
   175  		}
   177  		pMap =[nid]
   178  	}
   179  	d.peerDb.Unlock()
   181  	pKey := peerKey{
   182  		peerIP:  peerIP,
   183  		peerMac: peerMac,
   184  	}
   186  	pEntry := peerEntry{
   187  		eid:        eid,
   188  		vtep:       vtep,
   189  		peerIPMask: peerIPMask,
   190  		isLocal:    isLocal,
   191  	}
        	// the peerMap lock is held until the function returns
   193  	pMap.Lock()
   194  	defer pMap.Unlock()
   195  	b, i :=, pEntry.MarshalDB())
   196  	if i != 1 {
   197  		// Transient case: more than one endpoint is using the same IP,MAC pair
   198  		s, _ :=
   199  		logrus.Warnf("peerDbAdd transient condition - Key:%s cardinality:%d db state:%s", pKey.String(), i, s)
   200  	}
   201  	return b, i
   202  }
   // peerDbDelete removes the peer (peerIP, peerMac) with the given endpoint data from network
   // nid. It returns (false, 0) when the network has no peerMap; otherwise it returns the two
   // values produced by the removal call: a bool (peerDeleteOp treats false as "entry was not in
   // db") and an int that the log message below calls the cardinality. A cardinality other than
   // 0 is logged as a transient condition.
   // NOTE(review): the map lookup and the removal/state calls are partly missing in the reviewed copy.
   204  func (d *driver) peerDbDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMask,
   205  	peerMac net.HardwareAddr, vtep net.IP, isLocal bool) (bool, int) {
   207  	d.peerDb.Lock()
   208  	pMap, ok :=[nid]
   209  	if !ok {
   210  		d.peerDb.Unlock()
   211  		return false, 0
   212  	}
   213  	d.peerDb.Unlock()
   215  	pKey := peerKey{
   216  		peerIP:  peerIP,
   217  		peerMac: peerMac,
   218  	}
   220  	pEntry := peerEntry{
   221  		eid:        eid,
   222  		vtep:       vtep,
   223  		peerIPMask: peerIPMask,
   224  		isLocal:    isLocal,
   225  	}
        	// the peerMap lock is held until the function returns
   227  	pMap.Lock()
   228  	defer pMap.Unlock()
   229  	b, i :=, pEntry.MarshalDB())
   230  	if i != 0 {
   231  		// Transient case: more than one endpoint is using the same IP,MAC pair
   232  		s, _ :=
   233  		logrus.Warnf("peerDbDelete transient condition - Key:%s cardinality:%d db state:%s", pKey.String(), i, s)
   234  	}
   235  	return b, i
   236  }
   238  // initSandboxPeerDB requests the programming of the peers already known for network nid.
   239  // The overlay uses a lazy initialization approach: when a network is created and the driver is
   240  // registered, the overlay does not allocate resources until a sandbox is actually created.
   241  // By the time of this call, which happens when a sandbox is initialized, networkDB may already
   242  // have delivered events for peers available on remote nodes. Those peers are saved in the peerDB
   243  // and this function is used to configure the network sandbox with all the previously notified peers.
   244  // Note also that this method sends a single message on the channel, and the goroutine on the
   245  // other side loops over the whole table of peers and programs their state as one single
   246  // operation. This is fundamental to guarantee consistency and to avoid a new peerAdd or
   247  // peerDelete being reordered during the sandbox init.
   248  // The work itself is delegated to peerInit.
   249  func (d *driver) initSandboxPeerDB(nid string) {
   250  	d.peerInit(nid)
   251  }
   // peerOperationType selects which handler peerOpRoutine runs for a queued peerOperation.
   253  type peerOperationType int32
   255  const (
   256  	peerOperationINIT peerOperationType = iota // handled by peerInitOp
   257  	peerOperationADD // handled by peerAddOp
   258  	peerOperationDELETE // handled by peerDeleteOp
   259  	peerOperationFLUSH // handled by peerFlushOp
   260  )
   // peerOperation is the message sent on the driver's peerOpCh and consumed by peerOpRoutine.
   // Which fields are meaningful depends on opType: INIT and FLUSH only set networkID (and
   // callerName), while ADD and DELETE also carry the peer description.
   262  type peerOperation struct {
   263  	opType     peerOperationType // selects the handler in peerOpRoutine
   264  	networkID  string // passed to the handlers as nid
   265  	endpointID string // passed to peerAddOp/peerDeleteOp as eid
   266  	peerIP     net.IP
   267  	peerIPMask net.IPMask
   268  	peerMac    net.HardwareAddr
   269  	vtepIP     net.IP // passed to peerAddOp/peerDeleteOp as vtep
   270  	l2Miss     bool // forwarded to peerAddOp only
   271  	l3Miss     bool // forwarded to peerAddOp only
   272  	localPeer  bool // forwarded to peerAddOp and peerDeleteOp
   273  	callerName string // filled from caller.Name(1) by the senders; not read in the code visible here
   274  }
   276  func (d *driver) peerOpRoutine(ctx context.Context, ch chan *peerOperation) {
   277  	var err error
   278  	for {
   279  		select {
   280  		case <-ctx.Done():
   281  			return
   282  		case op := <-ch:
   283  			switch op.opType {
   284  			case peerOperationINIT:
   285  				err = d.peerInitOp(op.networkID)
   286  			case peerOperationADD:
   287  				err = d.peerAddOp(op.networkID, op.endpointID, op.peerIP, op.peerIPMask, op.peerMac, op.vtepIP, op.l2Miss, op.l3Miss, true, op.localPeer)
   288  			case peerOperationDELETE:
   289  				err = d.peerDeleteOp(op.networkID, op.endpointID, op.peerIP, op.peerIPMask, op.peerMac, op.vtepIP, op.localPeer)
   290  			case peerOperationFLUSH:
   291  				err = d.peerFlushOp(op.networkID)
   292  			}
   293  			if err != nil {
   294  				logrus.Warnf("Peer operation failed:%s op:%v", err, op)
   295  			}
   296  		}
   297  	}
   298  }
   300  func (d *driver) peerInit(nid string) {
   301  	callerName := caller.Name(1)
   302  	d.peerOpCh <- &peerOperation{
   303  		opType:     peerOperationINIT,
   304  		networkID:  nid,
   305  		callerName: callerName,
   306  	}
   307  }
   309  func (d *driver) peerInitOp(nid string) error {
   310  	return d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool {
   311  		// Local entries do not need to be added
   312  		if pEntry.isLocal {
   313  			return false
   314  		}
   316  		d.peerAddOp(nid, pEntry.eid, pKey.peerIP, pEntry.peerIPMask, pKey.peerMac, pEntry.vtep, false, false, false, pEntry.isLocal)
   317  		// return false to loop on all entries
   318  		return false
   319  	})
   320  }
   322  func (d *driver) peerAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask,
   323  	peerMac net.HardwareAddr, vtep net.IP, l2Miss, l3Miss, localPeer bool) {
   324  	d.peerOpCh <- &peerOperation{
   325  		opType:     peerOperationADD,
   326  		networkID:  nid,
   327  		endpointID: eid,
   328  		peerIP:     peerIP,
   329  		peerIPMask: peerIPMask,
   330  		peerMac:    peerMac,
   331  		vtepIP:     vtep,
   332  		l2Miss:     l2Miss,
   333  		l3Miss:     l3Miss,
   334  		localPeer:  localPeer,
   335  		callerName: caller.Name(1),
   336  	}
   337  }
   // peerAddOp records a peer (when updateDB is true) and, for non-local peers, programs it into
   // the network sandbox: a neighbor entry for the peer IP and a bridge fdb entry for the peer MAC
   // pointing at vtep, both on the subnet's vxlan link.
   // It returns nil without programming anything when the peer is local, when the network lookup
   // yields nil, or when the network has no sandbox yet. Errors from validateID, the subnet lookup,
   // obtainVxlanID, joinSandbox and the two AddNeighbor calls are returned; a checkEncryption
   // failure is only logged.
   // NOTE(review): the right-hand side of "n :=" and the end of the "couldn't find the subnet"
   // Errorf call are missing in the reviewed copy.
   339  func (d *driver) peerAddOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask,
   340  	peerMac net.HardwareAddr, vtep net.IP, l2Miss, l3Miss, updateDB, localPeer bool) error {
   342  	if err := validateID(nid, eid); err != nil {
   343  		return err
   344  	}
        	// dbEntries stays 0 when updateDB is false, which disables the transient-case shortcut below
   346  	var dbEntries int
   347  	var inserted bool
   348  	if updateDB {
   349  		inserted, dbEntries = d.peerDbAdd(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer)
   350  		if !inserted {
   351  			logrus.Warnf("Entry already present in db: nid:%s eid:%s peerIP:%v peerMac:%v isLocal:%t vtep:%v",
   352  				nid, eid, peerIP, peerMac, localPeer, vtep)
   353  		}
   354  	}
   356  	// Local peers do not need any further configuration
   357  	if localPeer {
   358  		return nil
   359  	}
   361  	n :=
   362  	if n == nil {
   363  		return nil
   364  	}
   366  	sbox := n.sandbox()
   367  	if sbox == nil {
   368  		// We hit this case for all the events that arrive before the sandbox is created.
   369  		// The peer has already been added into the database and the sandbox init will
   370  		// call the peerDbUpdateSandbox that will configure all these peers from the database
   371  		return nil
   372  	}
   374  	IP := &net.IPNet{
   375  		IP:   peerIP,
   376  		Mask: peerIPMask,
   377  	}
   379  	s := n.getSubnetforIP(IP)
   380  	if s == nil {
   381  		return fmt.Errorf("couldn't find the subnet %q in network %q", IP.String(),
   382  	}
   384  	if err := n.obtainVxlanID(s); err != nil {
   385  		return fmt.Errorf("couldn't get vxlan id for %q: %v", s.subnetIP.String(), err)
   386  	}
   388  	if err := n.joinSandbox(s, false, false); err != nil {
   389  		return fmt.Errorf("subnet sandbox join failed for %q: %v", s.subnetIP.String(), err)
   390  	}
        	// an encryption check failure is logged but does not abort the add
   392  	if err := d.checkEncryption(nid, vtep, false, true); err != nil {
   393  		logrus.Warn(err)
   394  	}
   396  	// Add neighbor entry for the peer IP
   397  	if err := sbox.AddNeighbor(peerIP, peerMac, l3Miss, sbox.NeighborOptions().LinkName(s.vxlanName)); err != nil {
   398  		if _, ok := err.(osl.NeighborSearchError); ok && dbEntries > 1 {
   399  			// We are in the transient case so only the first configuration is programmed into the kernel
   400  			// Upon deletion if the active configuration is deleted the next one from the database will be restored
   401  			// Note we are skipping also the next configuration
   402  			return nil
   403  		}
   404  		return fmt.Errorf("could not add neighbor entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err)
   405  	}
   407  	// Add fdb entry to the bridge for the peer mac
   408  	if err := sbox.AddNeighbor(vtep, peerMac, l2Miss, sbox.NeighborOptions().LinkName(s.vxlanName),
   409  		sbox.NeighborOptions().Family(syscall.AF_BRIDGE)); err != nil {
   410  		return fmt.Errorf("could not add fdb entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err)
   411  	}
   413  	return nil
   414  }
   416  func (d *driver) peerDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMask,
   417  	peerMac net.HardwareAddr, vtep net.IP, localPeer bool) {
   418  	d.peerOpCh <- &peerOperation{
   419  		opType:     peerOperationDELETE,
   420  		networkID:  nid,
   421  		endpointID: eid,
   422  		peerIP:     peerIP,
   423  		peerIPMask: peerIPMask,
   424  		peerMac:    peerMac,
   425  		vtepIP:     vtep,
   426  		callerName: caller.Name(1),
   427  		localPeer:  localPeer,
   428  	}
   429  }
   // peerDeleteOp removes a peer from the database and, for non-local peers, deletes its bridge
   // fdb entry and its neighbor entry from the network sandbox. When the database still holds
   // entries afterwards (dbEntries != 0), it looks up a remaining peer with the same IP and
   // programs it again through peerAddOp without touching the database.
   // It returns nil early when the network lookup yields nil or the network has no sandbox.
   // Errors from validateID, the DeleteNeighbor calls, peerDbSearch and the final peerAddOp are
   // returned; a checkEncryption failure is only logged.
   // NOTE(review): the right-hand side of "n :=" is missing in the reviewed copy.
   431  func (d *driver) peerDeleteOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask,
   432  	peerMac net.HardwareAddr, vtep net.IP, localPeer bool) error {
   434  	if err := validateID(nid, eid); err != nil {
   435  		return err
   436  	}
   438  	deleted, dbEntries := d.peerDbDelete(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer)
   439  	if !deleted {
   440  		logrus.Warnf("Entry was not in db: nid:%s eid:%s peerIP:%v peerMac:%v isLocal:%t vtep:%v",
   441  			nid, eid, peerIP, peerMac, localPeer, vtep)
   442  	}
   444  	n :=
   445  	if n == nil {
   446  		return nil
   447  	}
   449  	sbox := n.sandbox()
   450  	if sbox == nil {
   451  		return nil
   452  	}
   454  	if err := d.checkEncryption(nid, vtep, localPeer, false); err != nil {
   455  		logrus.Warn(err)
   456  	}
   458  	// Local peers do not have any local configuration to delete
   459  	if !localPeer {
   460  		// Remove fdb entry to the bridge for the peer mac
   461  		if err := sbox.DeleteNeighbor(vtep, peerMac, true); err != nil {
   462  			if _, ok := err.(osl.NeighborSearchError); ok && dbEntries > 0 {
   463  				// We fall in here if there is a transient state and the neighbor that is being deleted
   464  				// was never configured into the kernel (we allow only 1 configuration at a time per <ip,mac> mapping)
   465  				return nil
   466  			}
   467  			return fmt.Errorf("could not delete fdb entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err)
   468  		}
   470  		// Delete neighbor entry for the peer IP
   471  		if err := sbox.DeleteNeighbor(peerIP, peerMac, true); err != nil {
   472  			return fmt.Errorf("could not delete neighbor entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err)
   473  		}
   474  	}
   476  	if dbEntries == 0 {
   477  		return nil
   478  	}
   480  	// If there is still an entry in the database and the deletion went through without errors, there is now no
   481  	// configuration active in the kernel.
   482  	// Restore one configuration for the <ip,mac> directly from the database; note that it is guaranteed that there is one
        	// NOTE(review): peerDbSearch compares the IP only, and the MAC used for the restore comes from the
        	// matched key, so it may differ from peerMac. The locals peerKey/peerEntry shadow the types of the same name.
   483  	peerKey, peerEntry, err := d.peerDbSearch(nid, peerIP)
   484  	if err != nil {
   485  		logrus.Errorf("peerDeleteOp unable to restore a configuration for nid:%s ip:%v mac:%v err:%s", nid, peerIP, peerMac, err)
   486  		return err
   487  	}
   488  	return d.peerAddOp(nid, peerEntry.eid, peerIP, peerEntry.peerIPMask, peerKey.peerMac, peerEntry.vtep, false, false, false, peerEntry.isLocal)
   489  }
   491  func (d *driver) peerFlush(nid string) {
   492  	d.peerOpCh <- &peerOperation{
   493  		opType:     peerOperationFLUSH,
   494  		networkID:  nid,
   495  		callerName: caller.Name(1),
   496  	}
   497  }
   // peerFlushOp drops everything recorded for network nid by deleting the nid key while holding
   // d.peerDb's lock. It returns an error when nid is not present.
   // NOTE(review): the map operand of the lookup and of delete is missing in the reviewed copy.
   499  func (d *driver) peerFlushOp(nid string) error {
   500  	d.peerDb.Lock()
   501  	defer d.peerDb.Unlock()
   502  	_, ok :=[nid]
   503  	if !ok {
   504  		return fmt.Errorf("Unable to find the peerDB for nid:%s", nid)
   505  	}
   506  	delete(, nid)
   507  	return nil
   508  }
   510  func (d *driver) pushLocalDb() {
   511  	d.peerDbWalk(func(nid string, pKey *peerKey, pEntry *peerEntry) bool {
   512  		if pEntry.isLocal {
   513  			d.pushLocalEndpointEvent("join", nid, pEntry.eid)
   514  		}
   515  		return false
   516  	})
   517  }
   519  func (d *driver) peerDBUpdateSelf() {
   520  	d.peerDbWalk(func(nid string, pkey *peerKey, pEntry *peerEntry) bool {
   521  		if pEntry.isLocal {
   522  			pEntry.vtep = net.ParseIP(d.advertiseAddress)
   523  		}
   524  		return false
   525  	})
   526  }