github.com/docker/engine@v22.0.0-20211208180946-d456264580cf+incompatible/libnetwork/networkdb/delegate.go (about)

     1  package networkdb
     2  
     3  import (
     4  	"net"
     5  	"time"
     6  
     7  	"github.com/gogo/protobuf/proto"
     8  	"github.com/sirupsen/logrus"
     9  )
    10  
// delegate is the receiver for the gossip callback methods defined in this
// file (NodeMeta, NotifyMsg, GetBroadcasts, LocalState, MergeRemoteState).
// NOTE(review): the method set looks like memberlist's Delegate interface —
// confirm against where the delegate is registered.
type delegate struct {
	nDB *NetworkDB // database the callbacks read from and apply messages to
}
    14  
    15  func (d *delegate) NodeMeta(limit int) []byte {
    16  	return []byte{}
    17  }
    18  
    19  func (nDB *NetworkDB) handleNodeEvent(nEvent *NodeEvent) bool {
    20  	// Update our local clock if the received messages has newer
    21  	// time.
    22  	nDB.networkClock.Witness(nEvent.LTime)
    23  
    24  	nDB.Lock()
    25  	defer nDB.Unlock()
    26  
    27  	// check if the node exists
    28  	n, _, _ := nDB.findNode(nEvent.NodeName)
    29  	if n == nil {
    30  		return false
    31  	}
    32  
    33  	// check if the event is fresh
    34  	if n.ltime >= nEvent.LTime {
    35  		return false
    36  	}
    37  
    38  	// If we are here means that the event is fresher and the node is known. Update the laport time
    39  	n.ltime = nEvent.LTime
    40  
    41  	// If the node is not known from memberlist we cannot process save any state of it else if it actually
    42  	// dies we won't receive any notification and we will remain stuck with it
    43  	if _, ok := nDB.nodes[nEvent.NodeName]; !ok {
    44  		logrus.Errorf("node: %s is unknown to memberlist", nEvent.NodeName)
    45  		return false
    46  	}
    47  
    48  	switch nEvent.Type {
    49  	case NodeEventTypeJoin:
    50  		moved, err := nDB.changeNodeState(n.Name, nodeActiveState)
    51  		if err != nil {
    52  			logrus.WithError(err).Error("unable to find the node to move")
    53  			return false
    54  		}
    55  		if moved {
    56  			logrus.Infof("%v(%v): Node join event for %s/%s", nDB.config.Hostname, nDB.config.NodeID, n.Name, n.Addr)
    57  		}
    58  		return moved
    59  	case NodeEventTypeLeave:
    60  		moved, err := nDB.changeNodeState(n.Name, nodeLeftState)
    61  		if err != nil {
    62  			logrus.WithError(err).Error("unable to find the node to move")
    63  			return false
    64  		}
    65  		if moved {
    66  			logrus.Infof("%v(%v): Node leave event for %s/%s", nDB.config.Hostname, nDB.config.NodeID, n.Name, n.Addr)
    67  		}
    68  		return moved
    69  	}
    70  
    71  	return false
    72  }
    73  
    74  func (nDB *NetworkDB) handleNetworkEvent(nEvent *NetworkEvent) bool {
    75  	// Update our local clock if the received messages has newer
    76  	// time.
    77  	nDB.networkClock.Witness(nEvent.LTime)
    78  
    79  	nDB.Lock()
    80  	defer nDB.Unlock()
    81  
    82  	if nEvent.NodeName == nDB.config.NodeID {
    83  		return false
    84  	}
    85  
    86  	nodeNetworks, ok := nDB.networks[nEvent.NodeName]
    87  	if !ok {
    88  		// We haven't heard about this node at all.  Ignore the leave
    89  		if nEvent.Type == NetworkEventTypeLeave {
    90  			return false
    91  		}
    92  
    93  		nodeNetworks = make(map[string]*network)
    94  		nDB.networks[nEvent.NodeName] = nodeNetworks
    95  	}
    96  
    97  	if n, ok := nodeNetworks[nEvent.NetworkID]; ok {
    98  		// We have the latest state. Ignore the event
    99  		// since it is stale.
   100  		if n.ltime >= nEvent.LTime {
   101  			return false
   102  		}
   103  
   104  		n.ltime = nEvent.LTime
   105  		n.leaving = nEvent.Type == NetworkEventTypeLeave
   106  		if n.leaving {
   107  			n.reapTime = nDB.config.reapNetworkInterval
   108  
   109  			// The remote node is leaving the network, but not the gossip cluster.
   110  			// Mark all its entries in deleted state, this will guarantee that
   111  			// if some node bulk sync with us, the deleted state of
   112  			// these entries will be propagated.
   113  			nDB.deleteNodeNetworkEntries(nEvent.NetworkID, nEvent.NodeName)
   114  		}
   115  
   116  		if nEvent.Type == NetworkEventTypeLeave {
   117  			nDB.deleteNetworkNode(nEvent.NetworkID, nEvent.NodeName)
   118  		} else {
   119  			nDB.addNetworkNode(nEvent.NetworkID, nEvent.NodeName)
   120  		}
   121  
   122  		return true
   123  	}
   124  
   125  	if nEvent.Type == NetworkEventTypeLeave {
   126  		return false
   127  	}
   128  
   129  	// If the node is not known from memberlist we cannot process save any state of it else if it actually
   130  	// dies we won't receive any notification and we will remain stuck with it
   131  	if _, ok := nDB.nodes[nEvent.NodeName]; !ok {
   132  		return false
   133  	}
   134  
   135  	// This remote network join is being seen the first time.
   136  	nodeNetworks[nEvent.NetworkID] = &network{
   137  		id:    nEvent.NetworkID,
   138  		ltime: nEvent.LTime,
   139  	}
   140  
   141  	nDB.addNetworkNode(nEvent.NetworkID, nEvent.NodeName)
   142  	return true
   143  }
   144  
   145  func (nDB *NetworkDB) handleTableEvent(tEvent *TableEvent, isBulkSync bool) bool {
   146  	// Update our local clock if the received messages has newer time.
   147  	nDB.tableClock.Witness(tEvent.LTime)
   148  
   149  	// Ignore the table events for networks that are in the process of going away
   150  	nDB.RLock()
   151  	networks := nDB.networks[nDB.config.NodeID]
   152  	network, ok := networks[tEvent.NetworkID]
   153  	// Check if the owner of the event is still part of the network
   154  	nodes := nDB.networkNodes[tEvent.NetworkID]
   155  	var nodePresent bool
   156  	for _, node := range nodes {
   157  		if node == tEvent.NodeName {
   158  			nodePresent = true
   159  			break
   160  		}
   161  	}
   162  	nDB.RUnlock()
   163  
   164  	if !ok || network.leaving || !nodePresent {
   165  		// I'm out of the network OR the event owner is not anymore part of the network so do not propagate
   166  		return false
   167  	}
   168  
   169  	nDB.Lock()
   170  	e, err := nDB.getEntry(tEvent.TableName, tEvent.NetworkID, tEvent.Key)
   171  	if err == nil {
   172  		// We have the latest state. Ignore the event
   173  		// since it is stale.
   174  		if e.ltime >= tEvent.LTime {
   175  			nDB.Unlock()
   176  			return false
   177  		}
   178  	} else if tEvent.Type == TableEventTypeDelete && !isBulkSync {
   179  		nDB.Unlock()
   180  		// We don't know the entry, the entry is being deleted and the message is an async message
   181  		// In this case the safest approach is to ignore it, it is possible that the queue grew so much to
   182  		// exceed the garbage collection time (the residual reap time that is in the message is not being
   183  		// updated, to avoid inserting too many messages in the queue).
   184  		// Instead the messages coming from TCP bulk sync are safe with the latest value for the garbage collection time
   185  		return false
   186  	}
   187  
   188  	e = &entry{
   189  		ltime:    tEvent.LTime,
   190  		node:     tEvent.NodeName,
   191  		value:    tEvent.Value,
   192  		deleting: tEvent.Type == TableEventTypeDelete,
   193  		reapTime: time.Duration(tEvent.ResidualReapTime) * time.Second,
   194  	}
   195  
   196  	// All the entries marked for deletion should have a reapTime set greater than 0
   197  	// This case can happen if the cluster is running different versions of the engine where the old version does not have the
   198  	// field. If that is not the case, this can be a BUG
   199  	if e.deleting && e.reapTime == 0 {
   200  		logrus.Warnf("%v(%v) handleTableEvent object %+v has a 0 reapTime, is the cluster running the same docker engine version?",
   201  			nDB.config.Hostname, nDB.config.NodeID, tEvent)
   202  		e.reapTime = nDB.config.reapEntryInterval
   203  	}
   204  	nDB.createOrUpdateEntry(tEvent.NetworkID, tEvent.TableName, tEvent.Key, e)
   205  	nDB.Unlock()
   206  
   207  	if err != nil && tEvent.Type == TableEventTypeDelete {
   208  		// Again we don't know the entry but this is coming from a TCP sync so the message body is up to date.
   209  		// We had saved the state so to speed up convergence and be able to avoid accepting create events.
   210  		// Now we will rebroadcast the message if 2 conditions are met:
   211  		// 1) we had already synced this network (during the network join)
   212  		// 2) the residual reapTime is higher than 1/6 of the total reapTime.
   213  		// If the residual reapTime is lower or equal to 1/6 of the total reapTime don't bother broadcasting it around
   214  		// most likely the cluster is already aware of it
   215  		// This also reduce the possibility that deletion of entries close to their garbage collection ends up circuling around
   216  		// forever
   217  		//logrus.Infof("exiting on delete not knowing the obj with rebroadcast:%t", network.inSync)
   218  		return network.inSync && e.reapTime > nDB.config.reapEntryInterval/6
   219  	}
   220  
   221  	var op opType
   222  	switch tEvent.Type {
   223  	case TableEventTypeCreate:
   224  		op = opCreate
   225  	case TableEventTypeUpdate:
   226  		op = opUpdate
   227  	case TableEventTypeDelete:
   228  		op = opDelete
   229  	}
   230  
   231  	nDB.broadcaster.Write(makeEvent(op, tEvent.TableName, tEvent.NetworkID, tEvent.Key, tEvent.Value))
   232  	return network.inSync
   233  }
   234  
   235  func (nDB *NetworkDB) handleCompound(buf []byte, isBulkSync bool) {
   236  	// Decode the parts
   237  	parts, err := decodeCompoundMessage(buf)
   238  	if err != nil {
   239  		logrus.Errorf("Failed to decode compound request: %v", err)
   240  		return
   241  	}
   242  
   243  	// Handle each message
   244  	for _, part := range parts {
   245  		nDB.handleMessage(part, isBulkSync)
   246  	}
   247  }
   248  
   249  func (nDB *NetworkDB) handleTableMessage(buf []byte, isBulkSync bool) {
   250  	var tEvent TableEvent
   251  	if err := proto.Unmarshal(buf, &tEvent); err != nil {
   252  		logrus.Errorf("Error decoding table event message: %v", err)
   253  		return
   254  	}
   255  
   256  	// Ignore messages that this node generated.
   257  	if tEvent.NodeName == nDB.config.NodeID {
   258  		return
   259  	}
   260  
   261  	if rebroadcast := nDB.handleTableEvent(&tEvent, isBulkSync); rebroadcast {
   262  		var err error
   263  		buf, err = encodeRawMessage(MessageTypeTableEvent, buf)
   264  		if err != nil {
   265  			logrus.Errorf("Error marshalling gossip message for network event rebroadcast: %v", err)
   266  			return
   267  		}
   268  
   269  		nDB.RLock()
   270  		n, ok := nDB.networks[nDB.config.NodeID][tEvent.NetworkID]
   271  		nDB.RUnlock()
   272  
   273  		// if the network is not there anymore, OR we are leaving the network OR the broadcast queue is not present
   274  		if !ok || n.leaving || n.tableBroadcasts == nil {
   275  			return
   276  		}
   277  
   278  		// if the queue is over the threshold, avoid distributing information coming from TCP sync
   279  		if isBulkSync && n.tableBroadcasts.NumQueued() > maxQueueLenBroadcastOnSync {
   280  			return
   281  		}
   282  
   283  		n.tableBroadcasts.QueueBroadcast(&tableEventMessage{
   284  			msg:   buf,
   285  			id:    tEvent.NetworkID,
   286  			tname: tEvent.TableName,
   287  			key:   tEvent.Key,
   288  		})
   289  	}
   290  }
   291  
   292  func (nDB *NetworkDB) handleNodeMessage(buf []byte) {
   293  	var nEvent NodeEvent
   294  	if err := proto.Unmarshal(buf, &nEvent); err != nil {
   295  		logrus.Errorf("Error decoding node event message: %v", err)
   296  		return
   297  	}
   298  
   299  	if rebroadcast := nDB.handleNodeEvent(&nEvent); rebroadcast {
   300  		var err error
   301  		buf, err = encodeRawMessage(MessageTypeNodeEvent, buf)
   302  		if err != nil {
   303  			logrus.Errorf("Error marshalling gossip message for node event rebroadcast: %v", err)
   304  			return
   305  		}
   306  
   307  		nDB.nodeBroadcasts.QueueBroadcast(&nodeEventMessage{
   308  			msg: buf,
   309  		})
   310  	}
   311  }
   312  
   313  func (nDB *NetworkDB) handleNetworkMessage(buf []byte) {
   314  	var nEvent NetworkEvent
   315  	if err := proto.Unmarshal(buf, &nEvent); err != nil {
   316  		logrus.Errorf("Error decoding network event message: %v", err)
   317  		return
   318  	}
   319  
   320  	if rebroadcast := nDB.handleNetworkEvent(&nEvent); rebroadcast {
   321  		var err error
   322  		buf, err = encodeRawMessage(MessageTypeNetworkEvent, buf)
   323  		if err != nil {
   324  			logrus.Errorf("Error marshalling gossip message for network event rebroadcast: %v", err)
   325  			return
   326  		}
   327  
   328  		nDB.networkBroadcasts.QueueBroadcast(&networkEventMessage{
   329  			msg:  buf,
   330  			id:   nEvent.NetworkID,
   331  			node: nEvent.NodeName,
   332  		})
   333  	}
   334  }
   335  
   336  func (nDB *NetworkDB) handleBulkSync(buf []byte) {
   337  	var bsm BulkSyncMessage
   338  	if err := proto.Unmarshal(buf, &bsm); err != nil {
   339  		logrus.Errorf("Error decoding bulk sync message: %v", err)
   340  		return
   341  	}
   342  
   343  	if bsm.LTime > 0 {
   344  		nDB.tableClock.Witness(bsm.LTime)
   345  	}
   346  
   347  	nDB.handleMessage(bsm.Payload, true)
   348  
   349  	// Don't respond to a bulk sync which was not unsolicited
   350  	if !bsm.Unsolicited {
   351  		nDB.Lock()
   352  		ch, ok := nDB.bulkSyncAckTbl[bsm.NodeName]
   353  		if ok {
   354  			close(ch)
   355  			delete(nDB.bulkSyncAckTbl, bsm.NodeName)
   356  		}
   357  		nDB.Unlock()
   358  
   359  		return
   360  	}
   361  
   362  	var nodeAddr net.IP
   363  	nDB.RLock()
   364  	if node, ok := nDB.nodes[bsm.NodeName]; ok {
   365  		nodeAddr = node.Addr
   366  	}
   367  	nDB.RUnlock()
   368  
   369  	if err := nDB.bulkSyncNode(bsm.Networks, bsm.NodeName, false); err != nil {
   370  		logrus.Errorf("Error in responding to bulk sync from node %s: %v", nodeAddr, err)
   371  	}
   372  }
   373  
   374  func (nDB *NetworkDB) handleMessage(buf []byte, isBulkSync bool) {
   375  	mType, data, err := decodeMessage(buf)
   376  	if err != nil {
   377  		logrus.Errorf("Error decoding gossip message to get message type: %v", err)
   378  		return
   379  	}
   380  
   381  	switch mType {
   382  	case MessageTypeNodeEvent:
   383  		nDB.handleNodeMessage(data)
   384  	case MessageTypeNetworkEvent:
   385  		nDB.handleNetworkMessage(data)
   386  	case MessageTypeTableEvent:
   387  		nDB.handleTableMessage(data, isBulkSync)
   388  	case MessageTypeBulkSync:
   389  		nDB.handleBulkSync(data)
   390  	case MessageTypeCompound:
   391  		nDB.handleCompound(data, isBulkSync)
   392  	default:
   393  		logrus.Errorf("%v(%v): unknown message type %d", nDB.config.Hostname, nDB.config.NodeID, mType)
   394  	}
   395  }
   396  
   397  func (d *delegate) NotifyMsg(buf []byte) {
   398  	if len(buf) == 0 {
   399  		return
   400  	}
   401  
   402  	d.nDB.handleMessage(buf, false)
   403  }
   404  
   405  func (d *delegate) GetBroadcasts(overhead, limit int) [][]byte {
   406  	msgs := d.nDB.networkBroadcasts.GetBroadcasts(overhead, limit)
   407  	msgs = append(msgs, d.nDB.nodeBroadcasts.GetBroadcasts(overhead, limit)...)
   408  	return msgs
   409  }
   410  
   411  func (d *delegate) LocalState(join bool) []byte {
   412  	if join {
   413  		// Update all the local node/network state to a new time to
   414  		// force update on the node we are trying to rejoin, just in
   415  		// case that node has these in leaving state still. This is
   416  		// facilitate fast convergence after recovering from a gossip
   417  		// failure.
   418  		d.nDB.updateLocalNetworkTime()
   419  	}
   420  
   421  	d.nDB.RLock()
   422  	defer d.nDB.RUnlock()
   423  
   424  	pp := NetworkPushPull{
   425  		LTime:    d.nDB.networkClock.Time(),
   426  		NodeName: d.nDB.config.NodeID,
   427  	}
   428  
   429  	for name, nn := range d.nDB.networks {
   430  		for _, n := range nn {
   431  			pp.Networks = append(pp.Networks, &NetworkEntry{
   432  				LTime:     n.ltime,
   433  				NetworkID: n.id,
   434  				NodeName:  name,
   435  				Leaving:   n.leaving,
   436  			})
   437  		}
   438  	}
   439  
   440  	buf, err := encodeMessage(MessageTypePushPull, &pp)
   441  	if err != nil {
   442  		logrus.Errorf("Failed to encode local network state: %v", err)
   443  		return nil
   444  	}
   445  
   446  	return buf
   447  }
   448  
   449  func (d *delegate) MergeRemoteState(buf []byte, isJoin bool) {
   450  	if len(buf) == 0 {
   451  		logrus.Error("zero byte remote network state received")
   452  		return
   453  	}
   454  
   455  	var gMsg GossipMessage
   456  	err := proto.Unmarshal(buf, &gMsg)
   457  	if err != nil {
   458  		logrus.Errorf("Error unmarshalling push pull message: %v", err)
   459  		return
   460  	}
   461  
   462  	if gMsg.Type != MessageTypePushPull {
   463  		logrus.Errorf("Invalid message type %v received from remote", buf[0])
   464  	}
   465  
   466  	pp := NetworkPushPull{}
   467  	if err := proto.Unmarshal(gMsg.Data, &pp); err != nil {
   468  		logrus.Errorf("Failed to decode remote network state: %v", err)
   469  		return
   470  	}
   471  
   472  	nodeEvent := &NodeEvent{
   473  		LTime:    pp.LTime,
   474  		NodeName: pp.NodeName,
   475  		Type:     NodeEventTypeJoin,
   476  	}
   477  	d.nDB.handleNodeEvent(nodeEvent)
   478  
   479  	for _, n := range pp.Networks {
   480  		nEvent := &NetworkEvent{
   481  			LTime:     n.LTime,
   482  			NodeName:  n.NodeName,
   483  			NetworkID: n.NetworkID,
   484  			Type:      NetworkEventTypeJoin,
   485  		}
   486  
   487  		if n.Leaving {
   488  			nEvent.Type = NetworkEventTypeLeave
   489  		}
   490  
   491  		d.nDB.handleNetworkEvent(nEvent)
   492  	}
   493  
   494  }