github.com/Prakhar-Agarwal-byte/moby@v0.0.0-20231027092010-a14e3e8ab87e/libnetwork/networkdb/delegate.go

package networkdb

import (
	"context"
	"net"
	"time"

	"github.com/containerd/log"
	"github.com/gogo/protobuf/proto"
)

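// delegate bridges NetworkDB into the gossip layer. Its methods (NodeMeta,
// NotifyMsg, GetBroadcasts, LocalState, MergeRemoteState) implement the
// memberlist.Delegate interface from hashicorp/memberlist, which NetworkDB
// uses for cluster membership and gossip.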
type delegate struct {
	nDB *NetworkDB
}

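// NodeMeta returns metadata to gossip along with the node. NetworkDB does
// not use node metadata, so it always returns an empty payload.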
func (d *delegate) NodeMeta(limit int) []byte {
	return []byte{}
}

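// handleNodeEvent applies a remote node join/leave event to the local state.
// It returns true when the event changed our view of the node and should
// therefore be rebroadcast to the rest of the cluster.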
func (nDB *NetworkDB) handleNodeEvent(nEvent *NodeEvent) bool {
	// Update our local clock if the received message has a newer
	// time.
	nDB.networkClock.Witness(nEvent.LTime)

	nDB.Lock()
	defer nDB.Unlock()

	// check if the node exists
	n, _, _ := nDB.findNode(nEvent.NodeName)
	if n == nil {
		return false
	}

	// check if the event is fresh
	if n.ltime >= nEvent.LTime {
		return false
	}

	// At this point the event is fresher and the node is known.
	// Update the Lamport time.
	n.ltime = nEvent.LTime

	// If the node is not known to memberlist we cannot save any state
	// for it: if it actually dies we would receive no notification and
	// would remain stuck with it.
	if _, ok := nDB.nodes[nEvent.NodeName]; !ok {
		log.G(context.TODO()).Errorf("node: %s is unknown to memberlist", nEvent.NodeName)
		return false
	}

	switch nEvent.Type {
	case NodeEventTypeJoin:
		moved, err := nDB.changeNodeState(n.Name, nodeActiveState)
		if err != nil {
			log.G(context.TODO()).WithError(err).Error("unable to find the node to move")
			return false
		}
		if moved {
			log.G(context.TODO()).Infof("%v(%v): Node join event for %s/%s", nDB.config.Hostname, nDB.config.NodeID, n.Name, n.Addr)
		}
		return moved
	case NodeEventTypeLeave:
		moved, err := nDB.changeNodeState(n.Name, nodeLeftState)
		if err != nil {
			log.G(context.TODO()).WithError(err).Error("unable to find the node to move")
			return false
		}
		if moved {
			log.G(context.TODO()).Infof("%v(%v): Node leave event for %s/%s", nDB.config.Hostname, nDB.config.NodeID, n.Name, n.Addr)
		}
		return moved
	}

	return false
}

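// handleNetworkEvent applies a remote "node joined/left network" event to the
// local state, returning true when the event was fresh and should be
// rebroadcast.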
func (nDB *NetworkDB) handleNetworkEvent(nEvent *NetworkEvent) bool {
	// Update our local clock if the received message has a newer
	// time.
	nDB.networkClock.Witness(nEvent.LTime)

	nDB.Lock()
	defer nDB.Unlock()

	if nEvent.NodeName == nDB.config.NodeID {
		return false
	}

	nodeNetworks, ok := nDB.networks[nEvent.NodeName]
	if !ok {
		// We haven't heard about this node at all. Ignore the leave.
		if nEvent.Type == NetworkEventTypeLeave {
			return false
		}

		nodeNetworks = make(map[string]*network)
		nDB.networks[nEvent.NodeName] = nodeNetworks
	}

	if n, ok := nodeNetworks[nEvent.NetworkID]; ok {
		// We have the latest state. Ignore the event
		// since it is stale.
		if n.ltime >= nEvent.LTime {
			return false
		}

		n.ltime = nEvent.LTime
		n.leaving = nEvent.Type == NetworkEventTypeLeave
		if n.leaving {
			n.reapTime = nDB.config.reapNetworkInterval

			// The remote node is leaving the network, but not the gossip cluster.
			// Mark all its entries as deleted; this guarantees that
			// if some node bulk syncs with us, the deleted state of
			// these entries will be propagated.
			nDB.deleteNodeNetworkEntries(nEvent.NetworkID, nEvent.NodeName)
		}

		if nEvent.Type == NetworkEventTypeLeave {
			nDB.deleteNetworkNode(nEvent.NetworkID, nEvent.NodeName)
		} else {
			nDB.addNetworkNode(nEvent.NetworkID, nEvent.NodeName)
		}

		return true
	}

	if nEvent.Type == NetworkEventTypeLeave {
		return false
	}

	// If the node is not known to memberlist we cannot save any state
	// for it: if it actually dies we would receive no notification and
	// would remain stuck with it.
	if _, ok := nDB.nodes[nEvent.NodeName]; !ok {
		return false
	}

	// This remote network join is being seen for the first time.
	nodeNetworks[nEvent.NetworkID] = &network{
		id:    nEvent.NetworkID,
		ltime: nEvent.LTime,
	}

	nDB.addNetworkNode(nEvent.NetworkID, nEvent.NodeName)
	return true
}

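// handleTableEvent applies a remote table-entry create/update/delete to the
// local state. isBulkSync is true when the event arrived over a TCP bulk
// sync, whose residual reap times are trustworthy, rather than over async
// gossip. The return value tells the caller whether to rebroadcast the event.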
func (nDB *NetworkDB) handleTableEvent(tEvent *TableEvent, isBulkSync bool) bool {
	// Update our local clock if the received message has a newer time.
	nDB.tableClock.Witness(tEvent.LTime)

	// Ignore table events for networks that are in the process of going away.
	nDB.RLock()
	networks := nDB.networks[nDB.config.NodeID]
	network, ok := networks[tEvent.NetworkID]
	// Check if the owner of the event is still part of the network.
	nodes := nDB.networkNodes[tEvent.NetworkID]
	var nodePresent bool
	for _, node := range nodes {
		if node == tEvent.NodeName {
			nodePresent = true
			break
		}
	}
	nDB.RUnlock()

	if !ok || network.leaving || !nodePresent {
		// I'm out of the network OR the event owner is no longer part
		// of the network, so do not propagate.
		return false
	}

	nDB.Lock()
	e, err := nDB.getEntry(tEvent.TableName, tEvent.NetworkID, tEvent.Key)
	if err == nil {
		// We have the latest state. Ignore the event
		// since it is stale.
		if e.ltime >= tEvent.LTime {
			nDB.Unlock()
			return false
		}
	} else if tEvent.Type == TableEventTypeDelete && !isBulkSync {
		nDB.Unlock()
		// We don't know the entry, it is being deleted, and the message
		// arrived asynchronously. In this case the safest approach is to
		// ignore it: the queue may have grown so much that it exceeded
		// the garbage collection time (the residual reap time in the
		// message is not updated, to avoid inserting too many messages
		// in the queue). Messages coming from a TCP bulk sync, by
		// contrast, carry the latest value for the garbage collection
		// time and are safe to process.
		return false
	}

	e = &entry{
		ltime:    tEvent.LTime,
		node:     tEvent.NodeName,
		value:    tEvent.Value,
		deleting: tEvent.Type == TableEventTypeDelete,
		reapTime: time.Duration(tEvent.ResidualReapTime) * time.Second,
	}

	// All entries marked for deletion should have a reapTime greater than 0.
	// A zero value can happen if the cluster is running mixed engine versions
	// and the old version does not have the field. Otherwise it may be a BUG.
	if e.deleting && e.reapTime == 0 {
		log.G(context.TODO()).Warnf("%v(%v) handleTableEvent object %+v has a 0 reapTime, is the cluster running the same docker engine version?",
			nDB.config.Hostname, nDB.config.NodeID, tEvent)
		e.reapTime = nDB.config.reapEntryInterval
	}
	nDB.createOrUpdateEntry(tEvent.NetworkID, tEvent.TableName, tEvent.Key, e)
	nDB.Unlock()

	if err != nil && tEvent.Type == TableEventTypeDelete {
		// Again we don't know the entry, but this is coming from a TCP sync
		// so the message body is up to date. We saved the state to speed up
		// convergence and to be able to avoid accepting create events.
		// Now we rebroadcast the message if two conditions are met:
		// 1) we had already synced this network (during the network join);
		// 2) the residual reapTime is higher than 1/6 of the total reapTime.
		// If the residual reapTime is lower than or equal to 1/6 of the
		// total reapTime, don't bother broadcasting it around; most likely
		// the cluster is already aware of it. This also reduces the chance
		// that deletions of entries close to their garbage collection end
		// up circulating forever.
		// log.G(ctx).Infof("exiting on delete not knowing the obj with rebroadcast:%t", network.inSync)
		return network.inSync && e.reapTime > nDB.config.reapEntryInterval/6
	}

	var op opType
	switch tEvent.Type {
	case TableEventTypeCreate:
		op = opCreate
	case TableEventTypeUpdate:
		op = opUpdate
	case TableEventTypeDelete:
		op = opDelete
	}

	nDB.broadcaster.Write(makeEvent(op, tEvent.TableName, tEvent.NetworkID, tEvent.Key, tEvent.Value))
	return network.inSync
}

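// handleCompound splits a compound message into its parts and dispatches each
// one through handleMessage.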
func (nDB *NetworkDB) handleCompound(buf []byte, isBulkSync bool) {
	// Decode the parts
	parts, err := decodeCompoundMessage(buf)
	if err != nil {
		log.G(context.TODO()).Errorf("Failed to decode compound request: %v", err)
		return
	}

	// Handle each message
	for _, part := range parts {
		nDB.handleMessage(part, isBulkSync)
	}
}

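// handleTableMessage decodes a gossiped table event, applies it, and, if it
// was fresh, re-queues it on the owning network's broadcast queue so the
// gossip keeps propagating.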
func (nDB *NetworkDB) handleTableMessage(buf []byte, isBulkSync bool) {
	var tEvent TableEvent
	if err := proto.Unmarshal(buf, &tEvent); err != nil {
		log.G(context.TODO()).Errorf("Error decoding table event message: %v", err)
		return
	}

	// Ignore messages that this node generated.
	if tEvent.NodeName == nDB.config.NodeID {
		return
	}

	if rebroadcast := nDB.handleTableEvent(&tEvent, isBulkSync); rebroadcast {
		var err error
		buf, err = encodeRawMessage(MessageTypeTableEvent, buf)
		if err != nil {
			log.G(context.TODO()).Errorf("Error marshalling gossip message for table event rebroadcast: %v", err)
			return
		}

		nDB.RLock()
		n, ok := nDB.networks[nDB.config.NodeID][tEvent.NetworkID]
		nDB.RUnlock()

		// If the network is not there anymore, OR we are leaving the network,
		// OR the broadcast queue is not present, don't rebroadcast.
		if !ok || n.leaving || n.tableBroadcasts == nil {
			return
		}

		// If the queue is over the threshold, avoid distributing information
		// coming from TCP sync.
		if isBulkSync && n.tableBroadcasts.NumQueued() > maxQueueLenBroadcastOnSync {
			return
		}

		n.tableBroadcasts.QueueBroadcast(&tableEventMessage{
			msg:   buf,
			id:    tEvent.NetworkID,
			tname: tEvent.TableName,
			key:   tEvent.Key,
		})
	}
}

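// handleNodeMessage decodes a gossiped node event, applies it, and re-queues
// it for further gossip when it changed local state.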
func (nDB *NetworkDB) handleNodeMessage(buf []byte) {
	var nEvent NodeEvent
	if err := proto.Unmarshal(buf, &nEvent); err != nil {
		log.G(context.TODO()).Errorf("Error decoding node event message: %v", err)
		return
	}

	if rebroadcast := nDB.handleNodeEvent(&nEvent); rebroadcast {
		var err error
		buf, err = encodeRawMessage(MessageTypeNodeEvent, buf)
		if err != nil {
			log.G(context.TODO()).Errorf("Error marshalling gossip message for node event rebroadcast: %v", err)
			return
		}

		nDB.nodeBroadcasts.QueueBroadcast(&nodeEventMessage{
			msg: buf,
		})
	}
}

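// handleNetworkMessage decodes a gossiped network event, applies it, and
// re-queues it for further gossip when it changed local state.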
func (nDB *NetworkDB) handleNetworkMessage(buf []byte) {
	var nEvent NetworkEvent
	if err := proto.Unmarshal(buf, &nEvent); err != nil {
		log.G(context.TODO()).Errorf("Error decoding network event message: %v", err)
		return
	}

	if rebroadcast := nDB.handleNetworkEvent(&nEvent); rebroadcast {
		var err error
		buf, err = encodeRawMessage(MessageTypeNetworkEvent, buf)
		if err != nil {
			log.G(context.TODO()).Errorf("Error marshalling gossip message for network event rebroadcast: %v", err)
			return
		}

		nDB.networkBroadcasts.QueueBroadcast(&networkEventMessage{
			msg:  buf,
			id:   nEvent.NetworkID,
			node: nEvent.NodeName,
		})
	}
}

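// handleBulkSync processes a TCP bulk sync payload. For a reply to one of our
// own requests it signals the waiter; for an unsolicited sync it responds
// with our state for the same networks.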
func (nDB *NetworkDB) handleBulkSync(buf []byte) {
	var bsm BulkSyncMessage
	if err := proto.Unmarshal(buf, &bsm); err != nil {
		log.G(context.TODO()).Errorf("Error decoding bulk sync message: %v", err)
		return
	}

	if bsm.LTime > 0 {
		nDB.tableClock.Witness(bsm.LTime)
	}

	nDB.handleMessage(bsm.Payload, true)

	// A bulk sync that is not unsolicited is a reply to a request we sent:
	// signal the waiter instead of responding.
	if !bsm.Unsolicited {
		nDB.Lock()
		ch, ok := nDB.bulkSyncAckTbl[bsm.NodeName]
		if ok {
			close(ch)
			delete(nDB.bulkSyncAckTbl, bsm.NodeName)
		}
		nDB.Unlock()

		return
	}

	var nodeAddr net.IP
	nDB.RLock()
	if node, ok := nDB.nodes[bsm.NodeName]; ok {
		nodeAddr = node.Addr
	}
	nDB.RUnlock()

	if err := nDB.bulkSyncNode(bsm.Networks, bsm.NodeName, false); err != nil {
		log.G(context.TODO()).Errorf("Error in responding to bulk sync from node %s: %v", nodeAddr, err)
	}
}

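// handleMessage decodes the type header of a gossip message and dispatches it
// to the matching handler.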
func (nDB *NetworkDB) handleMessage(buf []byte, isBulkSync bool) {
	mType, data, err := decodeMessage(buf)
	if err != nil {
		log.G(context.TODO()).Errorf("Error decoding gossip message to get message type: %v", err)
		return
	}

	switch mType {
	case MessageTypeNodeEvent:
		nDB.handleNodeMessage(data)
	case MessageTypeNetworkEvent:
		nDB.handleNetworkMessage(data)
	case MessageTypeTableEvent:
		nDB.handleTableMessage(data, isBulkSync)
	case MessageTypeBulkSync:
		nDB.handleBulkSync(data)
	case MessageTypeCompound:
		nDB.handleCompound(data, isBulkSync)
	default:
		log.G(context.TODO()).Errorf("%v(%v): unknown message type %d", nDB.config.Hostname, nDB.config.NodeID, mType)
	}
}

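// NotifyMsg is called by memberlist for every user message received over
// gossip.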
func (d *delegate) NotifyMsg(buf []byte) {
	if len(buf) == 0 {
		return
	}

	d.nDB.handleMessage(buf, false)
}

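// GetBroadcasts is called by memberlist to collect pending network and node
// event broadcasts, subject to the given per-message overhead and byte limit.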
func (d *delegate) GetBroadcasts(overhead, limit int) [][]byte {
	msgs := d.nDB.networkBroadcasts.GetBroadcasts(overhead, limit)
	msgs = append(msgs, d.nDB.nodeBroadcasts.GetBroadcasts(overhead, limit)...)
	return msgs
}

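// LocalState is called by memberlist during push/pull (TCP) exchanges and
// returns this node's view of all network memberships, encoded as a
// NetworkPushPull message.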
func (d *delegate) LocalState(join bool) []byte {
	if join {
		// Update all the local node/network state to a new time to
		// force an update on the node we are trying to rejoin, just in
		// case that node still has these in the leaving state. This is
		// to facilitate fast convergence after recovering from a gossip
		// failure.
		d.nDB.updateLocalNetworkTime()
	}

	d.nDB.RLock()
	defer d.nDB.RUnlock()

	pp := NetworkPushPull{
		LTime:    d.nDB.networkClock.Time(),
		NodeName: d.nDB.config.NodeID,
	}

	for name, nn := range d.nDB.networks {
		for _, n := range nn {
			pp.Networks = append(pp.Networks, &NetworkEntry{
				LTime:     n.ltime,
				NetworkID: n.id,
				NodeName:  name,
				Leaving:   n.leaving,
			})
		}
	}

	buf, err := encodeMessage(MessageTypePushPull, &pp)
	if err != nil {
		log.G(context.TODO()).Errorf("Failed to encode local network state: %v", err)
		return nil
	}

	return buf
}

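// MergeRemoteState is the push/pull counterpart of LocalState: it decodes the
// remote node's NetworkPushPull state and replays it as node and network
// join/leave events.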
func (d *delegate) MergeRemoteState(buf []byte, isJoin bool) {
	if len(buf) == 0 {
		log.G(context.TODO()).Error("zero byte remote network state received")
		return
	}

	var gMsg GossipMessage
	err := proto.Unmarshal(buf, &gMsg)
	if err != nil {
		log.G(context.TODO()).Errorf("Error unmarshalling push pull message: %v", err)
		return
	}

	if gMsg.Type != MessageTypePushPull {
		log.G(context.TODO()).Errorf("Invalid message type %v received from remote", gMsg.Type)
		return
	}

	pp := NetworkPushPull{}
	if err := proto.Unmarshal(gMsg.Data, &pp); err != nil {
		log.G(context.TODO()).Errorf("Failed to decode remote network state: %v", err)
		return
	}

	nodeEvent := &NodeEvent{
		LTime:    pp.LTime,
		NodeName: pp.NodeName,
		Type:     NodeEventTypeJoin,
	}
	d.nDB.handleNodeEvent(nodeEvent)

	for _, n := range pp.Networks {
		nEvent := &NetworkEvent{
			LTime:     n.LTime,
			NodeName:  n.NodeName,
			NetworkID: n.NetworkID,
			Type:      NetworkEventTypeJoin,
		}

		if n.Leaving {
			nEvent.Type = NetworkEventTypeLeave
		}

		d.nDB.handleNetworkEvent(nEvent)
	}
}