github.com/docker/engine@v22.0.0-20211208180946-d456264580cf+incompatible/libnetwork/networkdb/cluster.go

package networkdb

import (
	"bytes"
	"context"
	"crypto/rand"
	"encoding/hex"
	"fmt"
	"log"
	"math/big"
	rnd "math/rand"
	"net"
	"strings"
	"time"

	"github.com/hashicorp/memberlist"
	"github.com/sirupsen/logrus"
)

const (
	reapPeriod       = 5 * time.Second
	retryInterval    = 1 * time.Second
	nodeReapInterval = 24 * time.Hour
	nodeReapPeriod   = 2 * time.Hour
	// considering a cluster with > 20 nodes and a drain speed of 100 msg/s,
	// the following is roughly 1 minute
	maxQueueLenBroadcastOnSync = 500
)

// logWriter adapts memberlist's standard-library logger to logrus by
// stripping the level prefix and re-logging at the matching level.
type logWriter struct{}

func (l *logWriter) Write(p []byte) (int, error) {
	str := string(p)
	str = strings.TrimSuffix(str, "\n")

	switch {
	case strings.HasPrefix(str, "[WARN] "):
		str = strings.TrimPrefix(str, "[WARN] ")
		logrus.Warn(str)
	case strings.HasPrefix(str, "[DEBUG] "):
		str = strings.TrimPrefix(str, "[DEBUG] ")
		logrus.Debug(str)
	case strings.HasPrefix(str, "[INFO] "):
		str = strings.TrimPrefix(str, "[INFO] ")
		logrus.Info(str)
	case strings.HasPrefix(str, "[ERR] "):
		str = strings.TrimPrefix(str, "[ERR] ")
		// memberlist [ERR] lines are deliberately downgraded to warnings
		logrus.Warn(str)
	}

	return len(p), nil
}

// SetKey adds a new key to the key ring
func (nDB *NetworkDB) SetKey(key []byte) {
	logrus.Debugf("Adding key %.5s", hex.EncodeToString(key))
	nDB.Lock()
	defer nDB.Unlock()
	for _, dbKey := range nDB.config.Keys {
		if bytes.Equal(key, dbKey) {
			return
		}
	}
	nDB.config.Keys = append(nDB.config.Keys, key)
	if nDB.keyring != nil {
		nDB.keyring.AddKey(key)
	}
}

// SetPrimaryKey sets the given key as the primary key. The key must
// already have been added a priori through SetKey
func (nDB *NetworkDB) SetPrimaryKey(key []byte) {
	logrus.Debugf("Primary Key %.5s", hex.EncodeToString(key))
	nDB.RLock()
	defer nDB.RUnlock()
	for _, dbKey := range nDB.config.Keys {
		if bytes.Equal(key, dbKey) {
			if nDB.keyring != nil {
				nDB.keyring.UseKey(dbKey)
			}
			break
		}
	}
}

// RemoveKey removes a key from the key ring. The key being removed
// can't be the primary key
func (nDB *NetworkDB) RemoveKey(key []byte) {
	logrus.Debugf("Remove Key %.5s", hex.EncodeToString(key))
	nDB.Lock()
	defer nDB.Unlock()
	for i, dbKey := range nDB.config.Keys {
		if bytes.Equal(key, dbKey) {
			nDB.config.Keys = append(nDB.config.Keys[:i], nDB.config.Keys[i+1:]...)
			if nDB.keyring != nil {
				nDB.keyring.RemoveKey(dbKey)
			}
			break
		}
	}
}
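
// Taken together, the three methods above support online key rotation. A
// minimal sketch of a rotation, assuming the caller sequences the calls
// (NetworkDB itself does not enforce this ordering):
//
//	nDB.SetKey(newKey)        // install the new key alongside the old ones
//	nDB.SetPrimaryKey(newKey) // start encrypting outgoing gossip with it
//	nDB.RemoveKey(oldKey)     // drop the old key once every node has rotated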

func (nDB *NetworkDB) clusterInit() error {
	nDB.lastStatsTimestamp = time.Now()
	nDB.lastHealthTimestamp = nDB.lastStatsTimestamp

	config := memberlist.DefaultLANConfig()
	config.Name = nDB.config.NodeID
	config.BindAddr = nDB.config.BindAddr
	config.AdvertiseAddr = nDB.config.AdvertiseAddr
	config.UDPBufferSize = nDB.config.PacketBufferSize

	if nDB.config.BindPort != 0 {
		config.BindPort = nDB.config.BindPort
	}

	config.ProtocolVersion = memberlist.ProtocolVersion2Compatible
	config.Delegate = &delegate{nDB: nDB}
	config.Events = &eventDelegate{nDB: nDB}
	// custom logger that does not add time or date, so they are not
	// duplicated by logrus
	config.Logger = log.New(&logWriter{}, "", 0)

	var err error
	if len(nDB.config.Keys) > 0 {
		for i, key := range nDB.config.Keys {
			logrus.Debugf("Encryption key %d: %.5s", i+1, hex.EncodeToString(key))
		}
		nDB.keyring, err = memberlist.NewKeyring(nDB.config.Keys, nDB.config.Keys[0])
		if err != nil {
			return err
		}
		config.Keyring = nDB.keyring
	}

	nDB.networkBroadcasts = &memberlist.TransmitLimitedQueue{
		NumNodes: func() int {
			nDB.RLock()
			num := len(nDB.nodes)
			nDB.RUnlock()
			return num
		},
		RetransmitMult: config.RetransmitMult,
	}

	nDB.nodeBroadcasts = &memberlist.TransmitLimitedQueue{
		NumNodes: func() int {
			nDB.RLock()
			num := len(nDB.nodes)
			nDB.RUnlock()
			return num
		},
		RetransmitMult: config.RetransmitMult,
	}

	mlist, err := memberlist.Create(config)
	if err != nil {
		return fmt.Errorf("failed to create memberlist: %v", err)
	}

	nDB.ctx, nDB.cancelCtx = context.WithCancel(context.Background())
	nDB.memberlist = mlist

	for _, trigger := range []struct {
		interval time.Duration
		fn       func()
	}{
		{reapPeriod, nDB.reapState},
		{config.GossipInterval, nDB.gossip},
		{config.PushPullInterval, nDB.bulkSyncTables},
		{retryInterval, nDB.reconnectNode},
		{nodeReapPeriod, nDB.reapDeadNode},
		{nDB.config.rejoinClusterInterval, nDB.rejoinClusterBootStrap},
	} {
		t := time.NewTicker(trigger.interval)
		go nDB.triggerFunc(trigger.interval, t.C, trigger.fn)
		nDB.tickers = append(nDB.tickers, t)
	}

	return nil
}
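
// clusterInit is not called directly by users of this package; it runs as
// part of bringing up a NetworkDB instance. A minimal usage sketch, assuming
// the package's DefaultConfig/New/Join entry points:
//
//	conf := DefaultConfig()      // sensible defaults for ports and timers
//	conf.BindAddr = "192.0.2.10" // illustrative address
//	db, err := New(conf)         // New calls clusterInit internally
//	if err != nil {
//		// handle the error
//	}
//	err = db.Join([]string{"192.0.2.20:7946"}) // Join calls clusterJoin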

func (nDB *NetworkDB) retryJoin(ctx context.Context, members []string) {
	t := time.NewTicker(retryInterval)
	defer t.Stop()

	for {
		select {
		case <-t.C:
			if _, err := nDB.memberlist.Join(members); err != nil {
				logrus.Errorf("Failed to join memberlist %s on retry: %v", members, err)
				continue
			}
			if err := nDB.sendNodeEvent(NodeEventTypeJoin); err != nil {
				logrus.Errorf("failed to send node join on retry: %v", err)
				continue
			}
			return
		case <-ctx.Done():
			return
		}
	}
}

func (nDB *NetworkDB) clusterJoin(members []string) error {
	mlist := nDB.memberlist

	if _, err := mlist.Join(members); err != nil {
		// In case of failure, we no longer need to explicitly call retryJoin.
		// rejoinClusterBootStrap, which runs every nDB.config.rejoinClusterInterval,
		// will retryJoin for nDB.config.rejoinClusterDuration.
		return fmt.Errorf("could not join node to memberlist: %v", err)
	}

	if err := nDB.sendNodeEvent(NodeEventTypeJoin); err != nil {
		return fmt.Errorf("failed to send node join: %v", err)
	}

	return nil
}

func (nDB *NetworkDB) clusterLeave() error {
	mlist := nDB.memberlist

	if err := nDB.sendNodeEvent(NodeEventTypeLeave); err != nil {
		logrus.Errorf("failed to send node leave: %v", err)
	}

	if err := mlist.Leave(time.Second); err != nil {
		return err
	}

	// cancel the context
	nDB.cancelCtx()

	for _, t := range nDB.tickers {
		t.Stop()
	}

	return mlist.Shutdown()
}

func (nDB *NetworkDB) triggerFunc(stagger time.Duration, C <-chan time.Time, f func()) {
	// Use a random stagger to avoid synchronizing
	randStagger := time.Duration(uint64(rnd.Int63()) % uint64(stagger)) //nolint:gosec // gosec complains about the use of rand here. It should be fine.
	select {
	case <-time.After(randStagger):
	case <-nDB.ctx.Done():
		return
	}
	for {
		select {
		case <-C:
			f()
		case <-nDB.ctx.Done():
			return
		}
	}
}
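
// For example, with stagger = reapPeriod (5 * time.Second) the initial delay
// is a uniform value in [0s, 5s), so nodes that start at the same moment do
// not all run their reap, gossip, and sync timers in lockstep.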

func (nDB *NetworkDB) reapDeadNode() {
	nDB.Lock()
	defer nDB.Unlock()
	for _, nodeMap := range []map[string]*node{
		nDB.failedNodes,
		nDB.leftNodes,
	} {
		for id, n := range nodeMap {
			if n.reapTime > nodeReapPeriod {
				n.reapTime -= nodeReapPeriod
				continue
			}
			logrus.Debugf("Garbage collect node %v", n.Name)
			delete(nodeMap, id)
		}
	}
}
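
// Worked example of the countdown above: a failed or left node starts with
// reapTime = nodeReapInterval (24h), and reapDeadNode subtracts
// nodeReapPeriod (2h) on each run, so the node is garbage collected on
// roughly the 12th run after it dropped out.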

// rejoinClusterBootStrap is called periodically to check if all bootStrap nodes are active in the cluster.
// If not, it calls cluster join to merge the two separate clusters that form when all the managers are
// stopped and started at the same time.
func (nDB *NetworkDB) rejoinClusterBootStrap() {
	nDB.RLock()
	if len(nDB.bootStrapIP) == 0 {
		nDB.RUnlock()
		return
	}

	myself, ok := nDB.nodes[nDB.config.NodeID]
	if !ok {
		nDB.RUnlock()
		logrus.Warnf("rejoinClusterBootstrap unable to find local node info using ID:%v", nDB.config.NodeID)
		return
	}
	bootStrapIPs := make([]string, 0, len(nDB.bootStrapIP))
	for _, bootIP := range nDB.bootStrapIP {
		// bootstrap IPs are usually IP:port pairs from the Join
		var bootstrapIP net.IP
		ipStr, _, err := net.SplitHostPort(bootIP)
		if err != nil {
			// not a host:port pair; try to parse it as a plain IP without a port.
			// This seems to be the case for swarm nodes that do not specify any port.
			ipStr = bootIP
		}
		bootstrapIP = net.ParseIP(ipStr)
		if bootstrapIP != nil {
			for _, node := range nDB.nodes {
				if node.Addr.Equal(bootstrapIP) && !node.Addr.Equal(myself.Addr) {
					// One of the bootstrap nodes (and not myself) is part of the cluster, return
					nDB.RUnlock()
					return
				}
			}
			bootStrapIPs = append(bootStrapIPs, bootIP)
		}
	}
	nDB.RUnlock()
	if len(bootStrapIPs) == 0 {
		// This also avoids calling Join with an empty list, which would erase the current bootstrap IP list
		logrus.Debug("rejoinClusterBootStrap did not find any valid IP")
		return
	}
	// None of the bootStrap nodes are in the cluster, call memberlist join
	logrus.Debugf("rejoinClusterBootStrap, calling cluster join with bootStrap %v", bootStrapIPs)
	ctx, cancel := context.WithTimeout(nDB.ctx, nDB.config.rejoinClusterDuration)
	defer cancel()
	nDB.retryJoin(ctx, bootStrapIPs)
}
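
// The address handling above, extracted as a standalone sketch
// (parseBootstrapIP is a hypothetical helper, not part of this file):
//
//	func parseBootstrapIP(addr string) net.IP {
//		host, _, err := net.SplitHostPort(addr) // "10.0.0.1:7946" -> "10.0.0.1"
//		if err != nil {
//			host = addr // no port given (e.g. "10.0.0.1"); parse as-is
//		}
//		return net.ParseIP(host) // nil when host is not a valid IP
//	}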

func (nDB *NetworkDB) reconnectNode() {
	nDB.RLock()
	if len(nDB.failedNodes) == 0 {
		nDB.RUnlock()
		return
	}

	nodes := make([]*node, 0, len(nDB.failedNodes))
	for _, n := range nDB.failedNodes {
		nodes = append(nodes, n)
	}
	nDB.RUnlock()

	node := nodes[randomOffset(len(nodes))]
	addr := net.UDPAddr{IP: node.Addr, Port: int(node.Port)}

	if _, err := nDB.memberlist.Join([]string{addr.String()}); err != nil {
		return
	}

	if err := nDB.sendNodeEvent(NodeEventTypeJoin); err != nil {
		return
	}

	logrus.Debugf("Initiating bulk sync with node %s after reconnect", node.Name)
	nDB.bulkSync([]string{node.Name}, true)
}

// For timing entry deletion the reaper avoids clock-based arithmetic
// (time.Now, Sub, etc.). Instead every entry carries a reapTime, set
// initially to reapInterval and decremented by reapPeriod every time the
// reaper runs. NOTE nDB.reapTableEntries updates the reapTime with a
// readlock. This is safe as long as no other concurrent path touches the
// reapTime field.
func (nDB *NetworkDB) reapState() {
	// reapTableEntries relies on the network still being present, so
	// garbage collect the table entries first
	nDB.reapTableEntries()
	nDB.reapNetworks()
}

func (nDB *NetworkDB) reapNetworks() {
	nDB.Lock()
	for _, nn := range nDB.networks {
		for id, n := range nn {
			if n.leaving {
				if n.reapTime <= 0 {
					delete(nn, id)
					continue
				}
				n.reapTime -= reapPeriod
			}
		}
	}
	nDB.Unlock()
}

func (nDB *NetworkDB) reapTableEntries() {
	var nodeNetworks []string
	// This is best effort; if the list of networks changes it will be picked up in the next cycle
	nDB.RLock()
	for nid := range nDB.networks[nDB.config.NodeID] {
		nodeNetworks = append(nodeNetworks, nid)
	}
	nDB.RUnlock()

	cycleStart := time.Now()
	// In order to avoid blocking the database for a long time, apply the garbage collection logic by network.
	// The lock is taken at the beginning of the cycle and the deletion is inline
	for _, nid := range nodeNetworks {
		nDB.Lock()
		nDB.indexes[byNetwork].WalkPrefix(fmt.Sprintf("/%s", nid), func(path string, v interface{}) bool {
			// timeCompensation compensates for the time it took to acquire the lock
			timeCompensation := time.Since(cycleStart)
			entry, ok := v.(*entry)
			if !ok || !entry.deleting {
				return false
			}

			// Add an extra second so that when the residual reap time is truncated to int32
			// (to fit the tableEvent packet) the number is always strictly > 1 and never 0
			if entry.reapTime > reapPeriod+timeCompensation+time.Second {
				entry.reapTime -= reapPeriod + timeCompensation
				return false
			}

			params := strings.Split(path[1:], "/")
			nid := params[0]
			tname := params[1]
			key := params[2]

			okTable, okNetwork := nDB.deleteEntry(nid, tname, key)
			if !okTable {
				logrus.Errorf("Table tree delete failed, entry with key:%s does not exist in the table:%s network:%s", key, tname, nid)
			}
			if !okNetwork {
				logrus.Errorf("Network tree delete failed, entry with key:%s does not exist in the network:%s table:%s", key, nid, tname)
			}

			return false
		})
		nDB.Unlock()
	}
}
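
// Worked example of the reap check above, ignoring timeCompensation: with
// reapPeriod = 5s, an entry marked deleting with 20s of reapTime left
// survives three cycles (20s -> 15s -> 10s -> 5s) and is deleted on the
// fourth, when reapTime no longer exceeds reapPeriod + 1s.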

func (nDB *NetworkDB) gossip() {
	networkNodes := make(map[string][]string)
	nDB.RLock()
	thisNodeNetworks := nDB.networks[nDB.config.NodeID]
	for nid := range thisNodeNetworks {
		networkNodes[nid] = nDB.networkNodes[nid]
	}
	printStats := time.Since(nDB.lastStatsTimestamp) >= nDB.config.StatsPrintPeriod
	printHealth := time.Since(nDB.lastHealthTimestamp) >= nDB.config.HealthPrintPeriod
	nDB.RUnlock()

	if printHealth {
		healthScore := nDB.memberlist.GetHealthScore()
		if healthScore != 0 {
			logrus.Warnf("NetworkDB stats %v(%v) - healthscore:%d (connectivity issues)", nDB.config.Hostname, nDB.config.NodeID, healthScore)
		}
		nDB.lastHealthTimestamp = time.Now()
	}

	for nid, nodes := range networkNodes {
		mNodes := nDB.mRandomNodes(3, nodes)
		bytesAvail := nDB.config.PacketBufferSize - compoundHeaderOverhead

		nDB.RLock()
		network, ok := thisNodeNetworks[nid]
		nDB.RUnlock()
		if !ok || network == nil {
			// It is normal for the network to be removed
			// between the time we collect the network
			// attachments of this node and processing
			// them here.
			continue
		}

		broadcastQ := network.tableBroadcasts

		if broadcastQ == nil {
			logrus.Errorf("Invalid broadcastQ encountered while gossiping for network %s", nid)
			continue
		}

		msgs := broadcastQ.GetBroadcasts(compoundOverhead, bytesAvail)
		// Collect stats and print the queue info. Note this code also runs
		// when there are no messages, so empty queues show up in the stats
		network.qMessagesSent += len(msgs)
		if printStats {
			logrus.Infof("NetworkDB stats %v(%v) - netID:%s leaving:%t netPeers:%d entries:%d Queue qLen:%d netMsg/s:%d",
				nDB.config.Hostname, nDB.config.NodeID,
				nid, network.leaving, broadcastQ.NumNodes(), network.entriesNumber, broadcastQ.NumQueued(),
				network.qMessagesSent/int((nDB.config.StatsPrintPeriod/time.Second)))
			network.qMessagesSent = 0
		}

		if len(msgs) == 0 {
			continue
		}

		// Create a compound message
		compound := makeCompoundMessage(msgs)

		for _, node := range mNodes {
			nDB.RLock()
			mnode := nDB.nodes[node]
			nDB.RUnlock()

			if mnode == nil {
				break
			}

			// Send the compound message
			if err := nDB.memberlist.SendBestEffort(&mnode.Node, compound); err != nil {
				logrus.Errorf("Failed to send gossip to %s: %s", mnode.Addr, err)
			}
		}
	}
	// Reset the stats
	if printStats {
		nDB.lastStatsTimestamp = time.Now()
	}
}
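
// Rough capacity of one gossip round: for every attached network the node
// sends at most one compound UDP message, carrying up to
// PacketBufferSize - compoundHeaderOverhead bytes of queued broadcasts, to
// each of up to 3 random peers on that network.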

func (nDB *NetworkDB) bulkSyncTables() {
	var networks []string
	nDB.RLock()
	for nid, network := range nDB.networks[nDB.config.NodeID] {
		if network.leaving {
			continue
		}
		networks = append(networks, nid)
	}
	nDB.RUnlock()

	for {
		if len(networks) == 0 {
			break
		}

		nid := networks[0]
		networks = networks[1:]

		nDB.RLock()
		nodes := nDB.networkNodes[nid]
		nDB.RUnlock()

		// No peer nodes on this network. Move on.
		if len(nodes) == 0 {
			continue
		}

		completed, err := nDB.bulkSync(nodes, false)
		if err != nil {
			logrus.Errorf("periodic bulk sync failure for network %s: %v", nid, err)
			continue
		}

		// Remove all the networks for which we have
		// successfully completed bulk sync in this iteration.
		updatedNetworks := make([]string, 0, len(networks))
		for _, nid := range networks {
			var found bool
			for _, completedNid := range completed {
				if nid == completedNid {
					found = true
					break
				}
			}

			if !found {
				updatedNetworks = append(updatedNetworks, nid)
			}
		}

		networks = updatedNetworks
	}
}

func (nDB *NetworkDB) bulkSync(nodes []string, all bool) ([]string, error) {
	if !all {
		// Get 2 random nodes. 2nd node will be tried if the bulk sync to
		// 1st node fails.
		nodes = nDB.mRandomNodes(2, nodes)
	}

	if len(nodes) == 0 {
		return nil, nil
	}

	var err error
	var networks []string
	var success bool
	for _, node := range nodes {
		if node == nDB.config.NodeID {
			continue
		}
		logrus.Debugf("%v(%v): Initiating bulk sync with node %v", nDB.config.Hostname, nDB.config.NodeID, node)
		networks = nDB.findCommonNetworks(node)
		err = nDB.bulkSyncNode(networks, node, true)
		if err != nil {
			err = fmt.Errorf("bulk sync to node %s failed: %v", node, err)
			logrus.Warn(err.Error())
		} else {
			// bulk sync succeeded
			success = true
			// if it's a periodic bulk sync, stop after the first successful sync
			if !all {
				break
			}
		}
	}

	if success {
		// if at least one node sync succeeded
		return networks, nil
	}

	return nil, err
}

// Bulk sync all the table entries belonging to a set of networks to a
// single peer node. It can be unsolicited or can be in response to an
// unsolicited bulk sync
func (nDB *NetworkDB) bulkSyncNode(networks []string, node string, unsolicited bool) error {
	var msgs [][]byte

	var unsolMsg string
	if unsolicited {
		unsolMsg = "unsolicited"
	}

	logrus.Debugf("%v(%v): Initiating %s bulk sync for networks %v with node %s",
		nDB.config.Hostname, nDB.config.NodeID, unsolMsg, networks, node)

	nDB.RLock()
	mnode := nDB.nodes[node]
	if mnode == nil {
		nDB.RUnlock()
		return nil
	}

	for _, nid := range networks {
		nDB.indexes[byNetwork].WalkPrefix(fmt.Sprintf("/%s", nid), func(path string, v interface{}) bool {
			entry, ok := v.(*entry)
			if !ok {
				return false
			}

			eType := TableEventTypeCreate
			if entry.deleting {
				eType = TableEventTypeDelete
			}

			params := strings.Split(path[1:], "/")
			tEvent := TableEvent{
				Type:      eType,
				LTime:     entry.ltime,
				NodeName:  entry.node,
				NetworkID: nid,
				TableName: params[1],
				Key:       params[2],
				Value:     entry.value,
				// The residual reap time in seconds is a float; it is truncated to int32 here
				ResidualReapTime: int32(entry.reapTime.Seconds()),
			}

			msg, err := encodeMessage(MessageTypeTableEvent, &tEvent)
			if err != nil {
				logrus.Errorf("Encode failure during bulk sync: %#v", tEvent)
				return false
			}

			msgs = append(msgs, msg)
			return false
		})
	}
	nDB.RUnlock()

	// Create a compound message
	compound := makeCompoundMessage(msgs)

	bsm := BulkSyncMessage{
		LTime:       nDB.tableClock.Time(),
		Unsolicited: unsolicited,
		NodeName:    nDB.config.NodeID,
		Networks:    networks,
		Payload:     compound,
	}

	buf, err := encodeMessage(MessageTypeBulkSync, &bsm)
	if err != nil {
		return fmt.Errorf("failed to encode bulk sync message: %v", err)
	}

	nDB.Lock()
	ch := make(chan struct{})
	nDB.bulkSyncAckTbl[node] = ch
	nDB.Unlock()

	err = nDB.memberlist.SendReliable(&mnode.Node, buf)
	if err != nil {
		nDB.Lock()
		delete(nDB.bulkSyncAckTbl, node)
		nDB.Unlock()

		return fmt.Errorf("failed to send a TCP message during bulk sync: %v", err)
	}

	// Wait on a response only if it is unsolicited.
	if unsolicited {
		startTime := time.Now()
		t := time.NewTimer(30 * time.Second)
		select {
		case <-t.C:
			logrus.Errorf("Bulk sync to node %s timed out", node)
		case <-ch:
			logrus.Debugf("%v(%v): Bulk sync to node %s took %s", nDB.config.Hostname, nDB.config.NodeID, node, time.Since(startTime))
		}
		t.Stop()
	}

	return nil
}
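
// Example of the ResidualReapTime truncation in bulkSyncNode: an entry with
// 90.7s of reapTime left is encoded as int32(90), so the receiving node
// resumes its countdown from 90s and up to a second of residual time can be
// lost per sync.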

// randomOffset returns a random offset in the range [0, n); it returns 0
// when n == 0
func randomOffset(n int) int {
	if n == 0 {
		return 0
	}

	val, err := rand.Int(rand.Reader, big.NewInt(int64(n)))
	if err != nil {
		logrus.Errorf("Failed to get a random offset: %v", err)
		return 0
	}

	return int(val.Int64())
}

// mRandomNodes is used to select up to m random nodes, excluding this node
// itself. It is possible that fewer than m nodes are returned.
func (nDB *NetworkDB) mRandomNodes(m int, nodes []string) []string {
	n := len(nodes)
	mNodes := make([]string, 0, m)
OUTER:
	// Probe up to 3*n times; with large n this is not necessary
	// since m << n, but with small n we want the search to be
	// exhaustive
	for i := 0; i < 3*n && len(mNodes) < m; i++ {
		// Get random node
		idx := randomOffset(n)
		node := nodes[idx]

		if node == nDB.config.NodeID {
			continue
		}

		// Check if we have this node already
		for j := 0; j < len(mNodes); j++ {
			if node == mNodes[j] {
				continue OUTER
			}
		}

		// Append the node
		mNodes = append(mNodes, node)
	}

	return mNodes
}
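
// Sketch of how mRandomNodes is used above, e.g. by gossip() when picking
// fan-out targets for a network:
//
//	peers := nDB.networkNodes[nid]        // every peer attached to the network
//	targets := nDB.mRandomNodes(3, peers) // up to 3 of them, never this node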