github.com/pwn-term/docker@v0.0.0-20210616085119-6e977cce2565/libnetwork/networkdb/cluster.go

package networkdb

import (
	"bytes"
	"context"
	"crypto/rand"
	"encoding/hex"
	"fmt"
	"log"
	"math/big"
	rnd "math/rand"
	"net"
	"strings"
	"time"

	"github.com/hashicorp/memberlist"
	"github.com/sirupsen/logrus"
)

const (
	reapPeriod            = 5 * time.Second
	rejoinClusterDuration = 10 * time.Second
	rejoinInterval        = 60 * time.Second
	retryInterval         = 1 * time.Second
	nodeReapInterval      = 24 * time.Hour
	nodeReapPeriod        = 2 * time.Hour
	// considering a cluster with > 20 nodes and a drain speed of 100 msg/s
	// the following is roughly 1 minute
	maxQueueLenBroadcastOnSync = 500
)

type logWriter struct{}

func (l *logWriter) Write(p []byte) (int, error) {
	str := string(p)
	str = strings.TrimSuffix(str, "\n")

	switch {
	case strings.HasPrefix(str, "[WARN] "):
		str = strings.TrimPrefix(str, "[WARN] ")
		logrus.Warn(str)
	case strings.HasPrefix(str, "[DEBUG] "):
		str = strings.TrimPrefix(str, "[DEBUG] ")
		logrus.Debug(str)
	case strings.HasPrefix(str, "[INFO] "):
		str = strings.TrimPrefix(str, "[INFO] ")
		logrus.Info(str)
	case strings.HasPrefix(str, "[ERR] "):
		str = strings.TrimPrefix(str, "[ERR] ")
		logrus.Warn(str)
	}

	return len(p), nil
}
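
// exampleLogWriterUsage is an illustrative sketch, not part of the
// original file and not called anywhere: it shows how the logWriter
// above strips memberlist's level prefix and re-routes the line to
// logrus. The sample message is hypothetical.
func exampleLogWriterUsage() {
	l := log.New(&logWriter{}, "", 0)
	// The line below ends up as logrus.Warn("memberlist: Refuting a suspect message").
	l.Println("[WARN] memberlist: Refuting a suspect message")
}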

// SetKey adds a new key to the key ring
func (nDB *NetworkDB) SetKey(key []byte) {
	logrus.Debugf("Adding key %.5s", hex.EncodeToString(key))
	nDB.Lock()
	defer nDB.Unlock()
	for _, dbKey := range nDB.config.Keys {
		if bytes.Equal(key, dbKey) {
			return
		}
	}
	nDB.config.Keys = append(nDB.config.Keys, key)
	if nDB.keyring != nil {
		nDB.keyring.AddKey(key)
	}
}

// SetPrimaryKey sets the given key as the primary key. The key must have
// been added a priori through SetKey
func (nDB *NetworkDB) SetPrimaryKey(key []byte) {
	logrus.Debugf("Primary Key %.5s", hex.EncodeToString(key))
	nDB.RLock()
	defer nDB.RUnlock()
	for _, dbKey := range nDB.config.Keys {
		if bytes.Equal(key, dbKey) {
			if nDB.keyring != nil {
				nDB.keyring.UseKey(dbKey)
			}
			break
		}
	}
}

// RemoveKey removes a key from the key ring. The key being removed
// can't be the primary key
func (nDB *NetworkDB) RemoveKey(key []byte) {
	logrus.Debugf("Remove Key %.5s", hex.EncodeToString(key))
	nDB.Lock()
	defer nDB.Unlock()
	for i, dbKey := range nDB.config.Keys {
		if bytes.Equal(key, dbKey) {
			nDB.config.Keys = append(nDB.config.Keys[:i], nDB.config.Keys[i+1:]...)
			if nDB.keyring != nil {
				nDB.keyring.RemoveKey(dbKey)
			}
			break
		}
	}
}
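
// exampleKeyRotation is an illustrative sketch, not part of the original
// file: it walks through the three-step rotation the key-ring methods
// above are designed for. The key values are hypothetical placeholders.
func exampleKeyRotation(nDB *NetworkDB, oldKey, newKey []byte) {
	nDB.SetKey(newKey)        // 1. add the new key to the ring
	nDB.SetPrimaryKey(newKey) // 2. start encrypting with the new key
	nDB.RemoveKey(oldKey)     // 3. drop the old key once it is unused
}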

func (nDB *NetworkDB) clusterInit() error {
	nDB.lastStatsTimestamp = time.Now()
	nDB.lastHealthTimestamp = nDB.lastStatsTimestamp

	config := memberlist.DefaultLANConfig()
	config.Name = nDB.config.NodeID
	config.BindAddr = nDB.config.BindAddr
	config.AdvertiseAddr = nDB.config.AdvertiseAddr
	config.UDPBufferSize = nDB.config.PacketBufferSize

	if nDB.config.BindPort != 0 {
		config.BindPort = nDB.config.BindPort
	}

	config.ProtocolVersion = memberlist.ProtocolVersion2Compatible
	config.Delegate = &delegate{nDB: nDB}
	config.Events = &eventDelegate{nDB: nDB}
	// custom logger that does not add time or date, so they are not
	// duplicated by logrus
	config.Logger = log.New(&logWriter{}, "", 0)

	var err error
	if len(nDB.config.Keys) > 0 {
		for i, key := range nDB.config.Keys {
			logrus.Debugf("Encryption key %d: %.5s", i+1, hex.EncodeToString(key))
		}
		nDB.keyring, err = memberlist.NewKeyring(nDB.config.Keys, nDB.config.Keys[0])
		if err != nil {
			return err
		}
		config.Keyring = nDB.keyring
	}

	nDB.networkBroadcasts = &memberlist.TransmitLimitedQueue{
		NumNodes: func() int {
			nDB.RLock()
			num := len(nDB.nodes)
			nDB.RUnlock()
			return num
		},
		RetransmitMult: config.RetransmitMult,
	}

	nDB.nodeBroadcasts = &memberlist.TransmitLimitedQueue{
		NumNodes: func() int {
			nDB.RLock()
			num := len(nDB.nodes)
			nDB.RUnlock()
			return num
		},
		RetransmitMult: config.RetransmitMult,
	}

	mlist, err := memberlist.Create(config)
	if err != nil {
		return fmt.Errorf("failed to create memberlist: %v", err)
	}

	nDB.ctx, nDB.cancelCtx = context.WithCancel(context.Background())
	nDB.memberlist = mlist

	for _, trigger := range []struct {
		interval time.Duration
		fn       func()
	}{
		{reapPeriod, nDB.reapState},
		{config.GossipInterval, nDB.gossip},
		{config.PushPullInterval, nDB.bulkSyncTables},
		{retryInterval, nDB.reconnectNode},
		{nodeReapPeriod, nDB.reapDeadNode},
		{rejoinInterval, nDB.rejoinClusterBootStrap},
	} {
		t := time.NewTicker(trigger.interval)
		go nDB.triggerFunc(trigger.interval, t.C, trigger.fn)
		nDB.tickers = append(nDB.tickers, t)
	}

	return nil
}

func (nDB *NetworkDB) retryJoin(ctx context.Context, members []string) {
	t := time.NewTicker(retryInterval)
	defer t.Stop()

	for {
		select {
		case <-t.C:
			if _, err := nDB.memberlist.Join(members); err != nil {
				logrus.Errorf("Failed to join memberlist %s on retry: %v", members, err)
				continue
			}
			if err := nDB.sendNodeEvent(NodeEventTypeJoin); err != nil {
				logrus.Errorf("failed to send node join on retry: %v", err)
				continue
			}
			return
		case <-ctx.Done():
			return
		}
	}
}

func (nDB *NetworkDB) clusterJoin(members []string) error {
	mlist := nDB.memberlist

	if _, err := mlist.Join(members); err != nil {
		// In case of failure, there is no need to explicitly call retryJoin:
		// rejoinClusterBootStrap, which runs every minute, will retry the join for 10 seconds
		return fmt.Errorf("could not join node to memberlist: %v", err)
	}

	if err := nDB.sendNodeEvent(NodeEventTypeJoin); err != nil {
		return fmt.Errorf("failed to send node join: %v", err)
	}

	return nil
}

func (nDB *NetworkDB) clusterLeave() error {
	mlist := nDB.memberlist

	if err := nDB.sendNodeEvent(NodeEventTypeLeave); err != nil {
		logrus.Errorf("failed to send node leave: %v", err)
	}

	if err := mlist.Leave(time.Second); err != nil {
		return err
	}

	// cancel the context
	nDB.cancelCtx()

	for _, t := range nDB.tickers {
		t.Stop()
	}

	return mlist.Shutdown()
}

func (nDB *NetworkDB) triggerFunc(stagger time.Duration, C <-chan time.Time, f func()) {
	// Use a random stagger to avoid synchronizing with other nodes
	randStagger := time.Duration(uint64(rnd.Int63()) % uint64(stagger))
	select {
	case <-time.After(randStagger):
	case <-nDB.ctx.Done():
		return
	}
	for {
		select {
		case <-C:
			f()
		case <-nDB.ctx.Done():
			return
		}
	}
}
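
// exampleStagger is an illustrative sketch, not part of the original
// file: it isolates the stagger computation used by triggerFunc above,
// which yields a uniform random delay in [0, period).
func exampleStagger(period time.Duration) time.Duration {
	return time.Duration(uint64(rnd.Int63()) % uint64(period))
}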

func (nDB *NetworkDB) reapDeadNode() {
	nDB.Lock()
	defer nDB.Unlock()
	for _, nodeMap := range []map[string]*node{
		nDB.failedNodes,
		nDB.leftNodes,
	} {
		for id, n := range nodeMap {
			if n.reapTime > nodeReapPeriod {
				n.reapTime -= nodeReapPeriod
				continue
			}
			logrus.Debugf("Garbage collect node %v", n.Name)
			delete(nodeMap, id)
		}
	}
}
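
// exampleReapCountdown is an illustrative sketch, not part of the
// original file: it shows the countdown semantics of reapDeadNode above.
// A node entry starts at nodeReapInterval (24h) and loses nodeReapPeriod
// (2h) per run, so it is collected on roughly the 12th run.
func exampleReapCountdown() int {
	remaining := nodeReapInterval
	runs := 0
	for remaining > nodeReapPeriod {
		remaining -= nodeReapPeriod
		runs++
	}
	return runs + 1 // the final run deletes the entry
}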

// rejoinClusterBootStrap is called periodically to check whether any of the bootStrap nodes
// is active in the cluster. If none is, it calls the cluster join to merge the two separate
// clusters that form when all managers are stopped and restarted at the same time.
func (nDB *NetworkDB) rejoinClusterBootStrap() {
	nDB.RLock()
	if len(nDB.bootStrapIP) == 0 {
		nDB.RUnlock()
		return
	}

	myself, ok := nDB.nodes[nDB.config.NodeID]
	if !ok {
		nDB.RUnlock()
		logrus.Warnf("rejoinClusterBootstrap unable to find local node info using ID:%v", nDB.config.NodeID)
		return
	}
	bootStrapIPs := make([]string, 0, len(nDB.bootStrapIP))
	for _, bootIP := range nDB.bootStrapIP {
		// bootstrap IPs are usually IP:port pairs from the Join
		var bootstrapIP net.IP
		ipStr, _, err := net.SplitHostPort(bootIP)
		if err != nil {
			// not an IP:port pair; try to parse it as a bare IP.
			// Note this seems to be the case for swarm joins that do not specify any port
			ipStr = bootIP
		}
		bootstrapIP = net.ParseIP(ipStr)
		if bootstrapIP != nil {
			for _, node := range nDB.nodes {
				if node.Addr.Equal(bootstrapIP) && !node.Addr.Equal(myself.Addr) {
					// One of the bootstrap nodes (and not myself) is part of the cluster, return
					nDB.RUnlock()
					return
				}
			}
			bootStrapIPs = append(bootStrapIPs, bootIP)
		}
	}
	nDB.RUnlock()
	if len(bootStrapIPs) == 0 {
		// this also avoids calling Join with an empty list, which would erase the current bootstrap IP list
		logrus.Debug("rejoinClusterBootStrap did not find any valid IP")
		return
	}
	// None of the bootStrap nodes are in the cluster, call memberlist join
	logrus.Debugf("rejoinClusterBootStrap, calling cluster join with bootStrap %v", bootStrapIPs)
	ctx, cancel := context.WithTimeout(nDB.ctx, rejoinClusterDuration)
	defer cancel()
	nDB.retryJoin(ctx, bootStrapIPs)
}
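
// exampleParseBootstrapIP is an illustrative sketch, not part of the
// original file: it isolates the host:port fallback parsing used by
// rejoinClusterBootStrap above. The sample addresses in the comments
// are hypothetical.
func exampleParseBootstrapIP(bootIP string) net.IP {
	ipStr, _, err := net.SplitHostPort(bootIP) // "10.0.0.1:7946" -> "10.0.0.1"
	if err != nil {
		ipStr = bootIP // a bare "10.0.0.1" with no port
	}
	return net.ParseIP(ipStr) // nil if the string is not a valid IP
}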

func (nDB *NetworkDB) reconnectNode() {
	nDB.RLock()
	if len(nDB.failedNodes) == 0 {
		nDB.RUnlock()
		return
	}

	nodes := make([]*node, 0, len(nDB.failedNodes))
	for _, n := range nDB.failedNodes {
		nodes = append(nodes, n)
	}
	nDB.RUnlock()

	node := nodes[randomOffset(len(nodes))]
	addr := net.UDPAddr{IP: node.Addr, Port: int(node.Port)}

	if _, err := nDB.memberlist.Join([]string{addr.String()}); err != nil {
		return
	}

	if err := nDB.sendNodeEvent(NodeEventTypeJoin); err != nil {
		return
	}

	logrus.Debugf("Initiating bulk sync with node %s after reconnect", node.Name)
	nDB.bulkSync([]string{node.Name}, true)
}

// For timing the entry deletion in the reaper, APIs that do not use a monotonic
// clock source (time.Now, Sub, etc.) should be avoided. Hence every entry
// carries a reapTime, set initially to reapInterval and decremented by
// reapPeriod every time the reaper runs. NOTE: nDB.reapTableEntries updates the
// reapTime under a read lock. This is safe as long as no other concurrent path
// touches the reapTime field.
func (nDB *NetworkDB) reapState() {
	// reapTableEntries relies on the network still being present, so garbage collect entries first
	nDB.reapTableEntries()
	nDB.reapNetworks()
}

func (nDB *NetworkDB) reapNetworks() {
	nDB.Lock()
	for _, nn := range nDB.networks {
		for id, n := range nn {
			if n.leaving {
				if n.reapTime <= 0 {
					delete(nn, id)
					continue
				}
				n.reapTime -= reapPeriod
			}
		}
	}
	nDB.Unlock()
}

func (nDB *NetworkDB) reapTableEntries() {
	var nodeNetworks []string
	// This is best effort; if the list of networks changes, it will be picked up in the next cycle
	nDB.RLock()
	for nid := range nDB.networks[nDB.config.NodeID] {
		nodeNetworks = append(nodeNetworks, nid)
	}
	nDB.RUnlock()

	cycleStart := time.Now()
	// In order to avoid blocking the database for a long time, apply the garbage collection logic by network
	// The lock is taken at the beginning of the cycle and the deletion is inline
	for _, nid := range nodeNetworks {
		nDB.Lock()
		nDB.indexes[byNetwork].WalkPrefix(fmt.Sprintf("/%s", nid), func(path string, v interface{}) bool {
			// timeCompensation compensates for any time spent waiting for the lock to be released
			timeCompensation := time.Since(cycleStart)
			entry, ok := v.(*entry)
			if !ok || !entry.deleting {
				return false
			}

			// This check adds an extra second to guarantee that when the value is truncated to int32
			// to fit the packet for the tableEvent, it is always strictly > 1 and never 0
			if entry.reapTime > reapPeriod+timeCompensation+time.Second {
				entry.reapTime -= reapPeriod + timeCompensation
				return false
			}

			params := strings.Split(path[1:], "/")
			nid := params[0]
			tname := params[1]
			key := params[2]

			okTable, okNetwork := nDB.deleteEntry(nid, tname, key)
			if !okTable {
				logrus.Errorf("Table tree delete failed, entry with key:%s does not exist in the table:%s network:%s", key, tname, nid)
			}
			if !okNetwork {
				logrus.Errorf("Network tree delete failed, entry with key:%s does not exist in the network:%s table:%s", key, nid, tname)
			}

			return false
		})
		nDB.Unlock()
	}
}
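
// exampleSplitTablePath is an illustrative sketch, not part of the
// original file: it shows how the radix-tree paths walked by
// reapTableEntries above decompose into network ID, table name, and key.
// The sample path is hypothetical.
func exampleSplitTablePath() (nid, tname, key string) {
	path := "/net1/endpoint_table/ep1"
	params := strings.Split(path[1:], "/")
	return params[0], params[1], params[2]
}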

func (nDB *NetworkDB) gossip() {
	networkNodes := make(map[string][]string)
	nDB.RLock()
	thisNodeNetworks := nDB.networks[nDB.config.NodeID]
	for nid := range thisNodeNetworks {
		networkNodes[nid] = nDB.networkNodes[nid]
	}
	printStats := time.Since(nDB.lastStatsTimestamp) >= nDB.config.StatsPrintPeriod
	printHealth := time.Since(nDB.lastHealthTimestamp) >= nDB.config.HealthPrintPeriod
	nDB.RUnlock()

	if printHealth {
		healthScore := nDB.memberlist.GetHealthScore()
		if healthScore != 0 {
			logrus.Warnf("NetworkDB stats %v(%v) - healthscore:%d (connectivity issues)", nDB.config.Hostname, nDB.config.NodeID, healthScore)
		}
		nDB.lastHealthTimestamp = time.Now()
	}

	for nid, nodes := range networkNodes {
		mNodes := nDB.mRandomNodes(3, nodes)
		bytesAvail := nDB.config.PacketBufferSize - compoundHeaderOverhead

		nDB.RLock()
		network, ok := thisNodeNetworks[nid]
		nDB.RUnlock()
		if !ok || network == nil {
			// It is normal for the network to be removed
			// between the time we collect the network
			// attachments of this node and processing
			// them here.
			continue
		}

		broadcastQ := network.tableBroadcasts

		if broadcastQ == nil {
			logrus.Errorf("Invalid broadcastQ encountered while gossiping for network %s", nid)
			continue
		}

		msgs := broadcastQ.GetBroadcasts(compoundOverhead, bytesAvail)
		// Collect stats and print the queue info; note this code also runs when the queue is empty so its state stays visible
		network.qMessagesSent += len(msgs)
		if printStats {
			logrus.Infof("NetworkDB stats %v(%v) - netID:%s leaving:%t netPeers:%d entries:%d Queue qLen:%d netMsg/s:%d",
				nDB.config.Hostname, nDB.config.NodeID,
				nid, network.leaving, broadcastQ.NumNodes(), network.entriesNumber, broadcastQ.NumQueued(),
				network.qMessagesSent/int((nDB.config.StatsPrintPeriod/time.Second)))
			network.qMessagesSent = 0
		}

		if len(msgs) == 0 {
			continue
		}

		// Create a compound message
		compound := makeCompoundMessage(msgs)

		for _, node := range mNodes {
			nDB.RLock()
			mnode := nDB.nodes[node]
			nDB.RUnlock()

			if mnode == nil {
				break
			}

			// Send the compound message
			if err := nDB.memberlist.SendBestEffort(&mnode.Node, compound); err != nil {
				logrus.Errorf("Failed to send gossip to %s: %s", mnode.Addr, err)
			}
		}
	}
	// Reset the stats
	if printStats {
		nDB.lastStatsTimestamp = time.Now()
	}
}
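
// exampleGossipRate is an illustrative sketch, not part of the original
// file: it isolates the netMsg/s figure logged by gossip above, i.e. the
// messages sent since the last report divided by the report period in
// seconds.
func exampleGossipRate(sent int, period time.Duration) int {
	return sent / int(period/time.Second)
}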

func (nDB *NetworkDB) bulkSyncTables() {
	var networks []string
	nDB.RLock()
	for nid, network := range nDB.networks[nDB.config.NodeID] {
		if network.leaving {
			continue
		}
		networks = append(networks, nid)
	}
	nDB.RUnlock()

	for {
		if len(networks) == 0 {
			break
		}

		nid := networks[0]
		networks = networks[1:]

		nDB.RLock()
		nodes := nDB.networkNodes[nid]
		nDB.RUnlock()

		// No peer nodes on this network. Move on.
		if len(nodes) == 0 {
			continue
		}

		completed, err := nDB.bulkSync(nodes, false)
		if err != nil {
			logrus.Errorf("periodic bulk sync failure for network %s: %v", nid, err)
			continue
		}

		// Remove all the networks for which we have
		// successfully completed bulk sync in this iteration.
		updatedNetworks := make([]string, 0, len(networks))
		for _, nid := range networks {
			var found bool
			for _, completedNid := range completed {
				if nid == completedNid {
					found = true
					break
				}
			}

			if !found {
				updatedNetworks = append(updatedNetworks, nid)
			}
		}

		networks = updatedNetworks
	}
}

func (nDB *NetworkDB) bulkSync(nodes []string, all bool) ([]string, error) {
	if !all {
		// Get 2 random nodes. 2nd node will be tried if the bulk sync to
		// 1st node fails.
		nodes = nDB.mRandomNodes(2, nodes)
	}

	if len(nodes) == 0 {
		return nil, nil
	}

	var err error
	var networks []string
	var success bool
	for _, node := range nodes {
		if node == nDB.config.NodeID {
			continue
		}
		logrus.Debugf("%v(%v): Initiating bulk sync with node %v", nDB.config.Hostname, nDB.config.NodeID, node)
		networks = nDB.findCommonNetworks(node)
		err = nDB.bulkSyncNode(networks, node, true)
		if err != nil {
			err = fmt.Errorf("bulk sync to node %s failed: %v", node, err)
			logrus.Warn(err.Error())
		} else {
			// bulk sync succeeded
			success = true
			// if it's a periodic bulk sync, stop after the first successful sync
			if !all {
				break
			}
		}
	}

	if success {
		// if at least one node sync succeeded
		return networks, nil
	}

	return nil, err
}

// Bulk sync all the table entries belonging to a set of networks to a
// single peer node. It can be unsolicited or can be in response to an
// unsolicited bulk sync
func (nDB *NetworkDB) bulkSyncNode(networks []string, node string, unsolicited bool) error {
	var msgs [][]byte

	var unsolMsg string
	if unsolicited {
		unsolMsg = "unsolicited"
	}

	logrus.Debugf("%v(%v): Initiating %s bulk sync for networks %v with node %s",
		nDB.config.Hostname, nDB.config.NodeID, unsolMsg, networks, node)

	nDB.RLock()
	mnode := nDB.nodes[node]
	if mnode == nil {
		nDB.RUnlock()
		return nil
	}

	for _, nid := range networks {
		nDB.indexes[byNetwork].WalkPrefix(fmt.Sprintf("/%s", nid), func(path string, v interface{}) bool {
			entry, ok := v.(*entry)
			if !ok {
				return false
			}

			eType := TableEventTypeCreate
			if entry.deleting {
				eType = TableEventTypeDelete
			}

			params := strings.Split(path[1:], "/")
			tEvent := TableEvent{
				Type:      eType,
				LTime:     entry.ltime,
				NodeName:  entry.node,
				NetworkID: nid,
				TableName: params[1],
				Key:       params[2],
				Value:     entry.value,
				// the duration in seconds is a float; it is truncated to int32 below
				ResidualReapTime: int32(entry.reapTime.Seconds()),
			}

			msg, err := encodeMessage(MessageTypeTableEvent, &tEvent)
			if err != nil {
				logrus.Errorf("Encode failure during bulk sync: %#v", tEvent)
				return false
			}

			msgs = append(msgs, msg)
			return false
		})
	}
	nDB.RUnlock()

	// Create a compound message
	compound := makeCompoundMessage(msgs)

	bsm := BulkSyncMessage{
		LTime:       nDB.tableClock.Time(),
		Unsolicited: unsolicited,
		NodeName:    nDB.config.NodeID,
		Networks:    networks,
		Payload:     compound,
	}

	buf, err := encodeMessage(MessageTypeBulkSync, &bsm)
	if err != nil {
		return fmt.Errorf("failed to encode bulk sync message: %v", err)
	}

	nDB.Lock()
	ch := make(chan struct{})
	nDB.bulkSyncAckTbl[node] = ch
	nDB.Unlock()

	err = nDB.memberlist.SendReliable(&mnode.Node, buf)
	if err != nil {
		nDB.Lock()
		delete(nDB.bulkSyncAckTbl, node)
		nDB.Unlock()

		return fmt.Errorf("failed to send a TCP message during bulk sync: %v", err)
	}

	// Wait on a response only if it is unsolicited.
	if unsolicited {
		startTime := time.Now()
		t := time.NewTimer(30 * time.Second)
		select {
		case <-t.C:
			logrus.Errorf("Bulk sync to node %s timed out", node)
		case <-ch:
			logrus.Debugf("%v(%v): Bulk sync to node %s took %s", nDB.config.Hostname, nDB.config.NodeID, node, time.Since(startTime))
		}
		t.Stop()
	}

	return nil
}
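
// exampleAckWait is an illustrative sketch, not part of the original
// file: it isolates the ack pattern used by bulkSyncNode above, waiting
// on either the peer's ack channel or a timeout. The channel and the
// 30-second timeout are stand-ins for the real bulkSyncAckTbl entry.
func exampleAckWait(ack <-chan struct{}) bool {
	t := time.NewTimer(30 * time.Second)
	defer t.Stop()
	select {
	case <-ack:
		return true // peer acknowledged the bulk sync
	case <-t.C:
		return false // timed out waiting for the ack
	}
}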

// randomOffset returns a random offset in the range [0, n)
func randomOffset(n int) int {
	if n == 0 {
		return 0
	}

	val, err := rand.Int(rand.Reader, big.NewInt(int64(n)))
	if err != nil {
		logrus.Errorf("Failed to get a random offset: %v", err)
		return 0
	}

	return int(val.Int64())
}

// mRandomNodes is used to select up to m random nodes. It is possible
// that fewer than m nodes are returned.
func (nDB *NetworkDB) mRandomNodes(m int, nodes []string) []string {
	n := len(nodes)
	mNodes := make([]string, 0, m)
OUTER:
	// Probe up to 3*n times; with large n this is not necessary
	// since m << n, but with small n we want the search to be
	// exhaustive
	for i := 0; i < 3*n && len(mNodes) < m; i++ {
		// Get a random node
		idx := randomOffset(n)
		node := nodes[idx]

		if node == nDB.config.NodeID {
			continue
		}

		// Check if we have this node already
		for j := 0; j < len(mNodes); j++ {
			if node == mNodes[j] {
				continue OUTER
			}
		}

		// Append the node
		mNodes = append(mNodes, node)
	}

	return mNodes
}