github.com/Heebron/moby@v0.0.0-20221111184709-6eab4f55faf7/libnetwork/networkdb/networkdb.go

     1  package networkdb
     2  
     3  //go:generate protoc -I.:../vendor/github.com/gogo/protobuf --gogo_out=import_path=github.com/docker/docker/libnetwork/networkdb,Mgogoproto/gogo.proto=github.com/gogo/protobuf/gogoproto:. networkdb.proto
     4  
     5  import (
     6  	"context"
     7  	"fmt"
     8  	"os"
     9  	"strings"
    10  	"sync"
    11  	"time"
    12  
    13  	"github.com/armon/go-radix"
    14  	"github.com/docker/docker/libnetwork/types"
    15  	"github.com/docker/docker/pkg/stringid"
    16  	"github.com/docker/go-events"
    17  	"github.com/hashicorp/memberlist"
    18  	"github.com/hashicorp/serf/serf"
    19  	"github.com/sirupsen/logrus"
    20  )
    21  
    22  const (
    23  	byTable int = 1 + iota
    24  	byNetwork
    25  )
    26  
    27  // NetworkDB instance drives the networkdb cluster and acts as the broker
    28  // for cluster-scoped and network-scoped gossip and watches.
    29  type NetworkDB struct {
    30  	// The clocks MUST be the first things
    31  	// in this struct due to Golang issue #599.
    32  
    33  	// Global lamport clock for node network attach events.
    34  	networkClock serf.LamportClock
    35  
    36  	// Global lamport clock for table events.
    37  	tableClock serf.LamportClock
    38  
    39  	sync.RWMutex
    40  
    41  	// NetworkDB configuration.
    42  	config *Config
    43  
    44  	// All the tree indexes (byTable, byNetwork) that we maintain
    45  	// for the db.
    46  	indexes map[int]*radix.Tree
    47  
    48  	// Memberlist we use to drive the cluster.
    49  	memberlist *memberlist.Memberlist
    50  
    51  	// List of all peer nodes in the cluster, not limited to any
    52  	// network.
    53  	nodes map[string]*node
    54  
    55  	// List of all peer nodes which have failed
    56  	failedNodes map[string]*node
    57  
    58  	// List of all peer nodes which have left
    59  	leftNodes map[string]*node
    60  
    61  	// A multi-dimensional map of network/node attachments. The
    62  	// first key is a node name and the second key is a network ID
    63  	// for the network that node is participating in.
    64  	networks map[string]map[string]*network
    65  
    66  	// A map of nodes which are participating in a given
    67  	// network. The key is a network ID.
    68  	networkNodes map[string][]string
    69  
    70  	// A table of ack channels for every node from which we are
    71  	// waiting for an ack.
    72  	bulkSyncAckTbl map[string]chan struct{}
    73  
    74  	// Broadcast queue for network event gossip.
    75  	networkBroadcasts *memberlist.TransmitLimitedQueue
    76  
    77  	// Broadcast queue for node event gossip.
    78  	nodeBroadcasts *memberlist.TransmitLimitedQueue
    79  
    80  	// A central context to stop all goroutines running on
    81  	// behalf of the NetworkDB instance.
    82  	ctx       context.Context
    83  	cancelCtx context.CancelFunc
    84  
    85  	// A central broadcaster for all local watchers watching table
    86  	// events.
    87  	broadcaster *events.Broadcaster
    88  
    89  	// List of all tickers which need to be stopped when
    90  	// cleaning up.
    91  	tickers []*time.Ticker
    92  
    93  	// Reference to the memberlist's keyring to add & remove keys
    94  	keyring *memberlist.Keyring
    95  
    96  	// bootStrapIP is the list of IPs that can be used to bootstrap
    97  	// the gossip.
    98  	bootStrapIP []string
    99  
   100  	// lastStatsTimestamp is the last timestamp when the stats got printed
   101  	lastStatsTimestamp time.Time
   102  
   103  	// lastHealthTimestamp is the last timestamp when the health score got printed
   104  	lastHealthTimestamp time.Time
   105  }
   106  
   107  // PeerInfo represents the peer (gossip cluster) nodes of a network
   108  type PeerInfo struct {
   109  	Name string
   110  	IP   string
   111  }
   112  
   113  // PeerClusterInfo represents the peer (gossip cluster) nodes
   114  type PeerClusterInfo struct {
   115  	PeerInfo
   116  }
   117  
   118  type node struct {
   119  	memberlist.Node
   120  	ltime serf.LamportTime
   121  	// Number of hours left before the reaper removes the node
   122  	reapTime time.Duration
   123  }
   124  
   125  // network describes the node/network attachment.
   126  type network struct {
   127  	// Network ID
   128  	id string
   129  
   130  	// Lamport time for the latest state of the entry.
   131  	ltime serf.LamportTime
   132  
   133  	// Gets set to true after the first bulk sync happens
   134  	inSync bool
   135  
   136  	// Node leave is in progress.
   137  	leaving bool
   138  
   139  	// Number of seconds still left before a deleted network entry gets
   140  	// removed from networkDB
   141  	reapTime time.Duration
   142  
   143  	// The broadcast queue for table event gossip. This is only
   144  	// initialized for this node's network attachment entries.
   145  	tableBroadcasts *memberlist.TransmitLimitedQueue
   146  
   147  	// Number of gossip messages sent related to this network during the last stats collection period
   148  	qMessagesSent int
   149  
   150  	// Number of entries on the network. This value is the sum of all the entries of all the tables of a specific network.
   151  	// It is used for statistics purposes. It keeps track of the database size and is printed per network every StatsPrintPeriod
   152  	// interval
   153  	entriesNumber int
   154  }
   155  
   156  // Config represents the configuration of the networkdb instance and
   157  // can be passed by the caller.
   158  type Config struct {
   159  	// NodeID is the unique identifier of the node when it is part of the cluster
   160  	NodeID string
   161  
   162  	// Hostname is the node hostname.
   163  	Hostname string
   164  
   165  	// BindAddr is the IP on which networkdb listens. It can be
   166  	// 0.0.0.0 to listen on all addresses on the host.
   167  	BindAddr string
   168  
   169  	// AdvertiseAddr is the node's IP address that we advertise for
   170  	// cluster communication.
   171  	AdvertiseAddr string
   172  
   173  	// BindPort is the local node's port to which we bind to for
   174  	// cluster communication.
   175  	BindPort int
   176  
   177  	// Keys to be added to the Keyring of the memberlist. Key at index
   178  	// 0 is the primary key
   179  	Keys [][]byte
   180  
   181  	// PacketBufferSize is the maximum number of bytes that memberlist will
   182  	// put in a packet (this will be for UDP packets by default with a NetTransport).
   183  	// A safe value for this is typically 1400 bytes (which is the default). However,
   184  	// depending on your network's MTU (Maximum Transmission Unit) you may
   185  	// be able to increase this to get more content into each gossip packet.
   186  	PacketBufferSize int
   187  
   188  	// reapEntryInterval duration of a deleted entry before being garbage collected
   189  	reapEntryInterval time.Duration
   190  
   191  	// reapNetworkInterval duration of a deleted network before being garbage collected
   192  	// NOTE this MUST always be higher than reapEntryInterval
   193  	reapNetworkInterval time.Duration
   194  
   195  	// rejoinClusterDuration represents retryJoin timeout used by rejoinClusterBootStrap.
   196  	// Default is 10sec.
   197  	rejoinClusterDuration time.Duration
   198  
   199  	// rejoinClusterInterval represents interval on which rejoinClusterBootStrap runs.
   200  	// Default is 60sec.
   201  	rejoinClusterInterval time.Duration
   202  
   203  	// StatsPrintPeriod is the period used to print queue stats
   204  	// Default is 5min
   205  	StatsPrintPeriod time.Duration
   206  
   207  	// HealthPrintPeriod is the period used to print the health score
   208  	// Default is 1min
   209  	HealthPrintPeriod time.Duration
   210  }
   211  
   212  // entry defines a table entry
   213  type entry struct {
   214  	// node from which this entry was learned.
   215  	node string
   216  
   217  	// Lamport time for the most recent update to the entry
   218  	ltime serf.LamportTime
   219  
   220  	// Opaque value stored in the entry
   221  	value []byte
   222  
   223  	// Deleting the entry is in progress. All entries linger in
   224  	// the cluster for a certain amount of time after deletion.
   225  	deleting bool
   226  
   227  	// Number of seconds still left before a deleted table entry gets
   228  	// removed from networkDB
   229  	reapTime time.Duration
   230  }
   231  
   232  // DefaultConfig returns a NetworkDB config with default values
   233  func DefaultConfig() *Config {
   234  	hostname, _ := os.Hostname()
   235  	return &Config{
   236  		NodeID:                stringid.TruncateID(stringid.GenerateRandomID()),
   237  		Hostname:              hostname,
   238  		BindAddr:              "0.0.0.0",
   239  		PacketBufferSize:      1400,
   240  		StatsPrintPeriod:      5 * time.Minute,
   241  		HealthPrintPeriod:     1 * time.Minute,
   242  		reapEntryInterval:     30 * time.Minute,
   243  		rejoinClusterDuration: 10 * time.Second,
   244  		rejoinClusterInterval: 60 * time.Second,
   245  	}
   246  }
   247  
   248  // New creates a new instance of NetworkDB using the Config passed by
   249  // the caller.
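        //
        // A minimal usage sketch (illustrative only; the bind port and error
        // handling below are placeholders, not values defined in this file):
        //
        //	conf := DefaultConfig()
        //	conf.BindPort = 7946 // hypothetical port
        //	nDB, err := New(conf)
        //	if err != nil {
        //		// handle error
        //	}
        //	defer nDB.Close()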
   250  func New(c *Config) (*NetworkDB, error) {
   251  	// The garbage collection logic for entries leverages the presence of the network.
   252  	// For this reason the expiration time of the network is set slightly higher than the entry expiration, so that
   253  	// there are at least 5 extra cycles to make sure that all the entries are properly deleted before deleting the network.
   254  	c.reapNetworkInterval = c.reapEntryInterval + 5*reapPeriod
   255  
   256  	nDB := &NetworkDB{
   257  		config:         c,
   258  		indexes:        make(map[int]*radix.Tree),
   259  		networks:       make(map[string]map[string]*network),
   260  		nodes:          make(map[string]*node),
   261  		failedNodes:    make(map[string]*node),
   262  		leftNodes:      make(map[string]*node),
   263  		networkNodes:   make(map[string][]string),
   264  		bulkSyncAckTbl: make(map[string]chan struct{}),
   265  		broadcaster:    events.NewBroadcaster(),
   266  	}
   267  
   268  	nDB.indexes[byTable] = radix.New()
   269  	nDB.indexes[byNetwork] = radix.New()
   270  
   271  	logrus.Infof("New memberlist node - Node:%v will use memberlist nodeID:%v with config:%+v", c.Hostname, c.NodeID, c)
   272  	if err := nDB.clusterInit(); err != nil {
   273  		return nil, err
   274  	}
   275  
   276  	return nDB, nil
   277  }
   278  
   279  // Join joins this NetworkDB instance with a list of peer NetworkDB
   280  // instances passed by the caller in the form of addr:port
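        //
        // Illustrative call (the peer addresses are placeholders):
        //
        //	err := nDB.Join([]string{"192.0.2.10:7946", "192.0.2.11:7946"})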
   281  func (nDB *NetworkDB) Join(members []string) error {
   282  	nDB.Lock()
   283  	nDB.bootStrapIP = append([]string(nil), members...)
   284  	logrus.Infof("The new bootstrap node list is:%v", nDB.bootStrapIP)
   285  	nDB.Unlock()
   286  	return nDB.clusterJoin(members)
   287  }
   288  
   289  // Close destroys this NetworkDB instance by leaving the cluster,
   290  // stopping timers, canceling goroutines, etc.
   291  func (nDB *NetworkDB) Close() {
   292  	if err := nDB.clusterLeave(); err != nil {
   293  		logrus.Errorf("%v(%v) Could not close DB: %v", nDB.config.Hostname, nDB.config.NodeID, err)
   294  	}
   295  
   296  	// Avoid (*Broadcaster).run goroutine leak
   297  	nDB.broadcaster.Close()
   298  }
   299  
   300  // ClusterPeers returns all the gossip cluster peers.
   301  func (nDB *NetworkDB) ClusterPeers() []PeerInfo {
   302  	nDB.RLock()
   303  	defer nDB.RUnlock()
   304  	peers := make([]PeerInfo, 0, len(nDB.nodes))
   305  	for _, node := range nDB.nodes {
   306  		peers = append(peers, PeerInfo{
   307  			Name: node.Name,
   308  			IP:   node.Node.Addr.String(),
   309  		})
   310  	}
   311  	return peers
   312  }
   313  
   314  // Peers returns the gossip peers for a given network.
   315  func (nDB *NetworkDB) Peers(nid string) []PeerInfo {
   316  	nDB.RLock()
   317  	defer nDB.RUnlock()
   318  	peers := make([]PeerInfo, 0, len(nDB.networkNodes[nid]))
   319  	for _, nodeName := range nDB.networkNodes[nid] {
   320  		if node, ok := nDB.nodes[nodeName]; ok {
   321  			peers = append(peers, PeerInfo{
   322  				Name: node.Name,
   323  				IP:   node.Addr.String(),
   324  			})
   325  		} else {
   326  			// Added for testing purposes; this condition should never happen. If it does, it means that the network list
   327  			// is out of sync with the node list
   328  			peers = append(peers, PeerInfo{Name: nodeName, IP: "unknown"})
   329  		}
   330  	}
   331  	return peers
   332  }
   333  
   334  // GetEntry retrieves the value of a table entry in a given (network,
   335  // table, key) tuple
   336  func (nDB *NetworkDB) GetEntry(tname, nid, key string) ([]byte, error) {
   337  	nDB.RLock()
   338  	defer nDB.RUnlock()
   339  	entry, err := nDB.getEntry(tname, nid, key)
   340  	if err != nil {
   341  		return nil, err
   342  	}
   343  	if entry != nil && entry.deleting {
   344  		return nil, types.NotFoundErrorf("entry in table %s network id %s and key %s deleted and pending garbage collection", tname, nid, key)
   345  	}
   346  
   347  	return entry.value, nil
   348  }
   349  
   350  func (nDB *NetworkDB) getEntry(tname, nid, key string) (*entry, error) {
   351  	e, ok := nDB.indexes[byTable].Get(fmt.Sprintf("/%s/%s/%s", tname, nid, key))
   352  	if !ok {
   353  		return nil, types.NotFoundErrorf("could not get entry in table %s with network id %s and key %s", tname, nid, key)
   354  	}
   355  
   356  	return e.(*entry), nil
   357  }
   358  
   359  // CreateEntry creates a table entry in NetworkDB for given (network,
   360  // table, key) tuple and if the NetworkDB is part of the cluster
   361  // propagates this event to the cluster. It is an error to create an
   362  // entry for the same tuple for which there is already an existing
   363  // entry unless the current entry is in deleting state.
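        //
        // Illustrative round trip (the table name, key, and nid are placeholders):
        //
        //	_ = nDB.CreateEntry("my_table", nid, "my_key", []byte("value"))
        //	v, err := nDB.GetEntry("my_table", nid, "my_key") // v holds []byte("value") if err == nil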
   364  func (nDB *NetworkDB) CreateEntry(tname, nid, key string, value []byte) error {
   365  	nDB.Lock()
   366  	oldEntry, err := nDB.getEntry(tname, nid, key)
   367  	if err == nil || (oldEntry != nil && !oldEntry.deleting) {
   368  		nDB.Unlock()
   369  		return fmt.Errorf("cannot create entry in table %s with network id %s and key %s, already exists", tname, nid, key)
   370  	}
   371  
   372  	entry := &entry{
   373  		ltime: nDB.tableClock.Increment(),
   374  		node:  nDB.config.NodeID,
   375  		value: value,
   376  	}
   377  
   378  	nDB.createOrUpdateEntry(nid, tname, key, entry)
   379  	nDB.Unlock()
   380  
   381  	if err := nDB.sendTableEvent(TableEventTypeCreate, nid, tname, key, entry); err != nil {
   382  		return fmt.Errorf("cannot send create event for table %s, %v", tname, err)
   383  	}
   384  
   385  	return nil
   386  }
   387  
   388  // UpdateEntry updates a table entry in NetworkDB for given (network,
   389  // table, key) tuple and if the NetworkDB is part of the cluster
   390  // propagates this event to the cluster. It is an error to update a
   391  // non-existent entry.
   392  func (nDB *NetworkDB) UpdateEntry(tname, nid, key string, value []byte) error {
   393  	nDB.Lock()
   394  	if _, err := nDB.getEntry(tname, nid, key); err != nil {
   395  		nDB.Unlock()
   396  		return fmt.Errorf("cannot update entry as the entry in table %s with network id %s and key %s does not exist", tname, nid, key)
   397  	}
   398  
   399  	entry := &entry{
   400  		ltime: nDB.tableClock.Increment(),
   401  		node:  nDB.config.NodeID,
   402  		value: value,
   403  	}
   404  
   405  	nDB.createOrUpdateEntry(nid, tname, key, entry)
   406  	nDB.Unlock()
   407  
   408  	if err := nDB.sendTableEvent(TableEventTypeUpdate, nid, tname, key, entry); err != nil {
   409  		return fmt.Errorf("cannot send table update event: %v", err)
   410  	}
   411  
   412  	return nil
   413  }
   414  
   415  // TableElem elem
   416  type TableElem struct {
   417  	Value []byte
   418  	owner string
   419  }
   420  
   421  // GetTableByNetwork walks the networkdb for the given table and network id and
   422  // returns a map of keys and values
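        //
        // Illustrative read of a whole table (the table name is a placeholder):
        //
        //	for key, elem := range nDB.GetTableByNetwork("my_table", nid) {
        //		fmt.Printf("%s => %d bytes\n", key, len(elem.Value))
        //	}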
   423  func (nDB *NetworkDB) GetTableByNetwork(tname, nid string) map[string]*TableElem {
   424  	entries := make(map[string]*TableElem)
   425  	nDB.indexes[byTable].WalkPrefix(fmt.Sprintf("/%s/%s", tname, nid), func(k string, v interface{}) bool {
   426  		entry := v.(*entry)
   427  		if entry.deleting {
   428  			return false
   429  		}
   430  		key := k[strings.LastIndex(k, "/")+1:]
   431  		entries[key] = &TableElem{Value: entry.value, owner: entry.node}
   432  		return false
   433  	})
   434  	return entries
   435  }
   436  
   437  // DeleteEntry deletes a table entry in NetworkDB for given (network,
   438  // table, key) tuple and if the NetworkDB is part of the cluster
   439  // propagates this event to the cluster.
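        //
        // Note: the entry is not removed right away; it is overwritten with a
        // tombstone (deleting=true) that lingers for reapEntryInterval before the
        // reaper garbage collects it.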
   440  func (nDB *NetworkDB) DeleteEntry(tname, nid, key string) error {
   441  	nDB.Lock()
   442  	oldEntry, err := nDB.getEntry(tname, nid, key)
   443  	if err != nil || oldEntry == nil || oldEntry.deleting {
   444  		nDB.Unlock()
   445  		return fmt.Errorf("cannot delete entry in table %s with network id %s and key %s: "+
   446  			"it does not exist or is already being deleted", tname, nid, key)
   447  	}
   448  
   449  	entry := &entry{
   450  		ltime:    nDB.tableClock.Increment(),
   451  		node:     nDB.config.NodeID,
   452  		value:    oldEntry.value,
   453  		deleting: true,
   454  		reapTime: nDB.config.reapEntryInterval,
   455  	}
   456  
   457  	nDB.createOrUpdateEntry(nid, tname, key, entry)
   458  	nDB.Unlock()
   459  
   460  	if err := nDB.sendTableEvent(TableEventTypeDelete, nid, tname, key, entry); err != nil {
   461  		return fmt.Errorf("cannot send table delete event: %v", err)
   462  	}
   463  
   464  	return nil
   465  }
   466  
   467  func (nDB *NetworkDB) deleteNodeFromNetworks(deletedNode string) {
   468  	for nid, nodes := range nDB.networkNodes {
   469  		updatedNodes := make([]string, 0, len(nodes))
   470  		for _, node := range nodes {
   471  			if node == deletedNode {
   472  				continue
   473  			}
   474  
   475  			updatedNodes = append(updatedNodes, node)
   476  		}
   477  
   478  		nDB.networkNodes[nid] = updatedNodes
   479  	}
   480  
   481  	delete(nDB.networks, deletedNode)
   482  }
   483  
   484  // deleteNodeNetworkEntries is called in 2 conditions with 2 different outcomes:
   485  // 1) when a notification arrives that a node is leaving the network
   486  //   - Walk all the network entries and mark the leaving node's entries for deletion.
   487  //     These will be garbage collected when the reap timer expires
   488  //
   489  // 2) when the local node is leaving the network
   490  //   - Walk all the network entries:
   491  //     A) if the entry is owned by the local node
   492  //     then we mark it for deletion. This ensures that a node which has not
   493  //     yet received the notification that the local node is leaving will still be aware
   494  //     of the entries to be deleted.
   495  //     B) if the entry is owned by a remote node, then we can safely delete it. This
   496  //     ensures that if we join this network again, we will accept the CREATE events for
   497  //     entries owned by remote nodes and notify the application
   498  func (nDB *NetworkDB) deleteNodeNetworkEntries(nid, node string) {
   499  	// Indicates if the delete is triggered for the local node
   500  	isNodeLocal := node == nDB.config.NodeID
   501  
   502  	nDB.indexes[byNetwork].WalkPrefix("/"+nid,
   503  		func(path string, v interface{}) bool {
   504  			oldEntry := v.(*entry)
   505  			params := strings.Split(path[1:], "/")
   506  			nid := params[0]
   507  			tname := params[1]
   508  			key := params[2]
   509  
   510  			// If the entry is owned by a remote node and this node is not leaving the network
   511  			if oldEntry.node != node && !isNodeLocal {
   512  				// Don't do anything because the event is triggered for a node that does not own this entry
   513  				return false
   514  			}
   515  
   516  			// If this entry is already marked for deletion and this node is not leaving the network
   517  			if oldEntry.deleting && !isNodeLocal {
   518  				// Don't do anything this entry will be already garbage collected using the old reapTime
   519  				return false
   520  			}
   521  
   522  			entry := &entry{
   523  				ltime:    oldEntry.ltime,
   524  				node:     oldEntry.node,
   525  				value:    oldEntry.value,
   526  				deleting: true,
   527  				reapTime: nDB.config.reapEntryInterval,
   528  			}
   529  
   530  			// we arrived at this point in 2 cases:
   531  			// 1) this entry is owned by the node that is leaving the network
   532  			// 2) the local node is leaving the network
   533  			if oldEntry.node == node {
   534  				if isNodeLocal {
   535  					// TODO fcrisciani: this can be removed if there is no way to leave the network
   536  					// without doing a delete of all the objects
   537  					entry.ltime++
   538  				}
   539  
   540  				if !oldEntry.deleting {
   541  					nDB.createOrUpdateEntry(nid, tname, key, entry)
   542  				}
   543  			} else {
   544  				// the local node is leaving the network, all the entries of remote nodes can be safely removed
   545  				nDB.deleteEntry(nid, tname, key)
   546  			}
   547  
   548  			// Notify the upper layer only of entries not already marked for deletion
   549  			if !oldEntry.deleting {
   550  				nDB.broadcaster.Write(makeEvent(opDelete, tname, nid, key, entry.value))
   551  			}
   552  			return false
   553  		})
   554  }
   555  
   556  func (nDB *NetworkDB) deleteNodeTableEntries(node string) {
   557  	nDB.indexes[byTable].Walk(func(path string, v interface{}) bool {
   558  		oldEntry := v.(*entry)
   559  		if oldEntry.node != node {
   560  			return false
   561  		}
   562  
   563  		params := strings.Split(path[1:], "/")
   564  		tname := params[0]
   565  		nid := params[1]
   566  		key := params[2]
   567  
   568  		nDB.deleteEntry(nid, tname, key)
   569  
   570  		if !oldEntry.deleting {
   571  			nDB.broadcaster.Write(makeEvent(opDelete, tname, nid, key, oldEntry.value))
   572  		}
   573  		return false
   574  	})
   575  }
   576  
   577  // WalkTable walks a single table in NetworkDB and invokes the passed
   578  // function for each entry in the table, passing the network, key and
   579  // value. The walk stops if the passed function returns true.
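        //
        // Illustrative walk (the table name is a placeholder):
        //
        //	_ = nDB.WalkTable("my_table", func(nid, key string, value []byte, deleted bool) bool {
        //		fmt.Printf("%s/%s deleted=%v\n", nid, key, deleted)
        //		return false // continue walking
        //	})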
   580  func (nDB *NetworkDB) WalkTable(tname string, fn func(string, string, []byte, bool) bool) error {
   581  	nDB.RLock()
   582  	values := make(map[string]interface{})
   583  	nDB.indexes[byTable].WalkPrefix("/"+tname, func(path string, v interface{}) bool {
   584  		values[path] = v
   585  		return false
   586  	})
   587  	nDB.RUnlock()
   588  
   589  	for k, v := range values {
   590  		params := strings.Split(k[1:], "/")
   591  		nid := params[1]
   592  		key := params[2]
   593  		if fn(nid, key, v.(*entry).value, v.(*entry).deleting) {
   594  			return nil
   595  		}
   596  	}
   597  
   598  	return nil
   599  }
   600  
   601  // JoinNetwork joins this node to a given network and propagates this
   602  // event across the cluster. This triggers this node joining the
   603  // sub-cluster of this network and participating in the network-scoped
   604  // gossip and bulk sync for this network.
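        //
        // Illustrative lifecycle (the network ID is a placeholder):
        //
        //	if err := nDB.JoinNetwork(nid); err != nil {
        //		// handle error
        //	}
        //	// ... create/update/delete table entries scoped to nid ...
        //	_ = nDB.LeaveNetwork(nid)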
   605  func (nDB *NetworkDB) JoinNetwork(nid string) error {
   606  	ltime := nDB.networkClock.Increment()
   607  
   608  	nDB.Lock()
   609  	nodeNetworks, ok := nDB.networks[nDB.config.NodeID]
   610  	if !ok {
   611  		nodeNetworks = make(map[string]*network)
   612  		nDB.networks[nDB.config.NodeID] = nodeNetworks
   613  	}
   614  	n, ok := nodeNetworks[nid]
   615  	var entries int
   616  	if ok {
   617  		entries = n.entriesNumber
   618  	}
   619  	nodeNetworks[nid] = &network{id: nid, ltime: ltime, entriesNumber: entries}
   620  	nodeNetworks[nid].tableBroadcasts = &memberlist.TransmitLimitedQueue{
   621  		NumNodes: func() int {
   622  			// TODO fcrisciani this can be optimized maybe avoiding the lock?
   623  			// this call is done each GetBroadcasts call to evaluate the number of
   624  			// replicas for the message
   625  			nDB.RLock()
   626  			defer nDB.RUnlock()
   627  			return len(nDB.networkNodes[nid])
   628  		},
   629  		RetransmitMult: 4,
   630  	}
   631  	nDB.addNetworkNode(nid, nDB.config.NodeID)
   632  	networkNodes := nDB.networkNodes[nid]
   633  	n = nodeNetworks[nid]
   634  	nDB.Unlock()
   635  
   636  	if err := nDB.sendNetworkEvent(nid, NetworkEventTypeJoin, ltime); err != nil {
   637  		return fmt.Errorf("failed to send join network event for %s: %v", nid, err)
   638  	}
   639  
   640  	logrus.Debugf("%v(%v): joined network %s", nDB.config.Hostname, nDB.config.NodeID, nid)
   641  	if _, err := nDB.bulkSync(networkNodes, true); err != nil {
   642  		logrus.Errorf("Error bulk syncing while joining network %s: %v", nid, err)
   643  	}
   644  
   645  	// Mark the network as being synced
   646  	// note this is a best effort, we are not checking the result of the bulk sync
   647  	nDB.Lock()
   648  	n.inSync = true
   649  	nDB.Unlock()
   650  
   651  	return nil
   652  }
   653  
   654  // LeaveNetwork removes this node from a given network and propagates
   655  // this event across the cluster. This triggers this node leaving the
   656  // sub-cluster of this network; as a result it will no longer
   657  // participate in the network-scoped gossip and bulk sync for this
   658  // network. It also removes all the table entries for this network from
   659  // networkdb
   660  func (nDB *NetworkDB) LeaveNetwork(nid string) error {
   661  	ltime := nDB.networkClock.Increment()
   662  	if err := nDB.sendNetworkEvent(nid, NetworkEventTypeLeave, ltime); err != nil {
   663  		return fmt.Errorf("failed to send leave network event for %s: %v", nid, err)
   664  	}
   665  
   666  	nDB.Lock()
   667  	defer nDB.Unlock()
   668  
   669  	// Remove myself from the list of the nodes participating in the network
   670  	nDB.deleteNetworkNode(nid, nDB.config.NodeID)
   671  
   672  	// Update all the local entries marking them for deletion and delete all the remote entries
   673  	nDB.deleteNodeNetworkEntries(nid, nDB.config.NodeID)
   674  
   675  	nodeNetworks, ok := nDB.networks[nDB.config.NodeID]
   676  	if !ok {
   677  		return fmt.Errorf("could not find self node for network %s while trying to leave", nid)
   678  	}
   679  
   680  	n, ok := nodeNetworks[nid]
   681  	if !ok {
   682  		return fmt.Errorf("could not find network %s while trying to leave", nid)
   683  	}
   684  
   685  	logrus.Debugf("%v(%v): leaving network %s", nDB.config.Hostname, nDB.config.NodeID, nid)
   686  	n.ltime = ltime
   687  	n.reapTime = nDB.config.reapNetworkInterval
   688  	n.leaving = true
   689  	return nil
   690  }
   691  
   692  // addNetworkNode adds the node to the list of nodes which participate
   693  // in the passed network only if it is not already present. Caller
   694  // should hold the NetworkDB lock while calling this
   695  func (nDB *NetworkDB) addNetworkNode(nid string, nodeName string) {
   696  	nodes := nDB.networkNodes[nid]
   697  	for _, node := range nodes {
   698  		if node == nodeName {
   699  			return
   700  		}
   701  	}
   702  
   703  	nDB.networkNodes[nid] = append(nDB.networkNodes[nid], nodeName)
   704  }
   705  
   706  // Deletes the node from the list of nodes which participate in the
   707  // passed network. Caller should hold the NetworkDB lock while calling
   708  // this
   709  func (nDB *NetworkDB) deleteNetworkNode(nid string, nodeName string) {
   710  	nodes, ok := nDB.networkNodes[nid]
   711  	if !ok || len(nodes) == 0 {
   712  		return
   713  	}
   714  	newNodes := make([]string, 0, len(nodes)-1)
   715  	for _, name := range nodes {
   716  		if name == nodeName {
   717  			continue
   718  		}
   719  		newNodes = append(newNodes, name)
   720  	}
   721  	nDB.networkNodes[nid] = newNodes
   722  }
   723  
   724  // findCommonNetworks finds the networks that both this node and the
   725  // passed node have joined.
   726  func (nDB *NetworkDB) findCommonNetworks(nodeName string) []string {
   727  	nDB.RLock()
   728  	defer nDB.RUnlock()
   729  
   730  	var networks []string
   731  	for nid := range nDB.networks[nDB.config.NodeID] {
   732  		if n, ok := nDB.networks[nodeName][nid]; ok {
   733  			if !n.leaving {
   734  				networks = append(networks, nid)
   735  			}
   736  		}
   737  	}
   738  
   739  	return networks
   740  }
   741  
   742  func (nDB *NetworkDB) updateLocalNetworkTime() {
   743  	nDB.Lock()
   744  	defer nDB.Unlock()
   745  
   746  	ltime := nDB.networkClock.Increment()
   747  	for _, n := range nDB.networks[nDB.config.NodeID] {
   748  		n.ltime = ltime
   749  	}
   750  }
   751  
   752  // createOrUpdateEntry handles the creation or update of entries in the local
   753  // tree store. It also keeps the entries number of the network in sync (all tables are aggregated).
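        //
        // The same entry is indexed twice: under "/tname/nid/key" in the byTable
        // tree and under "/nid/tname/key" in the byNetwork tree.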
   754  func (nDB *NetworkDB) createOrUpdateEntry(nid, tname, key string, entry interface{}) (bool, bool) {
   755  	_, okTable := nDB.indexes[byTable].Insert(fmt.Sprintf("/%s/%s/%s", tname, nid, key), entry)
   756  	_, okNetwork := nDB.indexes[byNetwork].Insert(fmt.Sprintf("/%s/%s/%s", nid, tname, key), entry)
   757  	if !okNetwork {
   758  		// Add only if it is an insert not an update
   759  		n, ok := nDB.networks[nDB.config.NodeID][nid]
   760  		if ok {
   761  			n.entriesNumber++
   762  		}
   763  	}
   764  	return okTable, okNetwork
   765  }
   766  
   767  // deleteEntry handles the deletion of entries from the local tree store.
   768  // It also keeps the entries number of the network in sync (all tables are aggregated).
   769  func (nDB *NetworkDB) deleteEntry(nid, tname, key string) (bool, bool) {
   770  	_, okTable := nDB.indexes[byTable].Delete(fmt.Sprintf("/%s/%s/%s", tname, nid, key))
   771  	_, okNetwork := nDB.indexes[byNetwork].Delete(fmt.Sprintf("/%s/%s/%s", nid, tname, key))
   772  	if okNetwork {
   773  		// Remove only if the delete is successful
   774  		n, ok := nDB.networks[nDB.config.NodeID][nid]
   775  		if ok {
   776  			n.entriesNumber--
   777  		}
   778  	}
   779  	return okTable, okNetwork
   780  }