github.com/Prakhar-Agarwal-byte/moby@v0.0.0-20231027092010-a14e3e8ab87e/libnetwork/networkdb/networkdb.go

     1  package networkdb
     2  
     3  //go:generate protoc -I=. -I=../../vendor/ --gogofaster_out=import_path=github.com/Prakhar-Agarwal-byte/moby/libnetwork/networkdb:. networkdb.proto
     4  
     5  import (
     6  	"context"
     7  	"fmt"
     8  	"os"
     9  	"strings"
    10  	"sync"
    11  	"sync/atomic"
    12  	"time"
    13  
    14  	"github.com/containerd/log"
    15  	"github.com/Prakhar-Agarwal-byte/moby/libnetwork/types"
    16  	"github.com/Prakhar-Agarwal-byte/moby/pkg/stringid"
    17  	"github.com/docker/go-events"
    18  	iradix "github.com/hashicorp/go-immutable-radix"
    19  	"github.com/hashicorp/memberlist"
    20  	"github.com/hashicorp/serf/serf"
    21  )
    22  
    23  const (
    24  	byTable int = 1 + iota
    25  	byNetwork
    26  )
    27  
    28  // NetworkDB instance drives the networkdb cluster and acts as the broker
    29  // for cluster-scoped and network-scoped gossip and watches.
    30  type NetworkDB struct {
    31  	// The clocks MUST be the first things
    32  	// in this struct due to Golang issue #599.
    33  
    34  	// Global lamport clock for node network attach events.
    35  	networkClock serf.LamportClock
    36  
    37  	// Global lamport clock for table events.
    38  	tableClock serf.LamportClock
    39  
    40  	sync.RWMutex
    41  
    42  	// NetworkDB configuration.
    43  	config *Config
    44  
    45  	// All the tree indexes (byTable, byNetwork) that we maintain
    46  	// for the db.
    47  	indexes map[int]*iradix.Tree
    48  
    49  	// Memberlist we use to drive the cluster.
    50  	memberlist *memberlist.Memberlist
    51  
    52  	// List of all peer nodes in the cluster, not limited to any
    53  	// network.
    54  	nodes map[string]*node
    55  
    56  	// List of all peer nodes which have failed
    57  	failedNodes map[string]*node
    58  
    59  	// List of all peer nodes which have left
    60  	leftNodes map[string]*node
    61  
    62  	// A multi-dimensional map of network/node attachments. The
    63  	// first key is a node name and the second key is a network ID
    64  	// for the network that node is participating in.
    65  	networks map[string]map[string]*network
    66  
    67  	// A map of nodes which are participating in a given
    68  	// network. The key is a network ID.
    69  	networkNodes map[string][]string
    70  
    71  	// A table of ack channels for every node from which we are
    72  	// waiting for an ack.
    73  	bulkSyncAckTbl map[string]chan struct{}
    74  
    75  	// Broadcast queue for network event gossip.
    76  	networkBroadcasts *memberlist.TransmitLimitedQueue
    77  
    78  	// Broadcast queue for node event gossip.
    79  	nodeBroadcasts *memberlist.TransmitLimitedQueue
    80  
    81  	// A central context to stop all go routines running on
    82  	// behalf of the NetworkDB instance.
    83  	ctx       context.Context
    84  	cancelCtx context.CancelFunc
    85  
    86  	// A central broadcaster for all local watchers watching table
    87  	// events.
    88  	broadcaster *events.Broadcaster
    89  
    90  	// List of all tickers which need to be stopped when
    91  	// cleaning up.
    92  	tickers []*time.Ticker
    93  
    94  	// Reference to the memberlist's keyring to add & remove keys
    95  	keyring *memberlist.Keyring
    96  
    97  	// bootStrapIP is the list of IPs that can be used to bootstrap
    98  	// the gossip.
    99  	bootStrapIP []string
   100  
   101  	// lastStatsTimestamp is the last timestamp when the stats got printed
   102  	lastStatsTimestamp time.Time
   103  
   104  	// lastHealthTimestamp is the last timestamp when the health score got printed
   105  	lastHealthTimestamp time.Time
   106  }
   107  
   108  // PeerInfo represents the peer (gossip cluster) nodes of a network
   109  type PeerInfo struct {
   110  	Name string
   111  	IP   string
   112  }
   113  
   114  // PeerClusterInfo represents the peer (gossip cluster) nodes
   115  type PeerClusterInfo struct {
   116  	PeerInfo
   117  }
   118  
   119  type node struct {
   120  	memberlist.Node
   121  	ltime serf.LamportTime
   122  	// Number of hours left before the reaper removes the node
   123  	reapTime time.Duration
   124  }
   125  
   126  // network describes the node/network attachment.
   127  type network struct {
   128  	// Network ID
   129  	id string
   130  
   131  	// Lamport time for the latest state of the entry.
   132  	ltime serf.LamportTime
   133  
   134  	// Gets set to true after the first bulk sync happens
   135  	inSync bool
   136  
   137  	// Node leave is in progress.
   138  	leaving bool
   139  
   140  	// Number of seconds still left before a deleted network entry gets
   141  	// removed from networkDB
   142  	reapTime time.Duration
   143  
   144  	// The broadcast queue for table event gossip. This is only
   145  	// initialized for this node's network attachment entries.
   146  	tableBroadcasts *memberlist.TransmitLimitedQueue
   147  
   148  	// Number of gossip messages sent related to this network during the last stats collection period
   149  	qMessagesSent atomic.Int64
   150  
   151  	// Number of entries on the network. This value is the sum of all the entries of all the tables of a specific network.
   152  	// Its use is for statistics purposes. It keeps track of the database size and is printed per network every StatsPrintPeriod
   153  	// interval
   154  	entriesNumber atomic.Int64
   155  }
   156  
   157  // Config represents the configuration of the networkdb instance and
   158  // can be passed by the caller.
   159  type Config struct {
   160  	// NodeID is the unique identifier of the node when it is part of the cluster
   161  	NodeID string
   162  
   163  	// Hostname is the node hostname.
   164  	Hostname string
   165  
   166  	// BindAddr is the IP on which networkdb listens. It can be
   167  	// 0.0.0.0 to listen on all addresses on the host.
   168  	BindAddr string
   169  
   170  	// AdvertiseAddr is the node's IP address that we advertise for
   171  	// cluster communication.
   172  	AdvertiseAddr string
   173  
   174  	// BindPort is the local node's port to which we bind for
   175  	// cluster communication.
   176  	BindPort int
   177  
   178  	// Keys to be added to the Keyring of the memberlist. Key at index
   179  	// 0 is the primary key
   180  	Keys [][]byte
   181  
   182  	// PacketBufferSize is the maximum number of bytes that memberlist will
   183  	// put in a packet (this will be for UDP packets by default with a NetTransport).
   184  	// A safe value for this is typically 1400 bytes (which is the default). However,
   185  	// depending on your network's MTU (Maximum Transmission Unit) you may
   186  	// be able to increase this to get more content into each gossip packet.
   187  	PacketBufferSize int
   188  
   189  	// reapEntryInterval is the duration a deleted entry is retained before being garbage collected
   190  	reapEntryInterval time.Duration
   191  
   192  	// reapNetworkInterval is the duration a deleted network is retained before being garbage collected
   193  	// NOTE this MUST always be higher than reapEntryInterval
   194  	reapNetworkInterval time.Duration
   195  
   196  	// rejoinClusterDuration represents retryJoin timeout used by rejoinClusterBootStrap.
   197  	// Default is 10sec.
   198  	rejoinClusterDuration time.Duration
   199  
   200  	// rejoinClusterInterval represents interval on which rejoinClusterBootStrap runs.
   201  	// Default is 60sec.
   202  	rejoinClusterInterval time.Duration
   203  
   204  	// StatsPrintPeriod is the period at which queue stats are printed
   205  	// Default is 5min
   206  	StatsPrintPeriod time.Duration
   207  
   208  	// HealthPrintPeriod is the period at which the health score is printed
   209  	// Default is 1min
   210  	HealthPrintPeriod time.Duration
   211  }
   212  
   213  // entry defines a table entry
   214  type entry struct {
   215  	// node from which this entry was learned.
   216  	node string
   217  
   218  	// Lamport time for the most recent update to the entry
   219  	ltime serf.LamportTime
   220  
   221  	// Opaque value stored in the entry
   222  	value []byte
   223  
   224  	// Deleting the entry is in progress. All entries linger in
   225  	// the cluster for a certain amount of time after deletion.
   226  	deleting bool
   227  
   228  	// Number of seconds still left before a deleted table entry gets
   229  	// removed from networkDB
   230  	reapTime time.Duration
   231  }
   232  
   233  // DefaultConfig returns a NetworkDB config with default values
   234  func DefaultConfig() *Config {
   235  	hostname, _ := os.Hostname()
   236  	return &Config{
   237  		NodeID:                stringid.TruncateID(stringid.GenerateRandomID()),
   238  		Hostname:              hostname,
   239  		BindAddr:              "0.0.0.0",
   240  		PacketBufferSize:      1400,
   241  		StatsPrintPeriod:      5 * time.Minute,
   242  		HealthPrintPeriod:     1 * time.Minute,
   243  		reapEntryInterval:     30 * time.Minute,
   244  		rejoinClusterDuration: 10 * time.Second,
   245  		rejoinClusterInterval: 60 * time.Second,
   246  	}
   247  }
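
// A minimal usage sketch (not part of the original file): customizing the
// defaults from a caller's point of view. The hostname, address, port, and
// key material below are hypothetical. Starting from DefaultConfig matters
// because the unexported reap/rejoin intervals are only populated there:
//
//	conf := networkdb.DefaultConfig()
//	conf.Hostname = "node-1"
//	conf.AdvertiseAddr = "192.0.2.10"
//	conf.BindPort = 7946
//	conf.Keys = [][]byte{primaryKey} // index 0 is the primary key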
   248  
   249  // New creates a new instance of NetworkDB using the Config passed by
   250  // the caller.
   251  func New(c *Config) (*NetworkDB, error) {
   252  	// The garbage collection logic for entries leverages the presence of the network.
   253  	// For this reason the expiration time of the network is set slightly higher than the entry expiration so that
   254  	// there are at least 5 extra cycles to make sure that all the entries are properly deleted before deleting the network.
   255  	c.reapNetworkInterval = c.reapEntryInterval + 5*reapPeriod
   256  
   257  	nDB := &NetworkDB{
   258  		config:         c,
   259  		indexes:        make(map[int]*iradix.Tree),
   260  		networks:       make(map[string]map[string]*network),
   261  		nodes:          make(map[string]*node),
   262  		failedNodes:    make(map[string]*node),
   263  		leftNodes:      make(map[string]*node),
   264  		networkNodes:   make(map[string][]string),
   265  		bulkSyncAckTbl: make(map[string]chan struct{}),
   266  		broadcaster:    events.NewBroadcaster(),
   267  	}
   268  
   269  	nDB.indexes[byTable] = iradix.New()
   270  	nDB.indexes[byNetwork] = iradix.New()
   271  
   272  	log.G(context.TODO()).Infof("New memberlist node - Node:%v will use memberlist nodeID:%v with config:%+v", c.Hostname, c.NodeID, c)
   273  	if err := nDB.clusterInit(); err != nil {
   274  		return nil, err
   275  	}
   276  
   277  	return nDB, nil
   278  }
   279  
   280  // Join joins this NetworkDB instance with a list of peer NetworkDB
   281  // instances passed by the caller in the form of addr:port
   282  func (nDB *NetworkDB) Join(members []string) error {
   283  	nDB.Lock()
   284  	nDB.bootStrapIP = append([]string(nil), members...)
   285  	log.G(context.TODO()).Infof("The new bootstrap node list is:%v", nDB.bootStrapIP)
   286  	nDB.Unlock()
   287  	return nDB.clusterJoin(members)
   288  }
   289  
   290  // Close destroys this NetworkDB instance by leaving the cluster,
   291  // stopping timers, canceling goroutines etc.
   292  func (nDB *NetworkDB) Close() {
   293  	if err := nDB.clusterLeave(); err != nil {
   294  		log.G(context.TODO()).Errorf("%v(%v) Could not close DB: %v", nDB.config.Hostname, nDB.config.NodeID, err)
   295  	}
   296  
   297  	// Avoid (*Broadcaster).run goroutine leak
   298  	nDB.broadcaster.Close()
   299  }
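
// A rough lifecycle sketch for a caller of this package (illustrative only;
// the peer addresses are hypothetical):
//
//	nDB, err := networkdb.New(conf)
//	if err != nil {
//		// handle error
//	}
//	defer nDB.Close()
//
//	// Bootstrap the gossip cluster from known peers in addr:port form.
//	if err := nDB.Join([]string{"192.0.2.11:7946", "192.0.2.12:7946"}); err != nil {
//		// handle error
//	}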
   300  
   301  // ClusterPeers returns all the gossip cluster peers.
   302  func (nDB *NetworkDB) ClusterPeers() []PeerInfo {
   303  	nDB.RLock()
   304  	defer nDB.RUnlock()
   305  	peers := make([]PeerInfo, 0, len(nDB.nodes))
   306  	for _, node := range nDB.nodes {
   307  		peers = append(peers, PeerInfo{
   308  			Name: node.Name,
   309  			IP:   node.Node.Addr.String(),
   310  		})
   311  	}
   312  	return peers
   313  }
   314  
   315  // Peers returns the gossip peers for a given network.
   316  func (nDB *NetworkDB) Peers(nid string) []PeerInfo {
   317  	nDB.RLock()
   318  	defer nDB.RUnlock()
   319  	peers := make([]PeerInfo, 0, len(nDB.networkNodes[nid]))
   320  	for _, nodeName := range nDB.networkNodes[nid] {
   321  		if node, ok := nDB.nodes[nodeName]; ok {
   322  			peers = append(peers, PeerInfo{
   323  				Name: node.Name,
   324  				IP:   node.Addr.String(),
   325  			})
   326  		} else {
   327  			// Added for testing purposes; this condition should never happen. If it does, it means that the network list
   328  			// is out of sync with the node list
   329  			peers = append(peers, PeerInfo{Name: nodeName, IP: "unknown"})
   330  		}
   331  	}
   332  	return peers
   333  }
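
// Sketch of inspecting membership (illustrative; nid is a hypothetical
// network ID already known to the caller):
//
//	for _, p := range nDB.ClusterPeers() {
//		fmt.Printf("cluster peer %s at %s\n", p.Name, p.IP)
//	}
//	for _, p := range nDB.Peers(nid) {
//		fmt.Printf("network %s peer %s at %s\n", nid, p.Name, p.IP)
//	}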
   334  
   335  // GetEntry retrieves the value of a table entry in a given (network,
   336  // table, key) tuple
   337  func (nDB *NetworkDB) GetEntry(tname, nid, key string) ([]byte, error) {
   338  	nDB.RLock()
   339  	defer nDB.RUnlock()
   340  	entry, err := nDB.getEntry(tname, nid, key)
   341  	if err != nil {
   342  		return nil, err
   343  	}
   344  	if entry != nil && entry.deleting {
   345  		return nil, types.NotFoundErrorf("entry in table %s network id %s and key %s deleted and pending garbage collection", tname, nid, key)
   346  	}
   347  
   348  	return entry.value, nil
   349  }
   350  
   351  func (nDB *NetworkDB) getEntry(tname, nid, key string) (*entry, error) {
   352  	e, ok := nDB.indexes[byTable].Get([]byte(fmt.Sprintf("/%s/%s/%s", tname, nid, key)))
   353  	if !ok {
   354  		return nil, types.NotFoundErrorf("could not get entry in table %s with network id %s and key %s", tname, nid, key)
   355  	}
   356  
   357  	return e.(*entry), nil
   358  }
   359  
   360  // CreateEntry creates a table entry in NetworkDB for given (network,
   361  // table, key) tuple and if the NetworkDB is part of the cluster
   362  // propagates this event to the cluster. It is an error to create an
   363  // entry for the same tuple for which there is already an existing
   364  // entry unless the current entry is in the deleting state.
   365  func (nDB *NetworkDB) CreateEntry(tname, nid, key string, value []byte) error {
   366  	nDB.Lock()
   367  	oldEntry, err := nDB.getEntry(tname, nid, key)
   368  	if err == nil || (oldEntry != nil && !oldEntry.deleting) {
   369  		nDB.Unlock()
   370  		return fmt.Errorf("cannot create entry in table %s with network id %s and key %s, already exists", tname, nid, key)
   371  	}
   372  
   373  	entry := &entry{
   374  		ltime: nDB.tableClock.Increment(),
   375  		node:  nDB.config.NodeID,
   376  		value: value,
   377  	}
   378  
   379  	nDB.createOrUpdateEntry(nid, tname, key, entry)
   380  	nDB.Unlock()
   381  
   382  	if err := nDB.sendTableEvent(TableEventTypeCreate, nid, tname, key, entry); err != nil {
   383  		return fmt.Errorf("cannot send create event for table %s, %v", tname, err)
   384  	}
   385  
   386  	return nil
   387  }
   388  
   389  // UpdateEntry updates a table entry in NetworkDB for given (network,
   390  // table, key) tuple and if the NetworkDB is part of the cluster
   391  // propagates this event to the cluster. It is an error to update a
   392  // non-existent entry.
   393  func (nDB *NetworkDB) UpdateEntry(tname, nid, key string, value []byte) error {
   394  	nDB.Lock()
   395  	if _, err := nDB.getEntry(tname, nid, key); err != nil {
   396  		nDB.Unlock()
   397  		return fmt.Errorf("cannot update entry as the entry in table %s with network id %s and key %s does not exist", tname, nid, key)
   398  	}
   399  
   400  	entry := &entry{
   401  		ltime: nDB.tableClock.Increment(),
   402  		node:  nDB.config.NodeID,
   403  		value: value,
   404  	}
   405  
   406  	nDB.createOrUpdateEntry(nid, tname, key, entry)
   407  	nDB.Unlock()
   408  
   409  	if err := nDB.sendTableEvent(TableEventTypeUpdate, nid, tname, key, entry); err != nil {
   410  		return fmt.Errorf("cannot send table update event: %v", err)
   411  	}
   412  
   413  	return nil
   414  }
   415  
   416  // TableElem represents a table entry as returned by GetTableByNetwork.
   417  type TableElem struct {
   418  	Value []byte
   419  	owner string
   420  }
   421  
   422  // GetTableByNetwork walks the networkdb by the given table and network id and
   423  // returns a map of keys and values
   424  func (nDB *NetworkDB) GetTableByNetwork(tname, nid string) map[string]*TableElem {
   425  	entries := make(map[string]*TableElem)
   426  	nDB.indexes[byTable].Root().WalkPrefix([]byte(fmt.Sprintf("/%s/%s", tname, nid)), func(k []byte, v interface{}) bool {
   427  		entry := v.(*entry)
   428  		if entry.deleting {
   429  			return false
   430  		}
   431  		key := string(k)
   432  		key = key[strings.LastIndex(key, "/")+1:]
   433  		entries[key] = &TableElem{Value: entry.value, owner: entry.node}
   434  		return false
   435  	})
   436  	return entries
   437  }
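
// Sketch of dumping one table for one network (the table name and network ID
// are hypothetical). Only Value is exported on TableElem; the owning node is
// kept internal:
//
//	for key, elem := range nDB.GetTableByNetwork("endpoint_table", nid) {
//		fmt.Printf("%s => %d bytes\n", key, len(elem.Value))
//	}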
   438  
   439  // DeleteEntry deletes a table entry in NetworkDB for given (network,
   440  // table, key) tuple and if the NetworkDB is part of the cluster
   441  // propagates this event to the cluster.
   442  func (nDB *NetworkDB) DeleteEntry(tname, nid, key string) error {
   443  	nDB.Lock()
   444  	oldEntry, err := nDB.getEntry(tname, nid, key)
   445  	if err != nil || oldEntry == nil || oldEntry.deleting {
   446  		nDB.Unlock()
   447  		return fmt.Errorf("cannot delete entry in table %s with network id %s and key %s: "+
   448  			"it does not exist or is already being deleted", tname, nid, key)
   449  	}
   450  
   451  	entry := &entry{
   452  		ltime:    nDB.tableClock.Increment(),
   453  		node:     nDB.config.NodeID,
   454  		value:    oldEntry.value,
   455  		deleting: true,
   456  		reapTime: nDB.config.reapEntryInterval,
   457  	}
   458  
   459  	nDB.createOrUpdateEntry(nid, tname, key, entry)
   460  	nDB.Unlock()
   461  
   462  	if err := nDB.sendTableEvent(TableEventTypeDelete, nid, tname, key, entry); err != nil {
   463  		return fmt.Errorf("cannot send table delete event: %v", err)
   464  	}
   465  
   466  	return nil
   467  }
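
// A sketch of the entry lifecycle from a caller's point of view (table name,
// network ID, and payloads are hypothetical). Each mutation bumps the table
// Lamport clock and is gossiped to the network's sub-cluster:
//
//	if err := nDB.CreateEntry("endpoint_table", nid, "ep1", []byte("v1")); err != nil {
//		// handle error
//	}
//	if v, err := nDB.GetEntry("endpoint_table", nid, "ep1"); err == nil {
//		fmt.Printf("ep1 = %s\n", v)
//	}
//	_ = nDB.UpdateEntry("endpoint_table", nid, "ep1", []byte("v2"))
//	// DeleteEntry only marks the entry as deleting; it lingers for
//	// reapEntryInterval before being garbage collected.
//	_ = nDB.DeleteEntry("endpoint_table", nid, "ep1")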
   468  
   469  func (nDB *NetworkDB) deleteNodeFromNetworks(deletedNode string) {
   470  	for nid, nodes := range nDB.networkNodes {
   471  		updatedNodes := make([]string, 0, len(nodes))
   472  		for _, node := range nodes {
   473  			if node == deletedNode {
   474  				continue
   475  			}
   476  
   477  			updatedNodes = append(updatedNodes, node)
   478  		}
   479  
   480  		nDB.networkNodes[nid] = updatedNodes
   481  	}
   482  
   483  	delete(nDB.networks, deletedNode)
   484  }
   485  
   486  // deleteNodeNetworkEntries is called in 2 conditions with 2 different outcomes:
   487  // 1) when a notification arrives that a node is leaving the network
   488  //   - Walk all the network entries and mark the leaving node's entries for deletion.
   489  //     These will be garbage collected when the reap timer expires
   490  //
   491  // 2) when the local node is leaving the network
   492  //   - Walk all the network entries:
   493  //     A) if the entry is owned by the local node
   494  //     then we mark it for deletion. This ensures that a node which has not
   495  //     yet received the notification that the local node is leaving will still
   496  //     be aware of the entries to be deleted.
   497  //     B) if the entry is owned by a remote node, then we can safely delete it. This
   498  //     ensures that if we rejoin this network, we will accept the CREATE events for
   499  //     entries owned by remote nodes and notify the application
   500  func (nDB *NetworkDB) deleteNodeNetworkEntries(nid, node string) {
   501  	// Indicates if the delete is triggered for the local node
   502  	isNodeLocal := node == nDB.config.NodeID
   503  
   504  	nDB.indexes[byNetwork].Root().WalkPrefix([]byte("/"+nid),
   505  		func(path []byte, v interface{}) bool {
   506  			oldEntry := v.(*entry)
   507  			params := strings.Split(string(path[1:]), "/")
   508  			nid := params[0]
   509  			tname := params[1]
   510  			key := params[2]
   511  
   512  			// If the entry is owned by a remote node and this node is not leaving the network
   513  			if oldEntry.node != node && !isNodeLocal {
   514  				// Don't do anything because the event is triggered for a node that does not own this entry
   515  				return false
   516  			}
   517  
   518  			// If this entry is already marked for deletion and this node is not leaving the network
   519  			if oldEntry.deleting && !isNodeLocal {
   520  				// Don't do anything; this entry will already be garbage collected using the old reapTime
   521  				return false
   522  			}
   523  
   524  			entry := &entry{
   525  				ltime:    oldEntry.ltime,
   526  				node:     oldEntry.node,
   527  				value:    oldEntry.value,
   528  				deleting: true,
   529  				reapTime: nDB.config.reapEntryInterval,
   530  			}
   531  
   532  			// we arrived at this point in 2 cases:
   533  			// 1) this entry is owned by the node that is leaving the network
   534  			// 2) the local node is leaving the network
   535  			if oldEntry.node == node {
   536  				if isNodeLocal {
   537  					// TODO fcrisciani: this can be removed if there is no way to leave the network
   538  					// without doing a delete of all the objects
   539  					entry.ltime++
   540  				}
   541  
   542  				if !oldEntry.deleting {
   543  					nDB.createOrUpdateEntry(nid, tname, key, entry)
   544  				}
   545  			} else {
   546  				// the local node is leaving the network, all the entries of remote nodes can be safely removed
   547  				nDB.deleteEntry(nid, tname, key)
   548  			}
   549  
   550  			// Notify the upper layer only of entries not already marked for deletion
   551  			if !oldEntry.deleting {
   552  				nDB.broadcaster.Write(makeEvent(opDelete, tname, nid, key, entry.value))
   553  			}
   554  			return false
   555  		})
   556  }
   557  
   558  func (nDB *NetworkDB) deleteNodeTableEntries(node string) {
   559  	nDB.indexes[byTable].Root().Walk(func(path []byte, v interface{}) bool {
   560  		oldEntry := v.(*entry)
   561  		if oldEntry.node != node {
   562  			return false
   563  		}
   564  
   565  		params := strings.Split(string(path[1:]), "/")
   566  		tname := params[0]
   567  		nid := params[1]
   568  		key := params[2]
   569  
   570  		nDB.deleteEntry(nid, tname, key)
   571  
   572  		if !oldEntry.deleting {
   573  			nDB.broadcaster.Write(makeEvent(opDelete, tname, nid, key, oldEntry.value))
   574  		}
   575  		return false
   576  	})
   577  }
   578  
   579  // WalkTable walks a single table in NetworkDB and invokes the passed
   580  // function for each entry in the table, passing the network ID, key, value,
   581  // and the deleting flag. The walk stops if the passed function returns true.
   582  func (nDB *NetworkDB) WalkTable(tname string, fn func(string, string, []byte, bool) bool) error {
   583  	nDB.RLock()
   584  	values := make(map[string]interface{})
   585  	nDB.indexes[byTable].Root().WalkPrefix([]byte("/"+tname), func(path []byte, v interface{}) bool {
   586  		values[string(path)] = v
   587  		return false
   588  	})
   589  	nDB.RUnlock()
   590  
   591  	for k, v := range values {
   592  		params := strings.Split(k[1:], "/")
   593  		nid := params[1]
   594  		key := params[2]
   595  		if fn(nid, key, v.(*entry).value, v.(*entry).deleting) {
   596  			return nil
   597  		}
   598  	}
   599  
   600  	return nil
   601  }
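
// Sketch of walking one table across all networks (illustrative table name).
// The last callback argument reports whether the entry is pending deletion,
// and returning true stops the walk:
//
//	_ = nDB.WalkTable("endpoint_table", func(nid, key string, value []byte, deleting bool) bool {
//		if !deleting {
//			fmt.Printf("%s/%s = %d bytes\n", nid, key, len(value))
//		}
//		return false // keep walking
//	})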
   602  
   603  // JoinNetwork joins this node to a given network and propagates this
   604  // event across the cluster. This triggers this node joining the
   605  // sub-cluster of this network and participating in the network-scoped
   606  // gossip and bulk sync for this network.
   607  func (nDB *NetworkDB) JoinNetwork(nid string) error {
   608  	ltime := nDB.networkClock.Increment()
   609  
   610  	nDB.Lock()
   611  	nodeNetworks, ok := nDB.networks[nDB.config.NodeID]
   612  	if !ok {
   613  		nodeNetworks = make(map[string]*network)
   614  		nDB.networks[nDB.config.NodeID] = nodeNetworks
   615  	}
   616  	n, ok := nodeNetworks[nid]
   617  	var entries int64
   618  	if ok {
   619  		entries = n.entriesNumber.Load()
   620  	}
   621  	nodeNetworks[nid] = &network{id: nid, ltime: ltime}
   622  	nodeNetworks[nid].entriesNumber.Store(entries)
   623  	nodeNetworks[nid].tableBroadcasts = &memberlist.TransmitLimitedQueue{
   624  		NumNodes: func() int {
   625  			// TODO fcrisciani this can be optimized maybe avoiding the lock?
   626  			// this call is done each GetBroadcasts call to evaluate the number of
   627  			// replicas for the message
   628  			nDB.RLock()
   629  			defer nDB.RUnlock()
   630  			return len(nDB.networkNodes[nid])
   631  		},
   632  		RetransmitMult: 4,
   633  	}
   634  	nDB.addNetworkNode(nid, nDB.config.NodeID)
   635  	networkNodes := nDB.networkNodes[nid]
   636  	n = nodeNetworks[nid]
   637  	nDB.Unlock()
   638  
   639  	if err := nDB.sendNetworkEvent(nid, NetworkEventTypeJoin, ltime); err != nil {
   640  		return fmt.Errorf("failed to send join network event for %s: %v", nid, err)
   641  	}
   642  
   643  	log.G(context.TODO()).Debugf("%v(%v): joined network %s", nDB.config.Hostname, nDB.config.NodeID, nid)
   644  	if _, err := nDB.bulkSync(networkNodes, true); err != nil {
   645  		log.G(context.TODO()).Errorf("Error bulk syncing while joining network %s: %v", nid, err)
   646  	}
   647  
   648  	// Mark the network as being synced
   649  	// note this is a best effort, we are not checking the result of the bulk sync
   650  	nDB.Lock()
   651  	n.inSync = true
   652  	nDB.Unlock()
   653  
   654  	return nil
   655  }
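
// Sketch of attaching to and detaching from a network's sub-cluster (nid is
// a hypothetical network ID). Table events for nid are only gossiped among
// nodes that have joined the same network:
//
//	if err := nDB.JoinNetwork(nid); err != nil {
//		// handle error
//	}
//	// ... CreateEntry/UpdateEntry/DeleteEntry scoped to nid ...
//	if err := nDB.LeaveNetwork(nid); err != nil {
//		// handle error
//	}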
   656  
   657  // LeaveNetwork removes this node from a given network and propagates
   658  // this event across the cluster. This triggers this node leaving the
   659  // sub-cluster of this network and, as a result, it will no longer
   660  // participate in the network-scoped gossip and bulk sync for this
   661  // network. It also removes all the table entries for this network from
   662  // networkdb.
   663  func (nDB *NetworkDB) LeaveNetwork(nid string) error {
   664  	ltime := nDB.networkClock.Increment()
   665  	if err := nDB.sendNetworkEvent(nid, NetworkEventTypeLeave, ltime); err != nil {
   666  		return fmt.Errorf("failed to send leave network event for %s: %v", nid, err)
   667  	}
   668  
   669  	nDB.Lock()
   670  	defer nDB.Unlock()
   671  
   672  	// Remove myself from the list of the nodes participating in the network
   673  	nDB.deleteNetworkNode(nid, nDB.config.NodeID)
   674  
   675  	// Update all the local entries marking them for deletion and delete all the remote entries
   676  	nDB.deleteNodeNetworkEntries(nid, nDB.config.NodeID)
   677  
   678  	nodeNetworks, ok := nDB.networks[nDB.config.NodeID]
   679  	if !ok {
   680  		return fmt.Errorf("could not find self node for network %s while trying to leave", nid)
   681  	}
   682  
   683  	n, ok := nodeNetworks[nid]
   684  	if !ok {
   685  		return fmt.Errorf("could not find network %s while trying to leave", nid)
   686  	}
   687  
   688  	log.G(context.TODO()).Debugf("%v(%v): leaving network %s", nDB.config.Hostname, nDB.config.NodeID, nid)
   689  	n.ltime = ltime
   690  	n.reapTime = nDB.config.reapNetworkInterval
   691  	n.leaving = true
   692  	return nil
   693  }
   694  
   695  // addNetworkNode adds the node to the list of nodes which participate
   696  // in the passed network only if it is not already present. Caller
   697  // should hold the NetworkDB lock while calling this
   698  func (nDB *NetworkDB) addNetworkNode(nid string, nodeName string) {
   699  	nodes := nDB.networkNodes[nid]
   700  	for _, node := range nodes {
   701  		if node == nodeName {
   702  			return
   703  		}
   704  	}
   705  
   706  	nDB.networkNodes[nid] = append(nDB.networkNodes[nid], nodeName)
   707  }
   708  
   709  // deleteNetworkNode deletes the node from the list of nodes which participate
   710  // in the passed network. Caller should hold the NetworkDB lock while calling
   711  // this
   712  func (nDB *NetworkDB) deleteNetworkNode(nid string, nodeName string) {
   713  	nodes, ok := nDB.networkNodes[nid]
   714  	if !ok || len(nodes) == 0 {
   715  		return
   716  	}
   717  	newNodes := make([]string, 0, len(nodes)-1)
   718  	for _, name := range nodes {
   719  		if name == nodeName {
   720  			continue
   721  		}
   722  		newNodes = append(newNodes, name)
   723  	}
   724  	nDB.networkNodes[nid] = newNodes
   725  }
   726  
   727  // findCommonNetworks finds the networks that both this node and the
   728  // passed node have joined.
   729  func (nDB *NetworkDB) findCommonNetworks(nodeName string) []string {
   730  	nDB.RLock()
   731  	defer nDB.RUnlock()
   732  
   733  	var networks []string
   734  	for nid := range nDB.networks[nDB.config.NodeID] {
   735  		if n, ok := nDB.networks[nodeName][nid]; ok {
   736  			if !n.leaving {
   737  				networks = append(networks, nid)
   738  			}
   739  		}
   740  	}
   741  
   742  	return networks
   743  }
   744  
   745  func (nDB *NetworkDB) updateLocalNetworkTime() {
   746  	nDB.Lock()
   747  	defer nDB.Unlock()
   748  
   749  	ltime := nDB.networkClock.Increment()
   750  	for _, n := range nDB.networks[nDB.config.NodeID] {
   751  		n.ltime = ltime
   752  	}
   753  }
   754  
   755  // createOrUpdateEntry handles the creation or update of entries in the local
   756  // tree store. It also keeps the network's entry count in sync (all tables are aggregated)
   757  func (nDB *NetworkDB) createOrUpdateEntry(nid, tname, key string, entry interface{}) (okTable bool, okNetwork bool) {
   758  	nDB.indexes[byTable], _, okTable = nDB.indexes[byTable].Insert([]byte(fmt.Sprintf("/%s/%s/%s", tname, nid, key)), entry)
   759  	nDB.indexes[byNetwork], _, okNetwork = nDB.indexes[byNetwork].Insert([]byte(fmt.Sprintf("/%s/%s/%s", nid, tname, key)), entry)
   760  	if !okNetwork {
   761  		// Add only if it is an insert not an update
   762  		n, ok := nDB.networks[nDB.config.NodeID][nid]
   763  		if ok {
   764  			n.entriesNumber.Add(1)
   765  		}
   766  	}
   767  	return okTable, okNetwork
   768  }
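
// For reference, the two radix indexes store every entry under complementary
// key layouts (shown for a hypothetical table/network/key triple), which is
// what lets GetTableByNetwork and deleteNodeNetworkEntries walk by prefix in
// either dimension:
//
//	byTable:   /endpoint_table/<nid>/ep1
//	byNetwork: /<nid>/endpoint_table/ep1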
   769  
   770  // deleteEntry handles the deletion of entries from the local tree store.
   771  // It also keeps the network's entry count in sync (all tables are aggregated)
   772  func (nDB *NetworkDB) deleteEntry(nid, tname, key string) (okTable bool, okNetwork bool) {
   773  	nDB.indexes[byTable], _, okTable = nDB.indexes[byTable].Delete([]byte(fmt.Sprintf("/%s/%s/%s", tname, nid, key)))
   774  	nDB.indexes[byNetwork], _, okNetwork = nDB.indexes[byNetwork].Delete([]byte(fmt.Sprintf("/%s/%s/%s", nid, tname, key)))
   775  	if okNetwork {
   776  		// Remove only if the delete is successful
   777  		n, ok := nDB.networks[nDB.config.NodeID][nid]
   778  		if ok {
   779  			n.entriesNumber.Add(-1)
   780  		}
   781  	}
   782  	return okTable, okNetwork
   783  }