github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/libnetwork/networkdb/networkdb.go (about)

     1  package networkdb
     2  
     3  //go:generate protoc -I.:../vendor/github.com/gogo/protobuf --gogo_out=import_path=github.com/docker/libnetwork/networkdb,Mgogoproto/gogo.proto=github.com/gogo/protobuf/gogoproto:. networkdb.proto
     4  
     5  import (
     6  	"context"
     7  	"fmt"
     8  	"os"
     9  	"strings"
    10  	"sync"
    11  	"time"
    12  
    13  	"github.com/armon/go-radix"
    14  	"github.com/docker/docker/pkg/stringid"
    15  	"github.com/docker/go-events"
    16  	"github.com/docker/libnetwork/types"
    17  	"github.com/hashicorp/memberlist"
    18  	"github.com/hashicorp/serf/serf"
    19  	"github.com/sirupsen/logrus"
    20  )
    21  
// Identifiers of the radix-tree indexes kept in NetworkDB.indexes.
const (
	// byTable selects the index whose keys have the form
	// /<table name>/<network id>/<key>.
	byTable int = 1 + iota
	// byNetwork selects the index whose keys have the form
	// /<network id>/<table name>/<key>.
	byNetwork
)
    26  
// NetworkDB instance drives the networkdb cluster and acts as the broker
// for cluster-scoped and network-scoped gossip and watches.
type NetworkDB struct {
	// The clocks MUST be the first things
	// in this struct due to Golang issue #599.
	// NOTE(review): presumably this is about 64-bit alignment of the
	// clock counters on 32-bit platforms — confirm before moving any
	// field above them.

	// Global lamport clock for node network attach events.
	networkClock serf.LamportClock

	// Global lamport clock for table events.
	tableClock serf.LamportClock

	// Embedded lock taken by the methods of this type around accesses
	// to the maps and indexes below.
	sync.RWMutex

	// NetworkDB configuration.
	config *Config

	// All the tree indexes (byTable, byNetwork) that we maintain
	// for the db. Both indexes hold the same entries under
	// differently ordered keys.
	indexes map[int]*radix.Tree

	// Memberlist we use to drive the cluster.
	memberlist *memberlist.Memberlist

	// List of all peer nodes in the cluster, not limited to any
	// network.
	nodes map[string]*node

	// List of all peer nodes which have failed.
	failedNodes map[string]*node

	// List of all peer nodes which have left.
	leftNodes map[string]*node

	// A multi-dimensional map of network/node attachments. The
	// first key is a node name and the second key is a network ID
	// for the network that node is participating in.
	networks map[string]map[string]*network

	// A map of nodes which are participating in a given
	// network. The key is a network ID.
	networkNodes map[string][]string

	// A table of ack channels for every node from which we are
	// waiting for an ack.
	bulkSyncAckTbl map[string]chan struct{}

	// Broadcast queue for network event gossip.
	networkBroadcasts *memberlist.TransmitLimitedQueue

	// Broadcast queue for node event gossip.
	nodeBroadcasts *memberlist.TransmitLimitedQueue

	// A central context to stop all go routines running on
	// behalf of the NetworkDB instance.
	ctx       context.Context
	cancelCtx context.CancelFunc

	// A central broadcaster for all local watchers watching table
	// events.
	broadcaster *events.Broadcaster

	// List of all tickers which need to be stopped when
	// cleaning up.
	tickers []*time.Ticker

	// Reference to the memberlist's keyring to add & remove keys.
	keyring *memberlist.Keyring

	// bootStrapIP is the list of IPs that can be used to bootstrap
	// the gossip. It is replaced on every call to Join.
	bootStrapIP []string

	// lastStatsTimestamp is the last timestamp when the stats got printed.
	lastStatsTimestamp time.Time

	// lastHealthTimestamp is the last timestamp when the health score got printed.
	lastHealthTimestamp time.Time
}
   106  
// PeerInfo represents the peer (gossip cluster) nodes of a network.
type PeerInfo struct {
	// Name is the node name of the peer.
	Name string
	// IP is the textual address of the peer; Peers reports "unknown"
	// for a network member that is missing from the node list.
	IP string
}
   112  
// PeerClusterInfo represents the peer (gossip cluster) nodes. It carries
// the same fields as the embedded PeerInfo.
type PeerClusterInfo struct {
	PeerInfo
}
   117  
// node is a cluster peer as tracked by NetworkDB: the memberlist node
// plus the bookkeeping NetworkDB keeps for it.
type node struct {
	memberlist.Node
	// Lamport time associated with the node.
	// NOTE(review): presumably the time of the latest node event seen — confirm.
	ltime serf.LamportTime
	// Time left before the reaper removes the node.
	reapTime time.Duration
}
   124  
// network describes the node/network attachment.
type network struct {
	// Network ID
	id string

	// Lamport time for the latest state of the entry.
	ltime serf.LamportTime

	// Gets set to true after the first bulk sync happens
	inSync bool

	// Node leave is in progress.
	leaving bool

	// Time still left before a deleted network entry gets
	// removed from networkDB
	reapTime time.Duration

	// The broadcast queue for table event gossip. This is only
	// initialized for this node's network attachment entries.
	tableBroadcasts *memberlist.TransmitLimitedQueue

	// Number of gossip messages sent related to this network during the last stats collection period
	qMessagesSent int

	// Number of entries on the network. This value is the sum of all the entries of all the tables of a specific network.
	// Its use is for statistics purposes. It keeps track of the database size and is printed per network every
	// StatsPrintPeriod interval
	entriesNumber int
}
   155  
// Config represents the configuration of the networkdb instance and
// can be passed by the caller.
type Config struct {
	// NodeID is the unique identifier of the node within the cluster.
	NodeID string

	// Hostname is the node hostname.
	Hostname string

	// BindAddr is the IP on which networkdb listens. It can be
	// 0.0.0.0 to listen on all addresses on the host.
	BindAddr string

	// AdvertiseAddr is the node's IP address that we advertise for
	// cluster communication.
	AdvertiseAddr string

	// BindPort is the local node's port to which we bind for
	// cluster communication.
	BindPort int

	// Keys to be added to the Keyring of the memberlist. Key at index
	// 0 is the primary key
	Keys [][]byte

	// PacketBufferSize is the maximum number of bytes that memberlist will
	// put in a packet (this will be for UDP packets by default with a NetTransport).
	// A safe value for this is typically 1400 bytes (which is the default). However,
	// depending on your network's MTU (Maximum Transmission Unit) you may
	// be able to increase this to get more content into each gossip packet.
	PacketBufferSize int

	// reapEntryInterval is how long a deleted entry is kept before being garbage collected
	reapEntryInterval time.Duration

	// reapNetworkInterval is how long a deleted network is kept before being garbage collected.
	// NOTE this MUST always be higher than reapEntryInterval; New derives it from reapEntryInterval.
	reapNetworkInterval time.Duration

	// StatsPrintPeriod the period to use to print queue stats
	// Default is 5min
	StatsPrintPeriod time.Duration

	// HealthPrintPeriod the period to use to print the health score
	// Default is 1min
	HealthPrintPeriod time.Duration
}
   203  
// entry defines a table entry. The same *entry value is stored in both
// the byTable and the byNetwork index.
type entry struct {
	// node from which this entry was learned.
	node string

	// Lamport time for the most recent update to the entry
	ltime serf.LamportTime

	// Opaque value stored in the entry
	value []byte

	// Deleting the entry is in progress. All entries linger in
	// the cluster for a certain amount of time after deletion.
	deleting bool

	// Time still left before a deleted table entry gets
	// removed from networkDB
	reapTime time.Duration
}
   223  
   224  // DefaultConfig returns a NetworkDB config with default values
   225  func DefaultConfig() *Config {
   226  	hostname, _ := os.Hostname()
   227  	return &Config{
   228  		NodeID:            stringid.TruncateID(stringid.GenerateRandomID()),
   229  		Hostname:          hostname,
   230  		BindAddr:          "0.0.0.0",
   231  		PacketBufferSize:  1400,
   232  		StatsPrintPeriod:  5 * time.Minute,
   233  		HealthPrintPeriod: 1 * time.Minute,
   234  		reapEntryInterval: 30 * time.Minute,
   235  	}
   236  }
   237  
   238  // New creates a new instance of NetworkDB using the Config passed by
   239  // the caller.
   240  func New(c *Config) (*NetworkDB, error) {
   241  	// The garbage collection logic for entries leverage the presence of the network.
   242  	// For this reason the expiration time of the network is put slightly higher than the entry expiration so that
   243  	// there is at least 5 extra cycle to make sure that all the entries are properly deleted before deleting the network.
   244  	c.reapNetworkInterval = c.reapEntryInterval + 5*reapPeriod
   245  
   246  	nDB := &NetworkDB{
   247  		config:         c,
   248  		indexes:        make(map[int]*radix.Tree),
   249  		networks:       make(map[string]map[string]*network),
   250  		nodes:          make(map[string]*node),
   251  		failedNodes:    make(map[string]*node),
   252  		leftNodes:      make(map[string]*node),
   253  		networkNodes:   make(map[string][]string),
   254  		bulkSyncAckTbl: make(map[string]chan struct{}),
   255  		broadcaster:    events.NewBroadcaster(),
   256  	}
   257  
   258  	nDB.indexes[byTable] = radix.New()
   259  	nDB.indexes[byNetwork] = radix.New()
   260  
   261  	logrus.Infof("New memberlist node - Node:%v will use memberlist nodeID:%v with config:%+v", c.Hostname, c.NodeID, c)
   262  	if err := nDB.clusterInit(); err != nil {
   263  		return nil, err
   264  	}
   265  
   266  	return nDB, nil
   267  }
   268  
   269  // Join joins this NetworkDB instance with a list of peer NetworkDB
   270  // instances passed by the caller in the form of addr:port
   271  func (nDB *NetworkDB) Join(members []string) error {
   272  	nDB.Lock()
   273  	nDB.bootStrapIP = append([]string(nil), members...)
   274  	logrus.Infof("The new bootstrap node list is:%v", nDB.bootStrapIP)
   275  	nDB.Unlock()
   276  	return nDB.clusterJoin(members)
   277  }
   278  
   279  // Close destroys this NetworkDB instance by leave the cluster,
   280  // stopping timers, canceling goroutines etc.
   281  func (nDB *NetworkDB) Close() {
   282  	if err := nDB.clusterLeave(); err != nil {
   283  		logrus.Errorf("%v(%v) Could not close DB: %v", nDB.config.Hostname, nDB.config.NodeID, err)
   284  	}
   285  
   286  	// Avoid (*Broadcaster).run goroutine leak
   287  	nDB.broadcaster.Close()
   288  }
   289  
   290  // ClusterPeers returns all the gossip cluster peers.
   291  func (nDB *NetworkDB) ClusterPeers() []PeerInfo {
   292  	nDB.RLock()
   293  	defer nDB.RUnlock()
   294  	peers := make([]PeerInfo, 0, len(nDB.nodes))
   295  	for _, node := range nDB.nodes {
   296  		peers = append(peers, PeerInfo{
   297  			Name: node.Name,
   298  			IP:   node.Node.Addr.String(),
   299  		})
   300  	}
   301  	return peers
   302  }
   303  
   304  // Peers returns the gossip peers for a given network.
   305  func (nDB *NetworkDB) Peers(nid string) []PeerInfo {
   306  	nDB.RLock()
   307  	defer nDB.RUnlock()
   308  	peers := make([]PeerInfo, 0, len(nDB.networkNodes[nid]))
   309  	for _, nodeName := range nDB.networkNodes[nid] {
   310  		if node, ok := nDB.nodes[nodeName]; ok {
   311  			peers = append(peers, PeerInfo{
   312  				Name: node.Name,
   313  				IP:   node.Addr.String(),
   314  			})
   315  		} else {
   316  			// Added for testing purposes, this condition should never happen else mean that the network list
   317  			// is out of sync with the node list
   318  			peers = append(peers, PeerInfo{Name: nodeName, IP: "unknown"})
   319  		}
   320  	}
   321  	return peers
   322  }
   323  
   324  // GetEntry retrieves the value of a table entry in a given (network,
   325  // table, key) tuple
   326  func (nDB *NetworkDB) GetEntry(tname, nid, key string) ([]byte, error) {
   327  	nDB.RLock()
   328  	defer nDB.RUnlock()
   329  	entry, err := nDB.getEntry(tname, nid, key)
   330  	if err != nil {
   331  		return nil, err
   332  	}
   333  	if entry != nil && entry.deleting {
   334  		return nil, types.NotFoundErrorf("entry in table %s network id %s and key %s deleted and pending garbage collection", tname, nid, key)
   335  	}
   336  
   337  	return entry.value, nil
   338  }
   339  
   340  func (nDB *NetworkDB) getEntry(tname, nid, key string) (*entry, error) {
   341  	e, ok := nDB.indexes[byTable].Get(fmt.Sprintf("/%s/%s/%s", tname, nid, key))
   342  	if !ok {
   343  		return nil, types.NotFoundErrorf("could not get entry in table %s with network id %s and key %s", tname, nid, key)
   344  	}
   345  
   346  	return e.(*entry), nil
   347  }
   348  
   349  // CreateEntry creates a table entry in NetworkDB for given (network,
   350  // table, key) tuple and if the NetworkDB is part of the cluster
   351  // propagates this event to the cluster. It is an error to create an
   352  // entry for the same tuple for which there is already an existing
   353  // entry unless the current entry is deleting state.
   354  func (nDB *NetworkDB) CreateEntry(tname, nid, key string, value []byte) error {
   355  	nDB.Lock()
   356  	oldEntry, err := nDB.getEntry(tname, nid, key)
   357  	if err == nil || (oldEntry != nil && !oldEntry.deleting) {
   358  		nDB.Unlock()
   359  		return fmt.Errorf("cannot create entry in table %s with network id %s and key %s, already exists", tname, nid, key)
   360  	}
   361  
   362  	entry := &entry{
   363  		ltime: nDB.tableClock.Increment(),
   364  		node:  nDB.config.NodeID,
   365  		value: value,
   366  	}
   367  
   368  	nDB.createOrUpdateEntry(nid, tname, key, entry)
   369  	nDB.Unlock()
   370  
   371  	if err := nDB.sendTableEvent(TableEventTypeCreate, nid, tname, key, entry); err != nil {
   372  		return fmt.Errorf("cannot send create event for table %s, %v", tname, err)
   373  	}
   374  
   375  	return nil
   376  }
   377  
   378  // UpdateEntry updates a table entry in NetworkDB for given (network,
   379  // table, key) tuple and if the NetworkDB is part of the cluster
   380  // propagates this event to the cluster. It is an error to update a
   381  // non-existent entry.
   382  func (nDB *NetworkDB) UpdateEntry(tname, nid, key string, value []byte) error {
   383  	nDB.Lock()
   384  	if _, err := nDB.getEntry(tname, nid, key); err != nil {
   385  		nDB.Unlock()
   386  		return fmt.Errorf("cannot update entry as the entry in table %s with network id %s and key %s does not exist", tname, nid, key)
   387  	}
   388  
   389  	entry := &entry{
   390  		ltime: nDB.tableClock.Increment(),
   391  		node:  nDB.config.NodeID,
   392  		value: value,
   393  	}
   394  
   395  	nDB.createOrUpdateEntry(nid, tname, key, entry)
   396  	nDB.Unlock()
   397  
   398  	if err := nDB.sendTableEvent(TableEventTypeUpdate, nid, tname, key, entry); err != nil {
   399  		return fmt.Errorf("cannot send table update event: %v", err)
   400  	}
   401  
   402  	return nil
   403  }
   404  
// TableElem is the element returned by GetTableByNetwork for each key of
// a table: the entry value together with its owner.
type TableElem struct {
	// Value is the opaque value stored in the entry.
	Value []byte
	// owner is the node from which the entry was learned.
	owner string
}
   410  
   411  // GetTableByNetwork walks the networkdb by the give table and network id and
   412  // returns a map of keys and values
   413  func (nDB *NetworkDB) GetTableByNetwork(tname, nid string) map[string]*TableElem {
   414  	entries := make(map[string]*TableElem)
   415  	nDB.indexes[byTable].WalkPrefix(fmt.Sprintf("/%s/%s", tname, nid), func(k string, v interface{}) bool {
   416  		entry := v.(*entry)
   417  		if entry.deleting {
   418  			return false
   419  		}
   420  		key := k[strings.LastIndex(k, "/")+1:]
   421  		entries[key] = &TableElem{Value: entry.value, owner: entry.node}
   422  		return false
   423  	})
   424  	return entries
   425  }
   426  
   427  // DeleteEntry deletes a table entry in NetworkDB for given (network,
   428  // table, key) tuple and if the NetworkDB is part of the cluster
   429  // propagates this event to the cluster.
   430  func (nDB *NetworkDB) DeleteEntry(tname, nid, key string) error {
   431  	nDB.Lock()
   432  	oldEntry, err := nDB.getEntry(tname, nid, key)
   433  	if err != nil || oldEntry == nil || oldEntry.deleting {
   434  		nDB.Unlock()
   435  		return fmt.Errorf("cannot delete entry %s with network id %s and key %s "+
   436  			"does not exist or is already being deleted", tname, nid, key)
   437  	}
   438  
   439  	entry := &entry{
   440  		ltime:    nDB.tableClock.Increment(),
   441  		node:     nDB.config.NodeID,
   442  		value:    oldEntry.value,
   443  		deleting: true,
   444  		reapTime: nDB.config.reapEntryInterval,
   445  	}
   446  
   447  	nDB.createOrUpdateEntry(nid, tname, key, entry)
   448  	nDB.Unlock()
   449  
   450  	if err := nDB.sendTableEvent(TableEventTypeDelete, nid, tname, key, entry); err != nil {
   451  		return fmt.Errorf("cannot send table delete event: %v", err)
   452  	}
   453  
   454  	return nil
   455  }
   456  
   457  func (nDB *NetworkDB) deleteNodeFromNetworks(deletedNode string) {
   458  	for nid, nodes := range nDB.networkNodes {
   459  		updatedNodes := make([]string, 0, len(nodes))
   460  		for _, node := range nodes {
   461  			if node == deletedNode {
   462  				continue
   463  			}
   464  
   465  			updatedNodes = append(updatedNodes, node)
   466  		}
   467  
   468  		nDB.networkNodes[nid] = updatedNodes
   469  	}
   470  
   471  	delete(nDB.networks, deletedNode)
   472  }
   473  
// deleteNodeNetworkEntries walks the byNetwork index for the network nid and
// removes or tombstones entries on behalf of node. It does no locking of its
// own. It is called in 2 conditions with 2 different outcomes:
// 1) when a notification is coming of a node leaving the network
//   - Walk all the network entries and mark the leaving node's entries for deletion
//     These will be garbage collected when the reap timer will expire
//
// 2) when the local node is leaving the network
//   - Walk all the network entries:
//     A) if the entry is owned by the local node
//     then we will mark it for deletion. This will ensure that if a node did not
//     yet received the notification that the local node is leaving, will be aware
//     of the entries to be deleted.
//     B) if the entry is owned by a remote node, then we can safely delete it. This
//     ensures that if we join back this network as we receive the CREATE event for
//     entries owned by remote nodes, we will accept them and we notify the application
//
// Local watchers get a delete event for every affected entry that was not
// already marked for deletion.
func (nDB *NetworkDB) deleteNodeNetworkEntries(nid, node string) {
	// Indicates if the delete is triggered for the local node
	isNodeLocal := node == nDB.config.NodeID

	nDB.indexes[byNetwork].WalkPrefix(fmt.Sprintf("/%s", nid),
		func(path string, v interface{}) bool {
			oldEntry := v.(*entry)
			// Index paths have the form /<network id>/<table name>/<key>.
			// Note that the nid declared here shadows the function parameter.
			params := strings.Split(path[1:], "/")
			nid := params[0]
			tname := params[1]
			key := params[2]

			// If the entry is owned by a remote node and this node is not leaving the network
			if oldEntry.node != node && !isNodeLocal {
				// Don't do anything because the event is triggered for a node that does not own this entry
				return false
			}

			// If this entry is already marked for deletion and this node is not leaving the network
			if oldEntry.deleting && !isNodeLocal {
				// Don't do anything this entry will be already garbage collected using the old reapTime
				return false
			}

			// Tombstone replacing the entry: same owner, value and lamport
			// time, with a fresh reap interval.
			entry := &entry{
				ltime:    oldEntry.ltime,
				node:     oldEntry.node,
				value:    oldEntry.value,
				deleting: true,
				reapTime: nDB.config.reapEntryInterval,
			}

			// we arrived at this point in 2 cases:
			// 1) this entry is owned by the node that is leaving the network
			// 2) the local node is leaving the network
			if oldEntry.node == node {
				if isNodeLocal {
					// TODO fcrisciani: this can be removed if there is no way to leave the network
					// without doing a delete of all the objects
					entry.ltime++
				}

				// An entry that is already a tombstone keeps its current reapTime.
				if !oldEntry.deleting {
					nDB.createOrUpdateEntry(nid, tname, key, entry)
				}
			} else {
				// the local node is leaving the network, all the entries of remote nodes can be safely removed
				nDB.deleteEntry(nid, tname, key)
			}

			// Notify to the upper layer only entries not already marked for deletion
			if !oldEntry.deleting {
				nDB.broadcaster.Write(makeEvent(opDelete, tname, nid, key, entry.value))
			}
			// Returning false keeps the walk going over the remaining entries.
			return false
		})
}
   545  
   546  func (nDB *NetworkDB) deleteNodeTableEntries(node string) {
   547  	nDB.indexes[byTable].Walk(func(path string, v interface{}) bool {
   548  		oldEntry := v.(*entry)
   549  		if oldEntry.node != node {
   550  			return false
   551  		}
   552  
   553  		params := strings.Split(path[1:], "/")
   554  		tname := params[0]
   555  		nid := params[1]
   556  		key := params[2]
   557  
   558  		nDB.deleteEntry(nid, tname, key)
   559  
   560  		if !oldEntry.deleting {
   561  			nDB.broadcaster.Write(makeEvent(opDelete, tname, nid, key, oldEntry.value))
   562  		}
   563  		return false
   564  	})
   565  }
   566  
   567  // WalkTable walks a single table in NetworkDB and invokes the passed
   568  // function for each entry in the table passing the network, key,
   569  // value. The walk stops if the passed function returns a true.
   570  func (nDB *NetworkDB) WalkTable(tname string, fn func(string, string, []byte, bool) bool) error {
   571  	nDB.RLock()
   572  	values := make(map[string]interface{})
   573  	nDB.indexes[byTable].WalkPrefix(fmt.Sprintf("/%s", tname), func(path string, v interface{}) bool {
   574  		values[path] = v
   575  		return false
   576  	})
   577  	nDB.RUnlock()
   578  
   579  	for k, v := range values {
   580  		params := strings.Split(k[1:], "/")
   581  		nid := params[1]
   582  		key := params[2]
   583  		if fn(nid, key, v.(*entry).value, v.(*entry).deleting) {
   584  			return nil
   585  		}
   586  	}
   587  
   588  	return nil
   589  }
   590  
   591  // JoinNetwork joins this node to a given network and propagates this
   592  // event across the cluster. This triggers this node joining the
   593  // sub-cluster of this network and participates in the network-scoped
   594  // gossip and bulk sync for this network.
   595  func (nDB *NetworkDB) JoinNetwork(nid string) error {
   596  	ltime := nDB.networkClock.Increment()
   597  
   598  	nDB.Lock()
   599  	nodeNetworks, ok := nDB.networks[nDB.config.NodeID]
   600  	if !ok {
   601  		nodeNetworks = make(map[string]*network)
   602  		nDB.networks[nDB.config.NodeID] = nodeNetworks
   603  	}
   604  	n, ok := nodeNetworks[nid]
   605  	var entries int
   606  	if ok {
   607  		entries = n.entriesNumber
   608  	}
   609  	nodeNetworks[nid] = &network{id: nid, ltime: ltime, entriesNumber: entries}
   610  	nodeNetworks[nid].tableBroadcasts = &memberlist.TransmitLimitedQueue{
   611  		NumNodes: func() int {
   612  			// TODO fcrisciani this can be optimized maybe avoiding the lock?
   613  			// this call is done each GetBroadcasts call to evaluate the number of
   614  			// replicas for the message
   615  			nDB.RLock()
   616  			defer nDB.RUnlock()
   617  			return len(nDB.networkNodes[nid])
   618  		},
   619  		RetransmitMult: 4,
   620  	}
   621  	nDB.addNetworkNode(nid, nDB.config.NodeID)
   622  	networkNodes := nDB.networkNodes[nid]
   623  	n = nodeNetworks[nid]
   624  	nDB.Unlock()
   625  
   626  	if err := nDB.sendNetworkEvent(nid, NetworkEventTypeJoin, ltime); err != nil {
   627  		return fmt.Errorf("failed to send leave network event for %s: %v", nid, err)
   628  	}
   629  
   630  	logrus.Debugf("%v(%v): joined network %s", nDB.config.Hostname, nDB.config.NodeID, nid)
   631  	if _, err := nDB.bulkSync(networkNodes, true); err != nil {
   632  		logrus.Errorf("Error bulk syncing while joining network %s: %v", nid, err)
   633  	}
   634  
   635  	// Mark the network as being synced
   636  	// note this is a best effort, we are not checking the result of the bulk sync
   637  	nDB.Lock()
   638  	n.inSync = true
   639  	nDB.Unlock()
   640  
   641  	return nil
   642  }
   643  
   644  // LeaveNetwork leaves this node from a given network and propagates
   645  // this event across the cluster. This triggers this node leaving the
   646  // sub-cluster of this network and as a result will no longer
   647  // participate in the network-scoped gossip and bulk sync for this
   648  // network. Also remove all the table entries for this network from
   649  // networkdb
   650  func (nDB *NetworkDB) LeaveNetwork(nid string) error {
   651  	ltime := nDB.networkClock.Increment()
   652  	if err := nDB.sendNetworkEvent(nid, NetworkEventTypeLeave, ltime); err != nil {
   653  		return fmt.Errorf("failed to send leave network event for %s: %v", nid, err)
   654  	}
   655  
   656  	nDB.Lock()
   657  	defer nDB.Unlock()
   658  
   659  	// Remove myself from the list of the nodes participating to the network
   660  	nDB.deleteNetworkNode(nid, nDB.config.NodeID)
   661  
   662  	// Update all the local entries marking them for deletion and delete all the remote entries
   663  	nDB.deleteNodeNetworkEntries(nid, nDB.config.NodeID)
   664  
   665  	nodeNetworks, ok := nDB.networks[nDB.config.NodeID]
   666  	if !ok {
   667  		return fmt.Errorf("could not find self node for network %s while trying to leave", nid)
   668  	}
   669  
   670  	n, ok := nodeNetworks[nid]
   671  	if !ok {
   672  		return fmt.Errorf("could not find network %s while trying to leave", nid)
   673  	}
   674  
   675  	logrus.Debugf("%v(%v): leaving network %s", nDB.config.Hostname, nDB.config.NodeID, nid)
   676  	n.ltime = ltime
   677  	n.reapTime = nDB.config.reapNetworkInterval
   678  	n.leaving = true
   679  	return nil
   680  }
   681  
   682  // addNetworkNode adds the node to the list of nodes which participate
   683  // in the passed network only if it is not already present. Caller
   684  // should hold the NetworkDB lock while calling this
   685  func (nDB *NetworkDB) addNetworkNode(nid string, nodeName string) {
   686  	nodes := nDB.networkNodes[nid]
   687  	for _, node := range nodes {
   688  		if node == nodeName {
   689  			return
   690  		}
   691  	}
   692  
   693  	nDB.networkNodes[nid] = append(nDB.networkNodes[nid], nodeName)
   694  }
   695  
   696  // Deletes the node from the list of nodes which participate in the
   697  // passed network. Caller should hold the NetworkDB lock while calling
   698  // this
   699  func (nDB *NetworkDB) deleteNetworkNode(nid string, nodeName string) {
   700  	nodes, ok := nDB.networkNodes[nid]
   701  	if !ok || len(nodes) == 0 {
   702  		return
   703  	}
   704  	newNodes := make([]string, 0, len(nodes)-1)
   705  	for _, name := range nodes {
   706  		if name == nodeName {
   707  			continue
   708  		}
   709  		newNodes = append(newNodes, name)
   710  	}
   711  	nDB.networkNodes[nid] = newNodes
   712  }
   713  
   714  // findCommonnetworks find the networks that both this node and the
   715  // passed node have joined.
   716  func (nDB *NetworkDB) findCommonNetworks(nodeName string) []string {
   717  	nDB.RLock()
   718  	defer nDB.RUnlock()
   719  
   720  	var networks []string
   721  	for nid := range nDB.networks[nDB.config.NodeID] {
   722  		if n, ok := nDB.networks[nodeName][nid]; ok {
   723  			if !n.leaving {
   724  				networks = append(networks, nid)
   725  			}
   726  		}
   727  	}
   728  
   729  	return networks
   730  }
   731  
   732  func (nDB *NetworkDB) updateLocalNetworkTime() {
   733  	nDB.Lock()
   734  	defer nDB.Unlock()
   735  
   736  	ltime := nDB.networkClock.Increment()
   737  	for _, n := range nDB.networks[nDB.config.NodeID] {
   738  		n.ltime = ltime
   739  	}
   740  }
   741  
   742  // createOrUpdateEntry this function handles the creation or update of entries into the local
   743  // tree store. It is also used to keep in sync the entries number of the network (all tables are aggregated)
   744  func (nDB *NetworkDB) createOrUpdateEntry(nid, tname, key string, entry interface{}) (bool, bool) {
   745  	_, okTable := nDB.indexes[byTable].Insert(fmt.Sprintf("/%s/%s/%s", tname, nid, key), entry)
   746  	_, okNetwork := nDB.indexes[byNetwork].Insert(fmt.Sprintf("/%s/%s/%s", nid, tname, key), entry)
   747  	if !okNetwork {
   748  		// Add only if it is an insert not an update
   749  		n, ok := nDB.networks[nDB.config.NodeID][nid]
   750  		if ok {
   751  			n.entriesNumber++
   752  		}
   753  	}
   754  	return okTable, okNetwork
   755  }
   756  
   757  // deleteEntry this function handles the deletion of entries into the local tree store.
   758  // It is also used to keep in sync the entries number of the network (all tables are aggregated)
   759  func (nDB *NetworkDB) deleteEntry(nid, tname, key string) (bool, bool) {
   760  	_, okTable := nDB.indexes[byTable].Delete(fmt.Sprintf("/%s/%s/%s", tname, nid, key))
   761  	_, okNetwork := nDB.indexes[byNetwork].Delete(fmt.Sprintf("/%s/%s/%s", nid, tname, key))
   762  	if okNetwork {
   763  		// Remove only if the delete is successful
   764  		n, ok := nDB.networks[nDB.config.NodeID][nid]
   765  		if ok {
   766  			n.entriesNumber--
   767  		}
   768  	}
   769  	return okTable, okNetwork
   770  }