github.com/zhuohuang-hust/src-cbuild@v0.0.0-20230105071821-c7aab3e7c840/mergeCode/libnetwork/networkdb/networkdb.go (about)

     1  package networkdb
     2  
     3  //go:generate protoc -I.:../Godeps/_workspace/src/github.com/gogo/protobuf  --gogo_out=import_path=github.com/docker/libnetwork/networkdb,Mgogoproto/gogo.proto=github.com/gogo/protobuf/gogoproto:. networkdb.proto
     4  
     5  import (
     6  	"fmt"
     7  	"strings"
     8  	"sync"
     9  	"time"
    10  
    11  	"github.com/Sirupsen/logrus"
    12  	"github.com/armon/go-radix"
    13  	"github.com/docker/go-events"
    14  	"github.com/hashicorp/memberlist"
    15  	"github.com/hashicorp/serf/serf"
    16  )
    17  
const (
	// byTable and byNetwork are the keys of NetworkDB.indexes.
	//
	// The byTable index stores entries under "/<table>/<network>/<key>";
	// the byNetwork index stores the same entries under
	// "/<network>/<table>/<key>".
	byTable int = 1 + iota
	byNetwork
)
    22  
// NetworkDB instance drives the networkdb cluster and acts as the broker
// for cluster-scoped and network-scoped gossip and watches.
//
// The embedded RWMutex guards the maps and indexes below.
type NetworkDB struct {
	sync.RWMutex

	// NetworkDB configuration.
	config *Config

	// Local copy of the memberlist config that we use to drive
	// network-scoped gossip and bulk sync.
	mConfig *memberlist.Config

	// All the tree indexes (byTable, byNetwork) that we maintain
	// for the db. Both indexes hold the same *entry values, keyed in
	// a different order.
	indexes map[int]*radix.Tree

	// Memberlist we use to drive the cluster.
	memberlist *memberlist.Memberlist

	// List of all peer nodes in the cluster, not limited to any
	// network.
	nodes map[string]*node

	// List of all peer nodes which have failed.
	failedNodes map[string]*node

	// List of all peer nodes which have left.
	leftNodes map[string]*node

	// A multi-dimensional map of network/node attachments. The
	// first key is a node name and the second key is a network ID
	// for the network that node is participating in.
	networks map[string]map[string]*network

	// A map of nodes which are participating in a given
	// network. The key is a network ID and the value is the list of
	// node names.
	networkNodes map[string][]string

	// A table of ack channels for every node from which we are
	// waiting for an ack.
	bulkSyncAckTbl map[string]chan struct{}

	// Global lamport clock for node network attach events.
	networkClock serf.LamportClock

	// Global lamport clock for table events.
	tableClock serf.LamportClock

	// Broadcast queue for network event gossip.
	networkBroadcasts *memberlist.TransmitLimitedQueue

	// Broadcast queue for node event gossip.
	nodeBroadcasts *memberlist.TransmitLimitedQueue

	// A central stop channel to stop all go routines running on
	// behalf of the NetworkDB instance.
	stopCh chan struct{}

	// A central broadcaster for all local watchers watching table
	// events.
	broadcaster *events.Broadcaster

	// List of all tickers which need to be stopped when
	// cleaning up.
	tickers []*time.Ticker

	// Reference to the memberlist's keyring to add & remove keys.
	keyring *memberlist.Keyring
}
    93  
// node is a cluster peer as tracked in the NetworkDB node maps
// (nodes, failedNodes, leftNodes). It embeds the memberlist view of
// the peer.
type node struct {
	memberlist.Node
	// Lamport time attached to this node record.
	// NOTE(review): presumably the time of the latest node event seen
	// for this peer; confirm against the node event handling code.
	ltime serf.LamportTime
}
    98  
// network describes a single node/network attachment, i.e. one value
// of the NetworkDB.networks[nodeName][networkID] map.
type network struct {
	// Network ID
	id string

	// Lamport time for the latest state of the entry.
	ltime serf.LamportTime

	// Node leave is in progress. Set by LeaveNetwork for the local
	// node's attachment.
	leaving bool

	// The time this node knew about the node's network leave.
	leaveTime time.Time

	// The broadcast queue for table event gossip. This is only
	// initialized for this node's network attachment entries
	// (see JoinNetwork).
	tableBroadcasts *memberlist.TransmitLimitedQueue
}
   117  
// Config represents the configuration of the networkdb instance and
// can be passed by the caller to New.
type Config struct {
	// NodeName is the cluster wide unique name for this node.
	NodeName string

	// BindAddr is the IP on which networkdb listens. It can be
	// 0.0.0.0 to listen on all addresses on the host.
	BindAddr string

	// AdvertiseAddr is the node's IP address that we advertise for
	// cluster communication.
	AdvertiseAddr string

	// BindPort is the local node's port to which we bind for
	// cluster communication.
	BindPort int

	// Keys to be added to the Keyring of the memberlist. Key at index
	// 0 is the primary key.
	Keys [][]byte
}
   140  
// entry defines a table entry. The same *entry is stored in both the
// byTable and the byNetwork index.
type entry struct {
	// Node from which this entry was learned.
	node string

	// Lamport time for the most recent update to the entry.
	ltime serf.LamportTime

	// Opaque value stored in the entry.
	value []byte

	// Deleting the entry is in progress. All entries linger in
	// the cluster for a certain amount of time after deletion.
	deleting bool

	// The wall clock time when this node learned about this deletion.
	deleteTime time.Time
}
   159  
   160  // New creates a new instance of NetworkDB using the Config passed by
   161  // the caller.
   162  func New(c *Config) (*NetworkDB, error) {
   163  	nDB := &NetworkDB{
   164  		config:         c,
   165  		indexes:        make(map[int]*radix.Tree),
   166  		networks:       make(map[string]map[string]*network),
   167  		nodes:          make(map[string]*node),
   168  		failedNodes:    make(map[string]*node),
   169  		leftNodes:      make(map[string]*node),
   170  		networkNodes:   make(map[string][]string),
   171  		bulkSyncAckTbl: make(map[string]chan struct{}),
   172  		broadcaster:    events.NewBroadcaster(),
   173  	}
   174  
   175  	nDB.indexes[byTable] = radix.New()
   176  	nDB.indexes[byNetwork] = radix.New()
   177  
   178  	if err := nDB.clusterInit(); err != nil {
   179  		return nil, err
   180  	}
   181  
   182  	return nDB, nil
   183  }
   184  
   185  // Join joins this NetworkDB instance with a list of peer NetworkDB
   186  // instances passed by the caller in the form of addr:port
   187  func (nDB *NetworkDB) Join(members []string) error {
   188  	return nDB.clusterJoin(members)
   189  }
   190  
   191  // Close destroys this NetworkDB instance by leave the cluster,
   192  // stopping timers, canceling goroutines etc.
   193  func (nDB *NetworkDB) Close() {
   194  	if err := nDB.clusterLeave(); err != nil {
   195  		logrus.Errorf("Could not close DB %s: %v", nDB.config.NodeName, err)
   196  	}
   197  }
   198  
   199  // GetEntry retrieves the value of a table entry in a given (network,
   200  // table, key) tuple
   201  func (nDB *NetworkDB) GetEntry(tname, nid, key string) ([]byte, error) {
   202  	entry, err := nDB.getEntry(tname, nid, key)
   203  	if err != nil {
   204  		return nil, err
   205  	}
   206  
   207  	return entry.value, nil
   208  }
   209  
   210  func (nDB *NetworkDB) getEntry(tname, nid, key string) (*entry, error) {
   211  	nDB.RLock()
   212  	defer nDB.RUnlock()
   213  
   214  	e, ok := nDB.indexes[byTable].Get(fmt.Sprintf("/%s/%s/%s", tname, nid, key))
   215  	if !ok {
   216  		return nil, fmt.Errorf("could not get entry in table %s with network id %s and key %s", tname, nid, key)
   217  	}
   218  
   219  	return e.(*entry), nil
   220  }
   221  
   222  // CreateEntry creates a table entry in NetworkDB for given (network,
   223  // table, key) tuple and if the NetworkDB is part of the cluster
   224  // propogates this event to the cluster. It is an error to create an
   225  // entry for the same tuple for which there is already an existing
   226  // entry.
   227  func (nDB *NetworkDB) CreateEntry(tname, nid, key string, value []byte) error {
   228  	if _, err := nDB.GetEntry(tname, nid, key); err == nil {
   229  		return fmt.Errorf("cannot create entry as the entry in table %s with network id %s and key %s already exists", tname, nid, key)
   230  	}
   231  
   232  	entry := &entry{
   233  		ltime: nDB.tableClock.Increment(),
   234  		node:  nDB.config.NodeName,
   235  		value: value,
   236  	}
   237  
   238  	if err := nDB.sendTableEvent(TableEventTypeCreate, nid, tname, key, entry); err != nil {
   239  		return fmt.Errorf("cannot send table create event: %v", err)
   240  	}
   241  
   242  	nDB.Lock()
   243  	nDB.indexes[byTable].Insert(fmt.Sprintf("/%s/%s/%s", tname, nid, key), entry)
   244  	nDB.indexes[byNetwork].Insert(fmt.Sprintf("/%s/%s/%s", nid, tname, key), entry)
   245  	nDB.Unlock()
   246  
   247  	nDB.broadcaster.Write(makeEvent(opCreate, tname, nid, key, value))
   248  	return nil
   249  }
   250  
   251  // UpdateEntry updates a table entry in NetworkDB for given (network,
   252  // table, key) tuple and if the NetworkDB is part of the cluster
   253  // propogates this event to the cluster. It is an error to update a
   254  // non-existent entry.
   255  func (nDB *NetworkDB) UpdateEntry(tname, nid, key string, value []byte) error {
   256  	if _, err := nDB.GetEntry(tname, nid, key); err != nil {
   257  		return fmt.Errorf("cannot update entry as the entry in table %s with network id %s and key %s does not exist", tname, nid, key)
   258  	}
   259  
   260  	entry := &entry{
   261  		ltime: nDB.tableClock.Increment(),
   262  		node:  nDB.config.NodeName,
   263  		value: value,
   264  	}
   265  
   266  	if err := nDB.sendTableEvent(TableEventTypeUpdate, nid, tname, key, entry); err != nil {
   267  		return fmt.Errorf("cannot send table update event: %v", err)
   268  	}
   269  
   270  	nDB.Lock()
   271  	nDB.indexes[byTable].Insert(fmt.Sprintf("/%s/%s/%s", tname, nid, key), entry)
   272  	nDB.indexes[byNetwork].Insert(fmt.Sprintf("/%s/%s/%s", nid, tname, key), entry)
   273  	nDB.Unlock()
   274  
   275  	nDB.broadcaster.Write(makeEvent(opUpdate, tname, nid, key, value))
   276  	return nil
   277  }
   278  
   279  // DeleteEntry deletes a table entry in NetworkDB for given (network,
   280  // table, key) tuple and if the NetworkDB is part of the cluster
   281  // propogates this event to the cluster.
   282  func (nDB *NetworkDB) DeleteEntry(tname, nid, key string) error {
   283  	value, err := nDB.GetEntry(tname, nid, key)
   284  	if err != nil {
   285  		return fmt.Errorf("cannot delete entry as the entry in table %s with network id %s and key %s does not exist", tname, nid, key)
   286  	}
   287  
   288  	entry := &entry{
   289  		ltime:      nDB.tableClock.Increment(),
   290  		node:       nDB.config.NodeName,
   291  		value:      value,
   292  		deleting:   true,
   293  		deleteTime: time.Now(),
   294  	}
   295  
   296  	if err := nDB.sendTableEvent(TableEventTypeDelete, nid, tname, key, entry); err != nil {
   297  		return fmt.Errorf("cannot send table delete event: %v", err)
   298  	}
   299  
   300  	nDB.Lock()
   301  	nDB.indexes[byTable].Insert(fmt.Sprintf("/%s/%s/%s", tname, nid, key), entry)
   302  	nDB.indexes[byNetwork].Insert(fmt.Sprintf("/%s/%s/%s", nid, tname, key), entry)
   303  	nDB.Unlock()
   304  
   305  	nDB.broadcaster.Write(makeEvent(opDelete, tname, nid, key, value))
   306  	return nil
   307  }
   308  
   309  func (nDB *NetworkDB) deleteNetworkEntriesForNode(deletedNode string) {
   310  	nDB.Lock()
   311  	for nid, nodes := range nDB.networkNodes {
   312  		updatedNodes := make([]string, 0, len(nodes))
   313  		for _, node := range nodes {
   314  			if node == deletedNode {
   315  				continue
   316  			}
   317  
   318  			updatedNodes = append(updatedNodes, node)
   319  		}
   320  
   321  		nDB.networkNodes[nid] = updatedNodes
   322  	}
   323  
   324  	delete(nDB.networks, deletedNode)
   325  	nDB.Unlock()
   326  }
   327  
   328  func (nDB *NetworkDB) deleteNodeTableEntries(node string) {
   329  	nDB.Lock()
   330  	nDB.indexes[byTable].Walk(func(path string, v interface{}) bool {
   331  		oldEntry := v.(*entry)
   332  		if oldEntry.node != node {
   333  			return false
   334  		}
   335  
   336  		params := strings.Split(path[1:], "/")
   337  		tname := params[0]
   338  		nid := params[1]
   339  		key := params[2]
   340  
   341  		entry := &entry{
   342  			ltime:      oldEntry.ltime,
   343  			node:       node,
   344  			value:      oldEntry.value,
   345  			deleting:   true,
   346  			deleteTime: time.Now(),
   347  		}
   348  
   349  		nDB.indexes[byTable].Insert(fmt.Sprintf("/%s/%s/%s", tname, nid, key), entry)
   350  		nDB.indexes[byNetwork].Insert(fmt.Sprintf("/%s/%s/%s", nid, tname, key), entry)
   351  
   352  		nDB.broadcaster.Write(makeEvent(opDelete, tname, nid, key, entry.value))
   353  		return false
   354  	})
   355  	nDB.Unlock()
   356  }
   357  
   358  // WalkTable walks a single table in NetworkDB and invokes the passed
   359  // function for each entry in the table passing the network, key,
   360  // value. The walk stops if the passed function returns a true.
   361  func (nDB *NetworkDB) WalkTable(tname string, fn func(string, string, []byte) bool) error {
   362  	nDB.RLock()
   363  	values := make(map[string]interface{})
   364  	nDB.indexes[byTable].WalkPrefix(fmt.Sprintf("/%s", tname), func(path string, v interface{}) bool {
   365  		values[path] = v
   366  		return false
   367  	})
   368  	nDB.RUnlock()
   369  
   370  	for k, v := range values {
   371  		params := strings.Split(k[1:], "/")
   372  		nid := params[1]
   373  		key := params[2]
   374  		if fn(nid, key, v.(*entry).value) {
   375  			return nil
   376  		}
   377  	}
   378  
   379  	return nil
   380  }
   381  
   382  // JoinNetwork joins this node to a given network and propogates this
   383  // event across the cluster. This triggers this node joining the
   384  // sub-cluster of this network and participates in the network-scoped
   385  // gossip and bulk sync for this network.
   386  func (nDB *NetworkDB) JoinNetwork(nid string) error {
   387  	ltime := nDB.networkClock.Increment()
   388  
   389  	nDB.Lock()
   390  	nodeNetworks, ok := nDB.networks[nDB.config.NodeName]
   391  	if !ok {
   392  		nodeNetworks = make(map[string]*network)
   393  		nDB.networks[nDB.config.NodeName] = nodeNetworks
   394  	}
   395  	nodeNetworks[nid] = &network{id: nid, ltime: ltime}
   396  	nodeNetworks[nid].tableBroadcasts = &memberlist.TransmitLimitedQueue{
   397  		NumNodes: func() int {
   398  			nDB.RLock()
   399  			num := len(nDB.networkNodes[nid])
   400  			nDB.RUnlock()
   401  			return num
   402  		},
   403  		RetransmitMult: 4,
   404  	}
   405  	nDB.networkNodes[nid] = append(nDB.networkNodes[nid], nDB.config.NodeName)
   406  	networkNodes := nDB.networkNodes[nid]
   407  	nDB.Unlock()
   408  
   409  	if err := nDB.sendNetworkEvent(nid, NetworkEventTypeJoin, ltime); err != nil {
   410  		return fmt.Errorf("failed to send leave network event for %s: %v", nid, err)
   411  	}
   412  
   413  	logrus.Debugf("%s: joined network %s", nDB.config.NodeName, nid)
   414  	if _, err := nDB.bulkSync(networkNodes, true); err != nil {
   415  		logrus.Errorf("Error bulk syncing while joining network %s: %v", nid, err)
   416  	}
   417  
   418  	return nil
   419  }
   420  
   421  // LeaveNetwork leaves this node from a given network and propogates
   422  // this event across the cluster. This triggers this node leaving the
   423  // sub-cluster of this network and as a result will no longer
   424  // participate in the network-scoped gossip and bulk sync for this
   425  // network. Also remove all the table entries for this network from
   426  // networkdb
   427  func (nDB *NetworkDB) LeaveNetwork(nid string) error {
   428  	ltime := nDB.networkClock.Increment()
   429  	if err := nDB.sendNetworkEvent(nid, NetworkEventTypeLeave, ltime); err != nil {
   430  		return fmt.Errorf("failed to send leave network event for %s: %v", nid, err)
   431  	}
   432  
   433  	nDB.Lock()
   434  	defer nDB.Unlock()
   435  	var (
   436  		paths   []string
   437  		entries []*entry
   438  	)
   439  
   440  	nwWalker := func(path string, v interface{}) bool {
   441  		entry, ok := v.(*entry)
   442  		if !ok {
   443  			return false
   444  		}
   445  		paths = append(paths, path)
   446  		entries = append(entries, entry)
   447  		return false
   448  	}
   449  
   450  	nDB.indexes[byNetwork].WalkPrefix(fmt.Sprintf("/%s", nid), nwWalker)
   451  	for _, path := range paths {
   452  		params := strings.Split(path[1:], "/")
   453  		tname := params[1]
   454  		key := params[2]
   455  
   456  		if _, ok := nDB.indexes[byTable].Delete(fmt.Sprintf("/%s/%s/%s", tname, nid, key)); !ok {
   457  			logrus.Errorf("Could not delete entry in table %s with network id %s and key %s as it does not exist", tname, nid, key)
   458  		}
   459  
   460  		if _, ok := nDB.indexes[byNetwork].Delete(fmt.Sprintf("/%s/%s/%s", nid, tname, key)); !ok {
   461  			logrus.Errorf("Could not delete entry in network %s with table name %s and key %s as it does not exist", nid, tname, key)
   462  		}
   463  	}
   464  
   465  	nodeNetworks, ok := nDB.networks[nDB.config.NodeName]
   466  	if !ok {
   467  		return fmt.Errorf("could not find self node for network %s while trying to leave", nid)
   468  	}
   469  
   470  	n, ok := nodeNetworks[nid]
   471  	if !ok {
   472  		return fmt.Errorf("could not find network %s while trying to leave", nid)
   473  	}
   474  
   475  	n.ltime = ltime
   476  	n.leaving = true
   477  	return nil
   478  }
   479  
   480  // addNetworkNode adds the node to the list of nodes which participate
   481  // in the passed network only if it is not already present. Caller
   482  // should hold the NetworkDB lock while calling this
   483  func (nDB *NetworkDB) addNetworkNode(nid string, nodeName string) {
   484  	nodes := nDB.networkNodes[nid]
   485  	for _, node := range nodes {
   486  		if node == nodeName {
   487  			return
   488  		}
   489  	}
   490  
   491  	nDB.networkNodes[nid] = append(nDB.networkNodes[nid], nodeName)
   492  }
   493  
   494  // Deletes the node from the list of nodes which participate in the
   495  // passed network. Caller should hold the NetworkDB lock while calling
   496  // this
   497  func (nDB *NetworkDB) deleteNetworkNode(nid string, nodeName string) {
   498  	nodes := nDB.networkNodes[nid]
   499  	newNodes := make([]string, 0, len(nodes)-1)
   500  	for _, name := range nodes {
   501  		if name == nodeName {
   502  			continue
   503  		}
   504  		newNodes = append(newNodes, name)
   505  	}
   506  	nDB.networkNodes[nid] = newNodes
   507  }
   508  
   509  // findCommonnetworks find the networks that both this node and the
   510  // passed node have joined.
   511  func (nDB *NetworkDB) findCommonNetworks(nodeName string) []string {
   512  	nDB.RLock()
   513  	defer nDB.RUnlock()
   514  
   515  	var networks []string
   516  	for nid := range nDB.networks[nDB.config.NodeName] {
   517  		if n, ok := nDB.networks[nodeName][nid]; ok {
   518  			if !n.leaving {
   519  				networks = append(networks, nid)
   520  			}
   521  		}
   522  	}
   523  
   524  	return networks
   525  }
   526  
   527  func (nDB *NetworkDB) updateLocalNetworkTime() {
   528  	nDB.Lock()
   529  	defer nDB.Unlock()
   530  
   531  	ltime := nDB.networkClock.Increment()
   532  	for _, n := range nDB.networks[nDB.config.NodeName] {
   533  		n.ltime = ltime
   534  	}
   535  }
   536  
   537  func (nDB *NetworkDB) updateLocalTableTime() {
   538  	nDB.Lock()
   539  	defer nDB.Unlock()
   540  
   541  	ltime := nDB.tableClock.Increment()
   542  	nDB.indexes[byTable].Walk(func(path string, v interface{}) bool {
   543  		entry := v.(*entry)
   544  		if entry.node != nDB.config.NodeName {
   545  			return false
   546  		}
   547  
   548  		params := strings.Split(path[1:], "/")
   549  		tname := params[0]
   550  		nid := params[1]
   551  		key := params[2]
   552  		entry.ltime = ltime
   553  
   554  		nDB.indexes[byTable].Insert(fmt.Sprintf("/%s/%s/%s", tname, nid, key), entry)
   555  		nDB.indexes[byNetwork].Insert(fmt.Sprintf("/%s/%s/%s", nid, tname, key), entry)
   556  
   557  		return false
   558  	})
   559  }