gopkg.in/rethinkdb/rethinkdb-go.v6@v6.2.2/cluster.go

     1  package rethinkdb
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"sort"
     7  	"strings"
     8  	"sync"
     9  	"sync/atomic"
    10  	"time"
    11  
    12  	"github.com/hailocab/go-hostpool"
    13  	"github.com/sirupsen/logrus"
    14  	"golang.org/x/net/context"
    15  	"gopkg.in/cenkalti/backoff.v2"
    16  )
    17  
    18  var errClusterClosed = errors.New("rethinkdb: cluster is closed")
    19  
    20  const (
    21  	clusterWorking = 0
    22  	clusterClosed  = 1
    23  )
    24  
    25  // A Cluster represents a connection to a RethinkDB cluster. A cluster is created
    26  // by the Session and should rarely be created manually.
    27  //
    28  // The cluster keeps track of all nodes in the cluster and, if requested, can listen
    29  // for cluster changes and start tracking a new node if one appears. Currently
    30  // nodes are removed from the pool if they become unhealthy (100 failed queries).
    31  // This should hopefully soon be replaced by a backoff system.
    32  type Cluster struct {
    33  	opts *ConnectOpts
    34  
    35  	mu     sync.RWMutex
    36  	seeds  []Host // Initial host nodes specified by user.
    37  	hp     hostpool.HostPool
    38  	nodes  map[string]*Node // Active nodes in cluster.
    39  	closed int32            // 0 - working, 1 - closed
    40  
    41  	connFactory connFactory
    42  
    43  	discoverInterval time.Duration
    44  }
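        // Illustration (not part of the original file): a Cluster is normally created
        // indirectly, by passing ConnectOpts to the package-level Connect, which builds
        // the Session that owns the cluster. A minimal sketch; only DiscoverHosts and
        // HostDecayDuration are referenced in this file, while the Connect function and
        // the Addresses field are assumed from the package's public API:
        //
        //	package main
        //
        //	import (
        //		"log"
        //		"time"
        //
        //		r "gopkg.in/rethinkdb/rethinkdb-go.v6"
        //	)
        //
        //	func main() {
        //		session, err := r.Connect(r.ConnectOpts{
        //			// Seed hosts; with DiscoverHosts the cluster finds the remaining nodes itself.
        //			Addresses:         []string{"db1:28015", "db2:28015"},
        //			DiscoverHosts:     true,
        //			HostDecayDuration: 5 * time.Minute,
        //		})
        //		if err != nil {
        //			log.Fatal(err)
        //		}
        //		defer session.Close()
        //	}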
    45  
    46  // NewCluster creates a new cluster by connecting to the given hosts.
    47  func NewCluster(hosts []Host, opts *ConnectOpts) (*Cluster, error) {
    48  	c := &Cluster{
    49  		hp:          newHostPool(opts),
    50  		seeds:       hosts,
    51  		opts:        opts,
    52  		closed:      clusterWorking,
    53  		connFactory: NewConnection,
    54  	}
    55  
    56  	err := c.run()
    57  	if err != nil {
    58  		return nil, err
    59  	}
    60  
    61  	return c, nil
    62  }
    63  
    64  func newHostPool(opts *ConnectOpts) hostpool.HostPool {
    65  	return hostpool.NewEpsilonGreedy([]string{}, opts.HostDecayDuration, &hostpool.LinearEpsilonValueCalculator{})
    66  }
    67  
    68  func (c *Cluster) run() error {
    69  	// Attempt to connect to each host and discover any additional hosts if host
    70  	// discovery is enabled
    71  	if err := c.connectCluster(); err != nil {
    72  		return err
    73  	}
    74  
    75  	if !c.IsConnected() {
    76  		return ErrNoConnectionsStarted
    77  	}
    78  	return nil
    79  }
    80  
    81  // Query executes a ReQL query using the cluster to connect to the database
    82  func (c *Cluster) Query(ctx context.Context, q Query) (cursor *Cursor, err error) {
    83  	for i := 0; i < c.numRetries(); i++ {
    84  		var node *Node
    85  		var hpr hostpool.HostPoolResponse
    86  
    87  		node, hpr, err = c.GetNextNode()
    88  		if err != nil {
    89  			return nil, err
    90  		}
    91  
    92  		cursor, err = node.Query(ctx, q)
    93  		hpr.Mark(err)
    94  
    95  		if !shouldRetryQuery(q, err) {
    96  			break
    97  		}
    98  	}
    99  
   100  	return cursor, err
   101  }
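        // Illustration (not part of the original file): application code does not call
        // Cluster.Query directly; a term's Run method goes through the Session, which
        // delegates to the cluster above. A minimal sketch, assuming the package-level
        // Connect, Table and the Cursor API (none of which are defined in this file),
        // with the package imported as r as in the earlier sketch:
        //
        //	session, err := r.Connect(r.ConnectOpts{Address: "localhost:28015"})
        //	if err != nil {
        //		log.Fatal(err)
        //	}
        //	defer session.Close()
        //
        //	// Run eventually reaches Cluster.Query; a failing node is marked in the
        //	// host pool and the query may be retried on the next node picked from it.
        //	cursor, err := r.Table("users").Run(session)
        //	if err != nil {
        //		log.Fatal(err)
        //	}
        //	defer cursor.Close()
        //
        //	var users []map[string]interface{}
        //	if err := cursor.All(&users); err != nil {
        //		log.Fatal(err)
        //	}
        //	log.Printf("fetched %d users", len(users))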
   102  
   103  // Exec executes a ReQL query using the cluster to connect to the database
   104  func (c *Cluster) Exec(ctx context.Context, q Query) (err error) {
   105  	for i := 0; i < c.numRetries(); i++ {
   106  		var node *Node
   107  		var hpr hostpool.HostPoolResponse
   108  
   109  		node, hpr, err = c.GetNextNode()
   110  		if err != nil {
   111  			return err
   112  		}
   113  
   114  		err = node.Exec(ctx, q)
   115  		hpr.Mark(err)
   116  
   117  		if !shouldRetryQuery(q, err) {
   118  			break
   119  		}
   120  	}
   121  
   122  	return err
   123  }
   124  
   125  // Server returns the server name and server UUID being used by a connection.
   126  func (c *Cluster) Server() (response ServerResponse, err error) {
   127  	for i := 0; i < c.numRetries(); i++ {
   128  		var node *Node
   129  		var hpr hostpool.HostPoolResponse
   130  
   131  		node, hpr, err = c.GetNextNode()
   132  		if err != nil {
   133  			return ServerResponse{}, err
   134  		}
   135  
   136  		response, err = node.Server()
   137  		hpr.Mark(err)
   138  
   139  		// This query should not fail, so retry if any error is detected
   140  		if err == nil {
   141  			break
   142  		}
   143  	}
   144  
   145  	return response, err
   146  }
   147  
   148  // SetInitialPoolCap sets the initial capacity of the connection pool.
   149  func (c *Cluster) SetInitialPoolCap(n int) {
   150  	for _, node := range c.GetNodes() {
   151  		node.SetInitialPoolCap(n)
   152  	}
   153  }
   154  
   155  // SetMaxIdleConns sets the maximum number of connections in the idle
   156  // connection pool.
   157  func (c *Cluster) SetMaxIdleConns(n int) {
   158  	for _, node := range c.GetNodes() {
   159  		node.SetMaxIdleConns(n)
   160  	}
   161  }
   162  
   163  // SetMaxOpenConns sets the maximum number of open connections to the database.
   164  func (c *Cluster) SetMaxOpenConns(n int) {
   165  	for _, node := range c.GetNodes() {
   166  		node.SetMaxOpenConns(n)
   167  	}
   168  }
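        // Illustration (not part of the original file): the three setters above simply fan
        // the new limits out to every node's connection pool. A minimal sketch calling them
        // on a manually created cluster (NewHost is assumed from the package's host.go; in
        // normal use the Session creates the cluster and these limits come from ConnectOpts):
        //
        //	cluster, err := rethinkdb.NewCluster(
        //		[]rethinkdb.Host{rethinkdb.NewHost("localhost", 28015)},
        //		&rethinkdb.ConnectOpts{},
        //	)
        //	if err != nil {
        //		panic(err)
        //	}
        //	defer cluster.Close()
        //
        //	cluster.SetInitialPoolCap(5) // pre-open 5 connections per node
        //	cluster.SetMaxIdleConns(5)   // keep at most 5 idle connections per node
        //	cluster.SetMaxOpenConns(20)  // allow at most 20 open connections per node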
   169  
   170  // Close closes the cluster
   171  func (c *Cluster) Close(optArgs ...CloseOpts) error {
   172  	if c.isClosed() {
   173  		return nil
   174  	}
   175  
   176  	for _, node := range c.GetNodes() {
   177  		err := node.Close(optArgs...)
   178  		if err != nil {
   179  			return err
   180  		}
   181  	}
   182  
   183  	c.hp.Close()
   184  	atomic.StoreInt32(&c.closed, clusterClosed)
   185  
   186  	return nil
   187  }
   188  
   189  func (c *Cluster) isClosed() bool {
   190  	return atomic.LoadInt32(&c.closed) == clusterClosed
   191  }
   192  
   193  // discover attempts to find new nodes in the cluster using the current nodes
   194  func (c *Cluster) discover() {
   195  	// Keep retrying with exponential backoff.
   196  	b := backoff.NewExponentialBackOff()
   197  	// Never finish retrying (max interval is still 60s)
   198  	b.MaxElapsedTime = 0
   199  	if c.discoverInterval != 0 {
   200  		b.InitialInterval = c.discoverInterval
   201  	}
   202  
   203  	// Keep trying to discover new nodes
   204  	for {
   205  		if c.isClosed() {
   206  			return
   207  		}
   208  
   209  		_ = backoff.RetryNotify(func() error {
   210  			if c.isClosed() {
   211  				return backoff.Permanent(errClusterClosed)
   212  			}
   213  			// If there are no known nodes yet, try connecting to the seed hosts
   214  			if len(c.GetNodes()) == 0 {
   215  				return c.connectCluster()
   216  			}
   217  
   218  			return c.listenForNodeChanges()
   219  		}, b, func(err error, wait time.Duration) {
   220  			Log.Debugf("Error discovering hosts %s, waiting: %s", err, wait)
   221  		})
   222  	}
   223  }
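        // Illustration (not part of the original file): the loop above relies on
        // gopkg.in/cenkalti/backoff.v2 with MaxElapsedTime set to zero, which means the
        // operation is retried indefinitely while the wait between attempts grows up to
        // the backoff's MaxInterval (60s by default). A standalone sketch of the same
        // pattern; doWork is a hypothetical operation, and the standard log and time
        // packages are assumed to be imported:
        //
        //	b := backoff.NewExponentialBackOff()
        //	b.MaxElapsedTime = 0 // never stop retrying
        //
        //	_ = backoff.RetryNotify(func() error {
        //		return doWork()
        //	}, b, func(err error, wait time.Duration) {
        //		log.Printf("retrying after %s: %s", wait, err)
        //	})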
   224  
   225  // listenForNodeChanges listens for changes to node status using change feeds.
   226  // This function will block until the query fails
   227  func (c *Cluster) listenForNodeChanges() error {
   228  	// Start listening to changes from a random active node
   229  	node, hpr, err := c.GetNextNode()
   230  	if err != nil {
   231  		return err
   232  	}
   233  
   234  	q, err := newQuery(
   235  		DB(SystemDatabase).Table(ServerStatusSystemTable).Changes(ChangesOpts{IncludeInitial: true}),
   236  		map[string]interface{}{},
   237  		c.opts,
   238  	)
   239  	if err != nil {
   240  		return fmt.Errorf("Error building query: %s", err)
   241  	}
   242  
   243  	cursor, err := node.Query(context.Background(), q) // no need for timeout due to Changes()
   244  	if err != nil {
   245  		hpr.Mark(err)
   246  		return err
   247  	}
   248  	defer func() { _ = cursor.Close() }()
   249  
   250  	// Keep reading node status updates from changefeed
   251  	var result struct {
   252  		NewVal *nodeStatus `rethinkdb:"new_val"`
   253  		OldVal *nodeStatus `rethinkdb:"old_val"`
   254  	}
   255  	for cursor.Next(&result) {
   256  		if result.NewVal != nil && result.OldVal == nil {
   257  			// added new node; NewVal is only dereferenced inside this branch, since a
   258  			// node-removal event carries a nil new_val and would otherwise panic here
   259  			addr := fmt.Sprintf("%s:%d", result.NewVal.Network.Hostname, result.NewVal.Network.ReqlPort)
   260  			addr = strings.ToLower(addr)
   261  			if !c.nodeExists(result.NewVal.ID) {
   262  				// Connect to the node using exponential backoff (give up after 5s)
   263  				// to give the node time to start up.
   264  				b := backoff.NewExponentialBackOff()
   265  				b.MaxElapsedTime = time.Second * 5
   266  
   267  				err = backoff.Retry(func() error {
   268  					node, err := c.connectNodeWithStatus(result.NewVal)
   269  					if err == nil {
   270  						c.addNode(node)
   271  
   272  						Log.WithFields(logrus.Fields{
   273  							"id":   node.ID,
   274  							"host": node.Host.String(),
   275  						}).Debug("Connected to node")
   276  					}
   277  					return err
   278  				}, b)
   279  				if err != nil {
   280  					return err
   281  				}
   282  			}
   283  		} else if result.OldVal != nil && result.NewVal == nil {
   284  			// removed old node
   285  			oldNode := c.removeNode(result.OldVal.ID)
   286  			if oldNode != nil {
   287  				_ = oldNode.Close()
   288  			}
   289  		} else {
   290  			// node updated
   291  			// nothing to do - assuming a node can't change its hostname in a single Changes() message
   292  		}
   293  	}
   294  
   295  	err = cursor.Err()
   296  	hpr.Mark(err)
   297  	return err
   298  }
   299  
   300  func (c *Cluster) connectCluster() error {
   301  	nodeSet := map[string]*Node{}
   302  	var attemptErr error
   303  
   304  	// Attempt to connect to each seed host
   305  	for _, host := range c.seeds {
   306  		conn, err := c.connFactory(host.String(), c.opts)
   307  		if err != nil {
   308  			attemptErr = err
   309  			Log.Warnf("Error creating connection: %s", err.Error())
   310  			continue
   311  		}
   312  
   313  		svrRsp, err := conn.Server()
   314  		if err != nil {
   315  			attemptErr = err
   316  			Log.Warnf("Error fetching server ID: %s", err)
   317  			_ = conn.Close()
   318  
   319  			continue
   320  		}
   321  		_ = conn.Close()
   322  
   323  		node, err := c.connectNode(svrRsp.ID, []Host{host})
   324  		if err != nil {
   325  			attemptErr = err
   326  			Log.Warnf("Error connecting to node: %s", err)
   327  			continue
   328  		}
   329  
   330  		if _, ok := nodeSet[node.ID]; !ok {
   331  			Log.WithFields(logrus.Fields{
   332  				"id":   node.ID,
   333  				"host": node.Host.String(),
   334  			}).Debug("Connected to node")
   335  
   336  			nodeSet[node.ID] = node
   337  		} else {
   338  			// duplicate node
   339  			_ = node.Close()
   340  		}
   341  	}
   342  
   343  	// If no nodes were contactable then return the last error; this does not
   344  	// include driver errors, such as a failure while building the
   345  	// query
   346  	if len(nodeSet) == 0 {
   347  		if attemptErr != nil {
   348  			return attemptErr
   349  		}
   350  		return ErrNoConnections
   351  	}
   352  
   353  	var nodes []*Node
   354  	for _, node := range nodeSet {
   355  		nodes = append(nodes, node)
   356  	}
   357  	c.replaceNodes(nodes)
   358  
   359  	if c.opts.DiscoverHosts {
   360  		go c.discover()
   361  	}
   362  
   363  	return nil
   364  }
   365  
   366  func (c *Cluster) connectNodeWithStatus(s *nodeStatus) (*Node, error) {
   367  	aliases := make([]Host, len(s.Network.CanonicalAddresses))
   368  	for i, aliasAddress := range s.Network.CanonicalAddresses {
   369  		aliases[i] = NewHost(aliasAddress.Host, int(s.Network.ReqlPort))
   370  	}
   371  
   372  	return c.connectNode(s.ID, aliases)
   373  }
   374  
   375  func (c *Cluster) connectNode(id string, aliases []Host) (*Node, error) {
   376  	var pool *Pool
   377  	var err error
   378  
   379  	for len(aliases) > 0 {
   380  		pool, err = newPool(aliases[0], c.opts, c.connFactory)
   381  		if err != nil {
   382  			aliases = aliases[1:]
   383  			continue
   384  		}
   385  
   386  		err = pool.Ping()
   387  		if err != nil {
   388  			aliases = aliases[1:]
   389  			continue
   390  		}
   391  
   392  		// Ping successful so break out of loop
   393  		break
   394  	}
   395  
   396  	if err != nil {
   397  		return nil, err
   398  	}
   399  	if len(aliases) == 0 {
   400  		return nil, ErrInvalidNode
   401  	}
   402  
   403  	return newNode(id, aliases, pool), nil
   404  }
   405  
   406  // IsConnected returns true if the cluster has nodes and has not been closed.
   407  func (c *Cluster) IsConnected() bool {
   408  	return (len(c.GetNodes()) > 0) && !c.isClosed()
   409  }
   410  
   411  // GetNextNode returns the next healthy node selected by the host pool
   412  func (c *Cluster) GetNextNode() (*Node, hostpool.HostPoolResponse, error) {
   413  	if !c.IsConnected() {
   414  		return nil, nil, ErrNoConnections
   415  	}
   416  	c.mu.RLock()
   417  	defer c.mu.RUnlock()
   418  
   419  	nodes := c.nodes
   420  	hpr := c.hp.Get()
   421  	if n, ok := nodes[hpr.Host()]; ok {
   422  		if !n.Closed() {
   423  			return n, hpr, nil
   424  		}
   425  	}
   426  
   427  	return nil, nil, ErrNoConnections
   428  }
   429  
   430  // GetNodes returns a list of all nodes in the cluster
   431  func (c *Cluster) GetNodes() []*Node {
   432  	c.mu.RLock()
   433  	defer c.mu.RUnlock()
   434  	nodes := make([]*Node, 0, len(c.nodes))
   435  	for _, n := range c.nodes {
   436  		nodes = append(nodes, n)
   437  	}
   438  
   439  	return nodes
   440  }
   441  
   442  func (c *Cluster) nodeExists(nodeID string) bool {
   443  	c.mu.RLock()
   444  	defer c.mu.RUnlock()
   445  	for _, node := range c.nodes {
   446  		if node.ID == nodeID {
   447  			return true
   448  		}
   449  	}
   450  	return false
   451  }
   452  
   453  func (c *Cluster) addNode(node *Node) {
   454  	host := node.Host.String()
   455  	c.mu.Lock()
   456  	defer c.mu.Unlock()
   457  	if _, exist := c.nodes[host]; exist {
   458  		// addNode() should be called only if the node doesn't exist
   459  		return
   460  	}
   461  
   462  	c.nodes[host] = node
   463  
   464  	hosts := make([]string, 0, len(c.nodes))
   465  	for _, n := range c.nodes {
   466  		hosts = append(hosts, n.Host.String())
   467  	}
   468  	c.hp.SetHosts(hosts)
   469  }
   470  
   471  func (c *Cluster) replaceNodes(nodes []*Node) {
   472  	nodesMap := make(map[string]*Node, len(nodes))
   473  	hosts := make([]string, len(nodes))
   474  	for i, node := range nodes {
   475  		host := node.Host.String()
   476  
   477  		nodesMap[host] = node
   478  		hosts[i] = host
   479  	}
   480  
   481  	sort.Strings(hosts) // keep ordering stable for unit tests
   482  
   483  	c.mu.Lock()
   484  	c.nodes = nodesMap
   485  	c.hp.SetHosts(hosts)
   486  	c.mu.Unlock()
   487  }
   488  
   489  func (c *Cluster) removeNode(nodeID string) *Node {
   490  	c.mu.Lock()
   491  	defer c.mu.Unlock()
   492  	var rmNode *Node
   493  	for _, node := range c.nodes {
   494  		if node.ID == nodeID {
   495  			rmNode = node
   496  			break
   497  		}
   498  	}
   499  	if rmNode == nil {
   500  		return nil
   501  	}
   502  
   503  	delete(c.nodes, rmNode.Host.String())
   504  
   505  	hosts := make([]string, 0, len(c.nodes))
   506  	for _, n := range c.nodes {
   507  		hosts = append(hosts, n.Host.String())
   508  	}
   509  	c.hp.SetHosts(hosts)
   510  
   511  	return rmNode
   512  }
   513  
   514  func (c *Cluster) numRetries() int {
   515  	if n := c.opts.NumRetries; n > 0 {
   516  		return n
   517  	}
   518  
   519  	return 3
   520  }
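        // Illustration (not part of the original file): numRetries caps the retry loops in
        // Query, Exec and Server above. It reads ConnectOpts.NumRetries and falls back to 3
        // when the field is unset or non-positive. A minimal sketch of raising the limit,
        // with the package imported as r and the Address field assumed from the public
        // ConnectOpts:
        //
        //	session, err := r.Connect(r.ConnectOpts{
        //		Address:    "localhost:28015",
        //		NumRetries: 5, // up to 5 attempts per cluster-level query
        //	})
        //	if err != nil {
        //		log.Fatal(err)
        //	}
        //	defer session.Close()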