package rethinkdb

import (
	"errors"
	"fmt"
	"sort"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/hailocab/go-hostpool"
	"github.com/sirupsen/logrus"
	"golang.org/x/net/context"
	"gopkg.in/cenkalti/backoff.v2"
)

// errClusterClosed signals that the cluster has been closed; discovery wraps
// it in backoff.Permanent to stop its retry loop.
var errClusterClosed = errors.New("rethinkdb: cluster is closed")

// States stored atomically in Cluster.closed.
const (
	clusterWorking = 0
	clusterClosed  = 1
)

// A Cluster represents a connection to a RethinkDB cluster, a cluster is created
// by the Session and should rarely be created manually.
//
// The cluster keeps track of all nodes in the cluster and if requested can listen
// for cluster changes and start tracking a new node if one appears. Currently
// nodes are removed from the pool if they become unhealthy (100 failed queries).
// This should hopefully soon be replaced by a backoff system.
type Cluster struct {
	opts *ConnectOpts // Connection options shared by every node.

	mu     sync.RWMutex      // Guards nodes and host-pool updates.
	seeds  []Host            // Initial host nodes specified by user.
	hp     hostpool.HostPool // Epsilon-greedy pool used to pick the next node.
	nodes  map[string]*Node  // Active nodes in cluster, keyed by host string.
	closed int32             // 0 - working, 1 - closed (accessed atomically).

	connFactory connFactory // Creates raw connections (NewConnection by default).

	// discoverInterval, when non-zero, overrides the initial backoff
	// interval of the node-discovery loop (used by tests).
	discoverInterval time.Duration
}

// NewCluster creates a new cluster by connecting to the given hosts.
47 func NewCluster(hosts []Host, opts *ConnectOpts) (*Cluster, error) { 48 c := &Cluster{ 49 hp: newHostPool(opts), 50 seeds: hosts, 51 opts: opts, 52 closed: clusterWorking, 53 connFactory: NewConnection, 54 } 55 56 err := c.run() 57 if err != nil { 58 return nil, err 59 } 60 61 return c, nil 62 } 63 64 func newHostPool(opts *ConnectOpts) hostpool.HostPool { 65 return hostpool.NewEpsilonGreedy([]string{}, opts.HostDecayDuration, &hostpool.LinearEpsilonValueCalculator{}) 66 } 67 68 func (c *Cluster) run() error { 69 // Attempt to connect to each host and discover any additional hosts if host 70 // discovery is enabled 71 if err := c.connectCluster(); err != nil { 72 return err 73 } 74 75 if !c.IsConnected() { 76 return ErrNoConnectionsStarted 77 } 78 return nil 79 } 80 81 // Query executes a ReQL query using the cluster to connect to the database 82 func (c *Cluster) Query(ctx context.Context, q Query) (cursor *Cursor, err error) { 83 for i := 0; i < c.numRetries(); i++ { 84 var node *Node 85 var hpr hostpool.HostPoolResponse 86 87 node, hpr, err = c.GetNextNode() 88 if err != nil { 89 return nil, err 90 } 91 92 cursor, err = node.Query(ctx, q) 93 hpr.Mark(err) 94 95 if !shouldRetryQuery(q, err) { 96 break 97 } 98 } 99 100 return cursor, err 101 } 102 103 // Exec executes a ReQL query using the cluster to connect to the database 104 func (c *Cluster) Exec(ctx context.Context, q Query) (err error) { 105 for i := 0; i < c.numRetries(); i++ { 106 var node *Node 107 var hpr hostpool.HostPoolResponse 108 109 node, hpr, err = c.GetNextNode() 110 if err != nil { 111 return err 112 } 113 114 err = node.Exec(ctx, q) 115 hpr.Mark(err) 116 117 if !shouldRetryQuery(q, err) { 118 break 119 } 120 } 121 122 return err 123 } 124 125 // Server returns the server name and server UUID being used by a connection. 
126 func (c *Cluster) Server() (response ServerResponse, err error) { 127 for i := 0; i < c.numRetries(); i++ { 128 var node *Node 129 var hpr hostpool.HostPoolResponse 130 131 node, hpr, err = c.GetNextNode() 132 if err != nil { 133 return ServerResponse{}, err 134 } 135 136 response, err = node.Server() 137 hpr.Mark(err) 138 139 // This query should not fail so retry if any error is detected 140 if err == nil { 141 break 142 } 143 } 144 145 return response, err 146 } 147 148 // SetInitialPoolCap sets the initial capacity of the connection pool. 149 func (c *Cluster) SetInitialPoolCap(n int) { 150 for _, node := range c.GetNodes() { 151 node.SetInitialPoolCap(n) 152 } 153 } 154 155 // SetMaxIdleConns sets the maximum number of connections in the idle 156 // connection pool. 157 func (c *Cluster) SetMaxIdleConns(n int) { 158 for _, node := range c.GetNodes() { 159 node.SetMaxIdleConns(n) 160 } 161 } 162 163 // SetMaxOpenConns sets the maximum number of open connections to the database. 164 func (c *Cluster) SetMaxOpenConns(n int) { 165 for _, node := range c.GetNodes() { 166 node.SetMaxOpenConns(n) 167 } 168 } 169 170 // Close closes the cluster 171 func (c *Cluster) Close(optArgs ...CloseOpts) error { 172 if c.isClosed() { 173 return nil 174 } 175 176 for _, node := range c.GetNodes() { 177 err := node.Close(optArgs...) 178 if err != nil { 179 return err 180 } 181 } 182 183 c.hp.Close() 184 atomic.StoreInt32(&c.closed, clusterClosed) 185 186 return nil 187 } 188 189 func (c *Cluster) isClosed() bool { 190 return atomic.LoadInt32(&c.closed) == clusterClosed 191 } 192 193 // discover attempts to find new nodes in the cluster using the current nodes 194 func (c *Cluster) discover() { 195 // Keep retrying with exponential backoff. 
196 b := backoff.NewExponentialBackOff() 197 // Never finish retrying (max interval is still 60s) 198 b.MaxElapsedTime = 0 199 if c.discoverInterval != 0 { 200 b.InitialInterval = c.discoverInterval 201 } 202 203 // Keep trying to discover new nodes 204 for { 205 if c.isClosed() { 206 return 207 } 208 209 _ = backoff.RetryNotify(func() error { 210 if c.isClosed() { 211 return backoff.Permanent(errClusterClosed) 212 } 213 // If no hosts try seeding nodes 214 if len(c.GetNodes()) == 0 { 215 return c.connectCluster() 216 } 217 218 return c.listenForNodeChanges() 219 }, b, func(err error, wait time.Duration) { 220 Log.Debugf("Error discovering hosts %s, waiting: %s", err, wait) 221 }) 222 } 223 } 224 225 // listenForNodeChanges listens for changes to node status using change feeds. 226 // This function will block until the query fails 227 func (c *Cluster) listenForNodeChanges() error { 228 // Start listening to changes from a random active node 229 node, hpr, err := c.GetNextNode() 230 if err != nil { 231 return err 232 } 233 234 q, err := newQuery( 235 DB(SystemDatabase).Table(ServerStatusSystemTable).Changes(ChangesOpts{IncludeInitial: true}), 236 map[string]interface{}{}, 237 c.opts, 238 ) 239 if err != nil { 240 return fmt.Errorf("Error building query: %s", err) 241 } 242 243 cursor, err := node.Query(context.Background(), q) // no need for timeout due to Changes() 244 if err != nil { 245 hpr.Mark(err) 246 return err 247 } 248 defer func() { _ = cursor.Close() }() 249 250 // Keep reading node status updates from changefeed 251 var result struct { 252 NewVal *nodeStatus `rethinkdb:"new_val"` 253 OldVal *nodeStatus `rethinkdb:"old_val"` 254 } 255 for cursor.Next(&result) { 256 addr := fmt.Sprintf("%s:%d", result.NewVal.Network.Hostname, result.NewVal.Network.ReqlPort) 257 addr = strings.ToLower(addr) 258 259 if result.NewVal != nil && result.OldVal == nil { 260 // added new node 261 if !c.nodeExists(result.NewVal.ID) { 262 // Connect to node using exponential 
backoff (give up after waiting 5s) 263 // to give the node time to start-up. 264 b := backoff.NewExponentialBackOff() 265 b.MaxElapsedTime = time.Second * 5 266 267 err = backoff.Retry(func() error { 268 node, err := c.connectNodeWithStatus(result.NewVal) 269 if err == nil { 270 c.addNode(node) 271 272 Log.WithFields(logrus.Fields{ 273 "id": node.ID, 274 "host": node.Host.String(), 275 }).Debug("Connected to node") 276 } 277 return err 278 }, b) 279 if err != nil { 280 return err 281 } 282 } 283 } else if result.OldVal != nil && result.NewVal == nil { 284 // removed old node 285 oldNode := c.removeNode(result.OldVal.ID) 286 if oldNode != nil { 287 _ = oldNode.Close() 288 } 289 } else { 290 // node updated 291 // nothing to do - assuming node can't change it's hostname in a single Changes() message 292 } 293 } 294 295 err = cursor.Err() 296 hpr.Mark(err) 297 return err 298 } 299 300 func (c *Cluster) connectCluster() error { 301 nodeSet := map[string]*Node{} 302 var attemptErr error 303 304 // Attempt to connect to each seed host 305 for _, host := range c.seeds { 306 conn, err := c.connFactory(host.String(), c.opts) 307 if err != nil { 308 attemptErr = err 309 Log.Warnf("Error creating connection: %s", err.Error()) 310 continue 311 } 312 313 svrRsp, err := conn.Server() 314 if err != nil { 315 attemptErr = err 316 Log.Warnf("Error fetching server ID: %s", err) 317 _ = conn.Close() 318 319 continue 320 } 321 _ = conn.Close() 322 323 node, err := c.connectNode(svrRsp.ID, []Host{host}) 324 if err != nil { 325 attemptErr = err 326 Log.Warnf("Error connecting to node: %s", err) 327 continue 328 } 329 330 if _, ok := nodeSet[node.ID]; !ok { 331 Log.WithFields(logrus.Fields{ 332 "id": node.ID, 333 "host": node.Host.String(), 334 }).Debug("Connected to node") 335 336 nodeSet[node.ID] = node 337 } else { 338 // dublicate node 339 _ = node.Close() 340 } 341 } 342 343 // If no nodes were contactable then return the last error, this does not 344 // include driver errors such 
as if there was an issue building the 345 // query 346 if len(nodeSet) == 0 { 347 if attemptErr != nil { 348 return attemptErr 349 } 350 return ErrNoConnections 351 } 352 353 var nodes []*Node 354 for _, node := range nodeSet { 355 nodes = append(nodes, node) 356 } 357 c.replaceNodes(nodes) 358 359 if c.opts.DiscoverHosts { 360 go c.discover() 361 } 362 363 return nil 364 } 365 366 func (c *Cluster) connectNodeWithStatus(s *nodeStatus) (*Node, error) { 367 aliases := make([]Host, len(s.Network.CanonicalAddresses)) 368 for i, aliasAddress := range s.Network.CanonicalAddresses { 369 aliases[i] = NewHost(aliasAddress.Host, int(s.Network.ReqlPort)) 370 } 371 372 return c.connectNode(s.ID, aliases) 373 } 374 375 func (c *Cluster) connectNode(id string, aliases []Host) (*Node, error) { 376 var pool *Pool 377 var err error 378 379 for len(aliases) > 0 { 380 pool, err = newPool(aliases[0], c.opts, c.connFactory) 381 if err != nil { 382 aliases = aliases[1:] 383 continue 384 } 385 386 err = pool.Ping() 387 if err != nil { 388 aliases = aliases[1:] 389 continue 390 } 391 392 // Ping successful so break out of loop 393 break 394 } 395 396 if err != nil { 397 return nil, err 398 } 399 if len(aliases) == 0 { 400 return nil, ErrInvalidNode 401 } 402 403 return newNode(id, aliases, pool), nil 404 } 405 406 // IsConnected returns true if cluster has nodes and is not already connClosed. 
407 func (c *Cluster) IsConnected() bool { 408 return (len(c.GetNodes()) > 0) && !c.isClosed() 409 } 410 411 // GetNextNode returns a random node on the cluster 412 func (c *Cluster) GetNextNode() (*Node, hostpool.HostPoolResponse, error) { 413 if !c.IsConnected() { 414 return nil, nil, ErrNoConnections 415 } 416 c.mu.RLock() 417 defer c.mu.RUnlock() 418 419 nodes := c.nodes 420 hpr := c.hp.Get() 421 if n, ok := nodes[hpr.Host()]; ok { 422 if !n.Closed() { 423 return n, hpr, nil 424 } 425 } 426 427 return nil, nil, ErrNoConnections 428 } 429 430 // GetNodes returns a list of all nodes in the cluster 431 func (c *Cluster) GetNodes() []*Node { 432 c.mu.RLock() 433 defer c.mu.RUnlock() 434 nodes := make([]*Node, 0, len(c.nodes)) 435 for _, n := range c.nodes { 436 nodes = append(nodes, n) 437 } 438 439 return nodes 440 } 441 442 func (c *Cluster) nodeExists(nodeID string) bool { 443 c.mu.RLock() 444 defer c.mu.RUnlock() 445 for _, node := range c.nodes { 446 if node.ID == nodeID { 447 return true 448 } 449 } 450 return false 451 } 452 453 func (c *Cluster) addNode(node *Node) { 454 host := node.Host.String() 455 c.mu.Lock() 456 defer c.mu.Unlock() 457 if _, exist := c.nodes[host]; exist { 458 // addNode() should be called only if the node doesn't exist 459 return 460 } 461 462 c.nodes[host] = node 463 464 hosts := make([]string, 0, len(c.nodes)) 465 for _, n := range c.nodes { 466 hosts = append(hosts, n.Host.String()) 467 } 468 c.hp.SetHosts(hosts) 469 } 470 471 func (c *Cluster) replaceNodes(nodes []*Node) { 472 nodesMap := make(map[string]*Node, len(nodes)) 473 hosts := make([]string, len(nodes)) 474 for i, node := range nodes { 475 host := node.Host.String() 476 477 nodesMap[host] = node 478 hosts[i] = host 479 } 480 481 sort.Strings(hosts) // unit tests stability 482 483 c.mu.Lock() 484 c.nodes = nodesMap 485 c.hp.SetHosts(hosts) 486 c.mu.Unlock() 487 } 488 489 func (c *Cluster) removeNode(nodeID string) *Node { 490 c.mu.Lock() 491 defer c.mu.Unlock() 492 var 
rmNode *Node 493 for _, node := range c.nodes { 494 if node.ID == nodeID { 495 rmNode = node 496 break 497 } 498 } 499 if rmNode == nil { 500 return nil 501 } 502 503 delete(c.nodes, rmNode.Host.String()) 504 505 hosts := make([]string, 0, len(c.nodes)) 506 for _, n := range c.nodes { 507 hosts = append(hosts, n.Host.String()) 508 } 509 c.hp.SetHosts(hosts) 510 511 return rmNode 512 } 513 514 func (c *Cluster) numRetries() int { 515 if n := c.opts.NumRetries; n > 0 { 516 return n 517 } 518 519 return 3 520 }