github.com/m3db/m3@v1.5.0/src/m3em/cluster/cluster.go

// Copyright (c) 2017 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package cluster

import (
	"fmt"
	"sync"

	"github.com/m3db/m3/src/cluster/placement"
	"github.com/m3db/m3/src/cluster/shard"
	"github.com/m3db/m3/src/m3em/node"
	xerrors "github.com/m3db/m3/src/x/errors"

	"go.uber.org/zap"
)

var (
	errInsufficientCapacity          = fmt.Errorf("insufficient node capacity in environment")
	errNodeNotInUse                  = fmt.Errorf("unable to remove node, not in use")
	errClusterNotUninitialized       = fmt.Errorf("unable to set up cluster, it is not uninitialized")
	errClusterUnableToAlterPlacement = fmt.Errorf("unable to alter cluster placement, it needs to be setup/running")
	errUnableToStartUnsetupCluster   = fmt.Errorf("unable to start cluster, it has not been set up")
	errClusterUnableToTeardown       = fmt.Errorf("unable to tear down cluster, it has not been set up")
	errUnableToStopNotRunningCluster = fmt.Errorf("unable to stop cluster, it is not running")
)

type idToNodeMap map[string]node.ServiceNode

func (im idToNodeMap) values() []node.ServiceNode {
	returnNodes := make([]node.ServiceNode, 0, len(im))
	for _, node := range im {
		returnNodes = append(returnNodes, node)
	}
	return returnNodes
}

type svcCluster struct {
	sync.RWMutex

	logger       *zap.Logger
	opts         Options
	knownNodes   node.ServiceNodes
	usedNodes    idToNodeMap
	spares       []node.ServiceNode
	sparesByID   map[string]node.ServiceNode
	placementSvc placement.Service
	placement    placement.Placement
	status       Status
	lastErr      error
}

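// A minimal lifecycle sketch for the Cluster returned by New, illustrative
// only: construction of the Options value and the node.ServiceNodes slice,
// along with all error handling, is assumed to be wired up by the caller
// (e.g. a test harness).
//
//	c, err := cluster.New(nodes, opts) // all provided nodes start out as spares
//	placed, err := c.Setup(3)          // push build/config to nodes, build initial placement
//	err = c.Start()                    // ClusterStatusSetup -> ClusterStatusRunning
//	err = c.Stop()                     // ClusterStatusRunning -> ClusterStatusSetup
//	err = c.Teardown()                 // tear down nodes, back to ClusterStatusUninitialized
//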
// New returns a new cluster backed by provided service nodes
func New(
	nodes node.ServiceNodes,
	opts Options,
) (Cluster, error) {
	if err := opts.Validate(); err != nil {
		return nil, err
	}

	cluster := &svcCluster{
		logger:       opts.InstrumentOptions().Logger(),
		opts:         opts,
		knownNodes:   nodes,
		usedNodes:    make(idToNodeMap, len(nodes)),
		spares:       make([]node.ServiceNode, 0, len(nodes)),
		sparesByID:   make(map[string]node.ServiceNode, len(nodes)),
		placementSvc: opts.PlacementService(),
		status:       ClusterStatusUninitialized,
	}
	cluster.addSparesWithLock(nodes)

	return cluster, nil
}

func (c *svcCluster) addSparesWithLock(spares []node.ServiceNode) {
	for _, spare := range spares {
		c.spares = append(c.spares, spare)
		c.sparesByID[spare.ID()] = spare
	}
}

func nodeSliceWithoutID(originalSlice node.ServiceNodes, removeID string) node.ServiceNodes {
	newSlice := make(node.ServiceNodes, 0, len(originalSlice))
	for _, elem := range originalSlice {
		if elem.ID() != removeID {
			newSlice = append(newSlice, elem)
		}
	}
	return newSlice
}

func (c *svcCluster) newExecutor(
	nodes node.ServiceNodes,
	fn node.ServiceNodeFn,
) node.ConcurrentExecutor {
	return node.NewConcurrentExecutor(nodes, c.opts.NodeConcurrency(), c.opts.NodeOperationTimeout(), fn)
}

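// Placement returns the placement most recently built or modified by this
// cluster; it is nil until Setup() has succeeded.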
func (c *svcCluster) Placement() placement.Placement {
	c.Lock()
	defer c.Unlock()
	return c.placement
}

func (c *svcCluster) initWithLock() error {
	psvc := c.placementSvc
	// attempt to retrieve the current placement
	_, err := psvc.Placement()
	if err != nil {
		c.logger.Info("unable to retrieve existing placement, skipping delete attempt")
	} else {
		// delete existing placement
		err = c.opts.PlacementServiceRetrier().Attempt(psvc.Delete)
		if err != nil {
			return fmt.Errorf("unable to delete existing placement during setup(): %+v", err)
		}
		c.logger.Info("successfully deleted existing placement")
	}

	var (
		svcBuild        = c.opts.ServiceBuild()
		svcConf         = c.opts.ServiceConfig()
		sessionToken    = c.opts.SessionToken()
		sessionOverride = c.opts.SessionOverride()
		listener        = c.opts.NodeListener()
	)

	// setup all known service nodes with build, config
	executor := c.newExecutor(c.knownNodes, func(node node.ServiceNode) error {
		err := node.Setup(svcBuild, svcConf, sessionToken, sessionOverride)
		if err != nil {
			return err
		}
		if listener != nil {
			// NB: no need to track returned listenerID here, it's cleaned up in node.Teardown()
			node.RegisterListener(listener)
		}
		return nil
	})
	return executor.Run()
}

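// Setup transitions an uninitialized cluster into ClusterStatusSetup: it
// pushes the configured service build and config to every known node, builds
// an initial placement across numNodes spare nodes, and returns the nodes
// that were placed.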
func (c *svcCluster) Setup(numNodes int) ([]node.ServiceNode, error) {
	c.Lock()
	defer c.Unlock()

	if c.status != ClusterStatusUninitialized {
		return nil, errClusterNotUninitialized
	}

	numSpares := len(c.spares)
	if numSpares < numNodes {
		return nil, errInsufficientCapacity
	}

	if err := c.initWithLock(); err != nil {
		return nil, err
	}

	psvc := c.placementSvc
	spares := c.sparesAsPlacementInstancesWithLock()[:numNodes]

	// we don't need to use the retrier here as there are no other users of this placement yet
	initialPlacement, err := psvc.BuildInitialPlacement(spares, c.opts.NumShards(), c.opts.Replication())
	if err != nil {
		return nil, err
	}

	// update ServiceNode with new shards from placement
	var (
		multiErr      xerrors.MultiError
		usedInstances = initialPlacement.Instances()
		setupNodes    = make([]node.ServiceNode, 0, len(usedInstances))
	)
	for _, instance := range usedInstances {
		setupNode, err := c.markSpareUsedWithLock(instance)
		if err != nil {
			multiErr = multiErr.Add(err)
			continue
		}
		setupNodes = append(setupNodes, setupNode)
	}

	multiErr = multiErr.
		Add(c.setPlacementWithLock(initialPlacement))

	return setupNodes, c.markStatusWithLock(ClusterStatusSetup, multiErr.FinalError())
}

func (c *svcCluster) markSpareUsedWithLock(spare placement.Instance) (node.ServiceNode, error) {
	id := spare.ID()
	spareNode, ok := c.sparesByID[id]
	if !ok {
		// should never happen
		return nil, fmt.Errorf("unable to find spare node with id: %s", id)
	}
	delete(c.sparesByID, id)
	c.spares = nodeSliceWithoutID(c.spares, id)
	c.usedNodes[id] = spareNode
	return spareNode, nil
}

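// AddSpecifiedNode adds the provided node to the placement; the node must be
// one of the cluster's known spares.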
func (c *svcCluster) AddSpecifiedNode(newNode node.ServiceNode) error {
	c.Lock()
	defer c.Unlock()

	if !c.isSpareNodeWithLock(newNode) {
		return fmt.Errorf("provided node is not a known spare")
	}

	_, err := c.addNodeFromListWithLock([]placement.Instance{newNode.(placement.Instance)})
	return err
}

func (c *svcCluster) isSpareNodeWithLock(n node.ServiceNode) bool {
	_, ok := c.sparesByID[n.ID()]
	return ok
}

func (c *svcCluster) addNodeFromListWithLock(candidates []placement.Instance) (node.ServiceNode, error) {
	if c.status != ClusterStatusRunning && c.status != ClusterStatusSetup {
		return nil, errClusterUnableToAlterPlacement
	}

	var (
		psvc          = c.placementSvc
		newPlacement  placement.Placement
		usedInstances []placement.Instance
	)
	if err := c.opts.PlacementServiceRetrier().Attempt(func() error {
		var internalErr error
		newPlacement, usedInstances, internalErr = psvc.AddInstances(candidates)
		return internalErr
	}); err != nil {
		return nil, err
	}

	if len(usedInstances) != 1 {
		return nil, fmt.Errorf("%d instances added to the placement, expecting 1", len(usedInstances))
	}

	setupNode, err := c.markSpareUsedWithLock(usedInstances[0])
	if err != nil {
		return nil, err
	}

	return setupNode, c.setPlacementWithLock(newPlacement)
}

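// AddNode adds any available spare node to the placement and returns the node
// that was added.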
func (c *svcCluster) AddNode() (node.ServiceNode, error) {
	c.Lock()
	defer c.Unlock()

	numSpares := len(c.spares)
	if numSpares < 1 {
		return nil, errInsufficientCapacity
	}

	return c.addNodeFromListWithLock(c.sparesAsPlacementInstancesWithLock())
}

func (c *svcCluster) setPlacementWithLock(p placement.Placement) error {
	for _, instance := range p.Instances() {
		// nb(prateek): update usedNodes with the new shards.
		instanceID := instance.ID()
		usedNode, ok := c.usedNodes[instanceID]
		if ok {
			usedNode.SetShards(instance.Shards())
		}
	}

	c.placement = p
	return nil
}

func (c *svcCluster) sparesAsPlacementInstancesWithLock() []placement.Instance {
	spares := make([]placement.Instance, 0, len(c.spares))
	for _, spare := range c.spares {
		spares = append(spares, spare.(placement.Instance))
	}
	return spares
}

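// RemoveNode removes the given node from the placement and returns it to the
// spare pool.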
func (c *svcCluster) RemoveNode(i node.ServiceNode) error {
	c.Lock()
	defer c.Unlock()

	if c.status != ClusterStatusRunning && c.status != ClusterStatusSetup {
		return errClusterUnableToAlterPlacement
	}

	usedNode, ok := c.usedNodes[i.ID()]
	if !ok {
		return errNodeNotInUse
	}

	var (
		newPlacement placement.Placement
		psvc         = c.placementSvc
	)
	if err := c.opts.PlacementServiceRetrier().Attempt(func() error {
		var internalErr error
		newPlacement, internalErr = psvc.RemoveInstances([]string{i.ID()})
		return internalErr
	}); err != nil {
		return err
	}

	// update removed instance from used -> spare
	// nb(prateek): this omits modeling "leaving" shards on the node being removed
	usedNode.SetShards(shard.NewShards(nil))
	delete(c.usedNodes, usedNode.ID())
	c.addSparesWithLock([]node.ServiceNode{usedNode})

	return c.setPlacementWithLock(newPlacement)
}

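// ReplaceNode swaps the given in-use node out of the placement for one or more
// spare nodes, returning the replacement nodes.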
func (c *svcCluster) ReplaceNode(oldNode node.ServiceNode) ([]node.ServiceNode, error) {
	c.Lock()
	defer c.Unlock()

	if c.status != ClusterStatusRunning && c.status != ClusterStatusSetup {
		return nil, errClusterUnableToAlterPlacement
	}

	if _, ok := c.usedNodes[oldNode.ID()]; !ok {
		return nil, errNodeNotInUse
	}

	var (
		psvc            = c.placementSvc
		spareCandidates = c.sparesAsPlacementInstancesWithLock()
		newPlacement    placement.Placement
		newInstances    []placement.Instance
	)
	if err := c.opts.PlacementServiceRetrier().Attempt(func() error {
		var internalErr error
		newPlacement, newInstances, internalErr = psvc.ReplaceInstances([]string{oldNode.ID()}, spareCandidates)
		return internalErr
	}); err != nil {
		return nil, err
	}

	// mark old node no longer used
	oldNode.SetShards(shard.NewShards(nil))
	delete(c.usedNodes, oldNode.ID())
	c.addSparesWithLock([]node.ServiceNode{oldNode})

	var (
		multiErr xerrors.MultiError
		newNodes = make([]node.ServiceNode, 0, len(newInstances))
	)
	for _, instance := range newInstances {
		newNode, err := c.markSpareUsedWithLock(instance)
		if err != nil {
			multiErr = multiErr.Add(err)
			continue
		}
		newNodes = append(newNodes, newNode)
	}

	multiErr = multiErr.
		Add(c.setPlacementWithLock(newPlacement))

	return newNodes, multiErr.FinalError()
}

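// SpareNodes returns the known nodes that are not currently part of the
// placement.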
func (c *svcCluster) SpareNodes() []node.ServiceNode {
	c.Lock()
	defer c.Unlock()
	return c.spares
}

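// ActiveNodes returns the nodes currently in the placement.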
func (c *svcCluster) ActiveNodes() []node.ServiceNode {
	c.Lock()
	defer c.Unlock()
	return c.usedNodes.values()
}

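// KnownNodes returns every node the cluster was constructed with, regardless
// of whether it is currently placed or spare.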
func (c *svcCluster) KnownNodes() []node.ServiceNode {
	c.Lock()
	defer c.Unlock()
	return c.knownNodes
}

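// markStatusWithLock records the outcome of a lifecycle transition: on success
// the cluster moves to the requested status, on failure it is parked in
// ClusterStatusError and the error is retained in lastErr. The happy-path
// transitions are:
//
//	Uninitialized --Setup()--> Setup --Start()--> Running
//	Running --Stop()--> Setup
//	any status other than Uninitialized --Teardown()--> Uninitialized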
func (c *svcCluster) markStatusWithLock(status Status, err error) error {
	if err == nil {
		c.status = status
		return nil
	}

	c.status = ClusterStatusError
	c.lastErr = err
	return err
}

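// Teardown tears down every known node, returns all nodes to the spare pool,
// and marks the cluster uninitialized.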
func (c *svcCluster) Teardown() error {
	c.Lock()
	defer c.Unlock()

	if c.status == ClusterStatusUninitialized {
		return errClusterUnableToTeardown
	}

	err := c.newExecutor(c.knownNodes, func(node node.ServiceNode) error {
		return node.Teardown()
	}).Run()

	for id, usedNode := range c.usedNodes {
		usedNode.SetShards(shard.NewShards(nil))
		delete(c.usedNodes, id)
	}
	c.spares = make([]node.ServiceNode, 0, len(c.knownNodes))
	c.sparesByID = make(map[string]node.ServiceNode, len(c.knownNodes))
	c.addSparesWithLock(c.knownNodes)

	return c.markStatusWithLock(ClusterStatusUninitialized, err)
}

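// Start starts the service process on every node in the placement and marks
// the cluster running.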
func (c *svcCluster) Start() error {
	c.Lock()
	defer c.Unlock()

	if c.status != ClusterStatusSetup {
		return errUnableToStartUnsetupCluster
	}

	err := c.newExecutor(c.usedNodes.values(), func(node node.ServiceNode) error {
		return node.Start()
	}).Run()

	return c.markStatusWithLock(ClusterStatusRunning, err)
}

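// Stop stops the service process on every node in the placement and returns
// the cluster to the setup state.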
func (c *svcCluster) Stop() error {
	c.Lock()
	defer c.Unlock()

	if c.status != ClusterStatusRunning {
		return errUnableToStopNotRunningCluster
	}

	err := c.newExecutor(c.usedNodes.values(), func(node node.ServiceNode) error {
		return node.Stop()
	}).Run()

	return c.markStatusWithLock(ClusterStatusSetup, err)
}

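// Status returns the current cluster status.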
func (c *svcCluster) Status() Status {
	c.RLock()
	defer c.RUnlock()
	return c.status
}