github.com/m3db/m3@v1.5.0/src/m3em/cluster/cluster.go

// Copyright (c) 2017 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package cluster

import (
	"fmt"
	"sync"

	"github.com/m3db/m3/src/cluster/placement"
	"github.com/m3db/m3/src/cluster/shard"
	"github.com/m3db/m3/src/m3em/node"
	xerrors "github.com/m3db/m3/src/x/errors"

	"go.uber.org/zap"
)

var (
	errInsufficientCapacity          = fmt.Errorf("insufficient node capacity in environment")
	errNodeNotInUse                  = fmt.Errorf("unable to remove node, not in use")
	errClusterNotUnitialized         = fmt.Errorf("unable to setup cluster, it is not uninitialized")
	errClusterUnableToAlterPlacement = fmt.Errorf("unable to alter cluster placement, it needs to be setup/running")
	errUnableToStartUnsetupCluster   = fmt.Errorf("unable to start cluster, it has not been setup")
	errClusterUnableToTeardown       = fmt.Errorf("unable to teardown cluster, it has not been setup")
	errUnableToStopNotRunningCluster = fmt.Errorf("unable to stop cluster, it is not running")
)

type idToNodeMap map[string]node.ServiceNode

func (im idToNodeMap) values() []node.ServiceNode {
	returnNodes := make([]node.ServiceNode, 0, len(im))
	for _, node := range im {
		returnNodes = append(returnNodes, node)
	}
	return returnNodes
}

type svcCluster struct {
	sync.RWMutex

	logger       *zap.Logger
	opts         Options
	knownNodes   node.ServiceNodes
	usedNodes    idToNodeMap
	spares       []node.ServiceNode
	sparesByID   map[string]node.ServiceNode
	placementSvc placement.Service
	placement    placement.Placement
	status       Status
	lastErr      error
}

// New returns a new cluster backed by the provided service nodes.
func New(
	nodes node.ServiceNodes,
	opts Options,
) (Cluster, error) {
	if err := opts.Validate(); err != nil {
		return nil, err
	}

	cluster := &svcCluster{
		logger:       opts.InstrumentOptions().Logger(),
		opts:         opts,
		knownNodes:   nodes,
		usedNodes:    make(idToNodeMap, len(nodes)),
		spares:       make([]node.ServiceNode, 0, len(nodes)),
		sparesByID:   make(map[string]node.ServiceNode, len(nodes)),
		placementSvc: opts.PlacementService(),
		status:       ClusterStatusUninitialized,
	}
	cluster.addSparesWithLock(nodes) // safe without the lock; the cluster is not yet shared

	return cluster, nil
}

// addSparesWithLock adds the given nodes to the spare pool and its ID index.
func (c *svcCluster) addSparesWithLock(spares []node.ServiceNode) {
	for _, spare := range spares {
		c.spares = append(c.spares, spare)
		c.sparesByID[spare.ID()] = spare
	}
}

// nodeSliceWithoutID returns a copy of originalSlice with the node whose ID
// matches removeID filtered out.
func nodeSliceWithoutID(originalSlice node.ServiceNodes, removeID string) node.ServiceNodes {
	newSlice := make(node.ServiceNodes, 0, len(originalSlice))
	for _, elem := range originalSlice {
		if elem.ID() != removeID {
			newSlice = append(newSlice, elem)
		}
	}
	return newSlice
}

// newExecutor returns a concurrent executor that applies fn to each of the
// provided nodes, bounded by the configured node concurrency and operation timeout.
func (c *svcCluster) newExecutor(
	nodes node.ServiceNodes,
	fn node.ServiceNodeFn,
) node.ConcurrentExecutor {
	return node.NewConcurrentExecutor(nodes, c.opts.NodeConcurrency(), c.opts.NodeOperationTimeout(), fn)
}

func (c *svcCluster) Placement() placement.Placement {
	c.Lock()
	defer c.Unlock()
	return c.placement
}

// initWithLock deletes any pre-existing placement and distributes the service
// build and configuration to all known nodes.
func (c *svcCluster) initWithLock() error {
	psvc := c.placementSvc
	// attempt to retrieve current placement
	_, err := psvc.Placement()
	if err != nil {
		c.logger.Info("unable to retrieve existing placement, skipping delete attempt")
	} else {
		// delete existing placement
		err = c.opts.PlacementServiceRetrier().Attempt(psvc.Delete)
		if err != nil {
			return fmt.Errorf("unable to delete existing placement during setup(): %+v", err)
		}
		c.logger.Info("successfully deleted existing placement")
	}

	var (
		svcBuild        = c.opts.ServiceBuild()
		svcConf         = c.opts.ServiceConfig()
		sessionToken    = c.opts.SessionToken()
		sessionOverride = c.opts.SessionOverride()
		listener        = c.opts.NodeListener()
	)

	// setup all known service nodes with build, config
	executor := c.newExecutor(c.knownNodes, func(node node.ServiceNode) error {
		err := node.Setup(svcBuild, svcConf, sessionToken, sessionOverride)
		if err != nil {
			return err
		}
		if listener != nil {
			// NB: no need to track returned listenerID here, it's cleaned up in node.Teardown()
			node.RegisterListener(listener)
		}
		return nil
	})
	return executor.Run()
}

func (c *svcCluster) Setup(numNodes int) ([]node.ServiceNode, error) {
	c.Lock()
	defer c.Unlock()

	if c.status != ClusterStatusUninitialized {
		return nil, errClusterNotUnitialized
	}

	numSpares := len(c.spares)
	if numSpares < numNodes {
		return nil, errInsufficientCapacity
	}

	if err := c.initWithLock(); err != nil {
		return nil, err
	}

	psvc := c.placementSvc
	spares := c.sparesAsPlacementInstaceWithLock()[:numNodes]

	// we don't need to use the retrier here as there are no other users of this placement yet
	placement, err := psvc.BuildInitialPlacement(spares, c.opts.NumShards(), c.opts.Replication())
	if err != nil {
		return nil, err
	}

	// update ServiceNode with new shards from placement
	var (
		multiErr      xerrors.MultiError
		usedInstances = placement.Instances()
		setupNodes    = make([]node.ServiceNode, 0, len(usedInstances))
	)
	for _, instance := range usedInstances {
		setupNode, err := c.markSpareUsedWithLock(instance)
		if err != nil {
			multiErr = multiErr.Add(err)
			continue
		}
		setupNodes = append(setupNodes, setupNode)
	}

	multiErr = multiErr.Add(c.setPlacementWithLock(placement))

	return setupNodes, c.markStatusWithLock(ClusterStatusSetup, multiErr.FinalError())
}

// markSpareUsedWithLock moves the spare node backing the given placement
// instance from the spare pool into the set of used nodes.
func (c *svcCluster) markSpareUsedWithLock(spare placement.Instance) (node.ServiceNode, error) {
	id := spare.ID()
	spareNode, ok := c.sparesByID[id]
	if !ok {
		// should never happen
		return nil, fmt.Errorf("unable to find spare node with id: %s", id)
	}
	delete(c.sparesByID, id)
	c.spares = nodeSliceWithoutID(c.spares, id)
	c.usedNodes[id] = spareNode
	return spareNode, nil
}

func (c *svcCluster) AddSpecifiedNode(newNode node.ServiceNode) error {
	c.Lock()
	defer c.Unlock()

	if !c.isSpareNodeWithLock(newNode) {
		return fmt.Errorf("provided node is not a known spare")
	}

	_, err := c.addNodeFromListWithLock([]placement.Instance{newNode.(placement.Instance)})
	return err
}

func (c *svcCluster) isSpareNodeWithLock(n node.ServiceNode) bool {
	_, ok := c.sparesByID[n.ID()]
	return ok
}

// addNodeFromListWithLock asks the placement service to add one instance from
// the provided candidates and marks the chosen spare as used.
func (c *svcCluster) addNodeFromListWithLock(candidates []placement.Instance) (node.ServiceNode, error) {
	if c.status != ClusterStatusRunning && c.status != ClusterStatusSetup {
		return nil, errClusterUnableToAlterPlacement
	}

	var (
		psvc          = c.placementSvc
		newPlacement  placement.Placement
		usedInstances []placement.Instance
	)
	if err := c.opts.PlacementServiceRetrier().Attempt(func() error {
		var internalErr error
		newPlacement, usedInstances, internalErr = psvc.AddInstances(candidates)
		return internalErr
	}); err != nil {
		return nil, err
	}

	if len(usedInstances) != 1 {
		return nil, fmt.Errorf("%d instances added to the placement, expecting 1", len(usedInstances))
	}

	setupNode, err := c.markSpareUsedWithLock(usedInstances[0])
	if err != nil {
		return nil, err
	}

	return setupNode, c.setPlacementWithLock(newPlacement)
}

func (c *svcCluster) AddNode() (node.ServiceNode, error) {
	c.Lock()
	defer c.Unlock()

	numSpares := len(c.spares)
	if numSpares < 1 {
		return nil, errInsufficientCapacity
	}

	return c.addNodeFromListWithLock(c.sparesAsPlacementInstaceWithLock())
}
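
// exampleScaleCluster is an illustrative sketch of how a caller might grow and
// then shrink a cluster that has already been setup or started: AddNode pulls
// an arbitrary spare into the placement, while RemoveNode returns a specific
// active node to the spare pool. It assumes the Cluster interface (defined
// elsewhere in this package) exposes the AddNode/RemoveNode methods implemented
// in this file; error handling beyond propagation is elided.
func exampleScaleCluster(c Cluster) error {
	// Place one spare node into the cluster's placement.
	added, err := c.AddNode()
	if err != nil {
		return err
	}

	// ... exercise the expanded cluster here ...

	// Return the node to the spare pool; its shards are cleared as part of removal.
	return c.RemoveNode(added)
}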

func (c *svcCluster) setPlacementWithLock(p placement.Placement) error {
	for _, instance := range p.Instances() {
		// nb(prateek): update usedNodes with the new shards.
		instanceID := instance.ID()
		usedNode, ok := c.usedNodes[instanceID]
		if ok {
			usedNode.SetShards(instance.Shards())
		}
	}

	c.placement = p
	return nil
}

func (c *svcCluster) sparesAsPlacementInstaceWithLock() []placement.Instance {
	spares := make([]placement.Instance, 0, len(c.spares))
	for _, spare := range c.spares {
		spares = append(spares, spare.(placement.Instance))
	}
	return spares
}

func (c *svcCluster) RemoveNode(i node.ServiceNode) error {
	c.Lock()
	defer c.Unlock()

	if c.status != ClusterStatusRunning && c.status != ClusterStatusSetup {
		return errClusterUnableToAlterPlacement
	}

	usedNode, ok := c.usedNodes[i.ID()]
	if !ok {
		return errNodeNotInUse
	}

	var (
		newPlacement placement.Placement
		psvc         = c.placementSvc
	)
	if err := c.opts.PlacementServiceRetrier().Attempt(func() error {
		var internalErr error
		newPlacement, internalErr = psvc.RemoveInstances([]string{i.ID()})
		return internalErr
	}); err != nil {
		return err
	}

	// update removed instance from used -> spare
	// nb(prateek): this omits modeling "leaving" shards on the node being removed
	usedNode.SetShards(shard.NewShards(nil))
	delete(c.usedNodes, usedNode.ID())
	c.addSparesWithLock([]node.ServiceNode{usedNode})

	return c.setPlacementWithLock(newPlacement)
}

func (c *svcCluster) ReplaceNode(oldNode node.ServiceNode) ([]node.ServiceNode, error) {
	c.Lock()
	defer c.Unlock()

	if c.status != ClusterStatusRunning && c.status != ClusterStatusSetup {
		return nil, errClusterUnableToAlterPlacement
	}

	if _, ok := c.usedNodes[oldNode.ID()]; !ok {
		return nil, errNodeNotInUse
	}

	var (
		psvc            = c.placementSvc
		spareCandidates = c.sparesAsPlacementInstaceWithLock()
		newPlacement    placement.Placement
		newInstances    []placement.Instance
	)
	if err := c.opts.PlacementServiceRetrier().Attempt(func() error {
		var internalErr error
		newPlacement, newInstances, internalErr = psvc.ReplaceInstances([]string{oldNode.ID()}, spareCandidates)
		return internalErr
	}); err != nil {
		return nil, err
	}

	// mark old node no longer used
	oldNode.SetShards(shard.NewShards(nil))
	delete(c.usedNodes, oldNode.ID())
	c.addSparesWithLock([]node.ServiceNode{oldNode})

	var (
		multiErr xerrors.MultiError
		newNodes = make([]node.ServiceNode, 0, len(newInstances))
	)
	for _, instance := range newInstances {
		newNode, err := c.markSpareUsedWithLock(instance)
		if err != nil {
			multiErr = multiErr.Add(err)
			continue
		}
		newNodes = append(newNodes, newNode)
	}

	multiErr = multiErr.Add(c.setPlacementWithLock(newPlacement))

	return newNodes, multiErr.FinalError()
}

func (c *svcCluster) SpareNodes() []node.ServiceNode {
	c.Lock()
	defer c.Unlock()
	return c.spares
}

func (c *svcCluster) ActiveNodes() []node.ServiceNode {
	c.Lock()
	defer c.Unlock()
	return c.usedNodes.values()
}

func (c *svcCluster) KnownNodes() []node.ServiceNode {
	c.Lock()
	defer c.Unlock()
	return c.knownNodes
}

func (c *svcCluster) markStatusWithLock(status Status, err error) error {
	if err == nil {
		c.status = status
		return nil
	}

	c.status = ClusterStatusError
	c.lastErr = err
	return err
}

func (c *svcCluster) Teardown() error {
	c.Lock()
	defer c.Unlock()

	if c.status == ClusterStatusUninitialized {
		return errClusterUnableToTeardown
	}

	err := c.newExecutor(c.knownNodes, func(node node.ServiceNode) error {
		return node.Teardown()
	}).Run()

	for id, usedNode := range c.usedNodes {
		usedNode.SetShards(shard.NewShards(nil))
		delete(c.usedNodes, id)
	}
	c.spares = make([]node.ServiceNode, 0, len(c.knownNodes))
	c.sparesByID = make(map[string]node.ServiceNode, len(c.knownNodes))
	c.addSparesWithLock(c.knownNodes)

	return c.markStatusWithLock(ClusterStatusUninitialized, err)
}

func (c *svcCluster) Start() error {
	c.Lock()
	defer c.Unlock()

	if c.status != ClusterStatusSetup {
		return errUnableToStartUnsetupCluster
	}

	err := c.newExecutor(c.usedNodes.values(), func(node node.ServiceNode) error {
		return node.Start()
	}).Run()

	return c.markStatusWithLock(ClusterStatusRunning, err)
}

func (c *svcCluster) Stop() error {
	c.Lock()
	defer c.Unlock()

	if c.status != ClusterStatusRunning {
		return errUnableToStopNotRunningCluster
	}

	err := c.newExecutor(c.usedNodes.values(), func(node node.ServiceNode) error {
		return node.Stop()
	}).Run()

	return c.markStatusWithLock(ClusterStatusSetup, err)
}

func (c *svcCluster) Status() Status {
	c.RLock()
	defer c.RUnlock()
	return c.status
}
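
// exampleClusterLifecycle is an illustrative sketch of the intended lifecycle
// of a cluster built by New: Setup distributes the service build/config and
// places a subset of the known nodes, Start and Stop toggle the placed
// processes, and Teardown returns every node to the spare pool. It assumes the
// caller constructs the service nodes and Options (build, config, placement
// service, etc.) elsewhere, and it elides any workload run between Start and
// Stop.
func exampleClusterLifecycle(nodes node.ServiceNodes, opts Options, numNodes int) error {
	c, err := New(nodes, opts)
	if err != nil {
		return err
	}

	// Setup all known nodes and place numNodes of them into the initial placement.
	if _, err := c.Setup(numNodes); err != nil {
		return err
	}

	// Start the placed nodes; the cluster is now ClusterStatusRunning.
	if err := c.Start(); err != nil {
		return err
	}

	// ... exercise the running cluster here ...

	// Stop the nodes (back to ClusterStatusSetup), then release everything.
	if err := c.Stop(); err != nil {
		return err
	}
	return c.Teardown()
}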