github.com/m3db/m3@v1.5.0/src/cluster/placement/algo/sharded_helper.go (about) 1 // Copyright (c) 2016 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package algo 22 23 import ( 24 "container/heap" 25 "errors" 26 "fmt" 27 "math" 28 29 "github.com/m3db/m3/src/cluster/placement" 30 "github.com/m3db/m3/src/cluster/shard" 31 32 "go.uber.org/zap" 33 ) 34 35 var ( 36 errAddingInstanceAlreadyExist = errors.New("the adding instance is already in the placement") 37 errInstanceContainsNonLeavingShards = errors.New("the adding instance contains non leaving shards") 38 errInstanceContainsInitializingShards = errors.New("the adding instance contains initializing shards") 39 ) 40 41 type instanceType int 42 43 const ( 44 anyType instanceType = iota 45 withShards 46 withLeavingShardsOnly 47 withAvailableOrLeavingShardsOnly 48 ) 49 50 type optimizeType int 51 52 const ( 53 // safe optimizes the load distribution without violating 54 // minimal shard movement. 55 safe optimizeType = iota 56 // unsafe optimizes the load distribution with the potential of violating 57 // minimal shard movement in order to reach best shard distribution. 58 unsafe 59 ) 60 61 type assignLoadFn func(instance placement.Instance) error 62 63 type placementHelper interface { 64 PlacementHelper 65 66 // placeShards distributes shards to the instances in the helper, with aware of where are the shards coming from. 67 placeShards(shards []shard.Shard, from placement.Instance, candidates []placement.Instance) error 68 69 // addInstance adds an instance to the placement. 70 addInstance(addingInstance placement.Instance) error 71 72 // optimize rebalances the load distribution in the cluster. 73 optimize(t optimizeType) error 74 75 // generatePlacement generates a placement. 76 generatePlacement() placement.Placement 77 78 // reclaimLeavingShards reclaims all the leaving shards on the given instance 79 // by pulling them back from the rest of the cluster. 80 reclaimLeavingShards(instance placement.Instance) 81 82 // returnInitializingShards returns all the initializing shards on the given instance 83 // by returning them back to the original owners. 84 returnInitializingShards(instance placement.Instance) 85 } 86 87 // PlacementHelper helps the algorithm to place shards. 88 type PlacementHelper interface { 89 // Instances returns the list of instances managed by the PlacementHelper. 90 Instances() []placement.Instance 91 92 // CanMoveShard checks if the shard can be moved from the instance to the target isolation group. 93 CanMoveShard(shard uint32, fromInstance placement.Instance, toIsolationGroup string) bool 94 } 95 96 type helper struct { 97 targetLoad map[string]int 98 shardToInstanceMap map[uint32]map[placement.Instance]struct{} 99 groupToInstancesMap map[string]map[placement.Instance]struct{} 100 groupToWeightMap map[string]uint32 101 rf int 102 uniqueShards []uint32 103 instances map[string]placement.Instance 104 log *zap.Logger 105 opts placement.Options 106 totalWeight uint32 107 maxShardSetID uint32 108 } 109 110 // NewPlacementHelper returns a placement helper 111 func NewPlacementHelper(p placement.Placement, opts placement.Options) PlacementHelper { 112 return newHelper(p, p.ReplicaFactor(), opts) 113 } 114 115 func newInitHelper(instances []placement.Instance, ids []uint32, opts placement.Options) placementHelper { 116 emptyPlacement := placement.NewPlacement(). 117 SetInstances(instances). 118 SetShards(ids). 119 SetReplicaFactor(0). 120 SetIsSharded(true). 121 SetCutoverNanos(opts.PlacementCutoverNanosFn()()) 122 return newHelper(emptyPlacement, emptyPlacement.ReplicaFactor()+1, opts) 123 } 124 125 func newAddReplicaHelper(p placement.Placement, opts placement.Options) placementHelper { 126 return newHelper(p, p.ReplicaFactor()+1, opts) 127 } 128 129 func newAddInstanceHelper( 130 p placement.Placement, 131 instance placement.Instance, 132 opts placement.Options, 133 t instanceType, 134 ) (placementHelper, placement.Instance, error) { 135 instanceInPlacement, exist := p.Instance(instance.ID()) 136 if !exist { 137 return newHelper(p.SetInstances(append(p.Instances(), instance)), p.ReplicaFactor(), opts), instance, nil 138 } 139 140 switch t { 141 case withLeavingShardsOnly: 142 if !instanceInPlacement.IsLeaving() { 143 return nil, nil, errInstanceContainsNonLeavingShards 144 } 145 case withAvailableOrLeavingShardsOnly: 146 shards := instanceInPlacement.Shards() 147 if shards.NumShards() != shards.NumShardsForState(shard.Available)+shards.NumShardsForState(shard.Leaving) { 148 return nil, nil, errInstanceContainsInitializingShards 149 } 150 default: 151 return nil, nil, fmt.Errorf("unexpected type %v", t) 152 } 153 154 return newHelper(p, p.ReplicaFactor(), opts), instanceInPlacement, nil 155 } 156 157 func newRemoveInstanceHelper( 158 p placement.Placement, 159 instanceID string, 160 opts placement.Options, 161 ) (placementHelper, placement.Instance, error) { 162 p, leavingInstance, err := removeInstanceFromPlacement(p, instanceID) 163 if err != nil { 164 return nil, nil, err 165 } 166 return newHelper(p, p.ReplicaFactor(), opts), leavingInstance, nil 167 } 168 169 func newReplaceInstanceHelper( 170 p placement.Placement, 171 instanceIDs []string, 172 addingInstances []placement.Instance, 173 opts placement.Options, 174 ) (placementHelper, []placement.Instance, []placement.Instance, error) { 175 var ( 176 leavingInstances = make([]placement.Instance, len(instanceIDs)) 177 err error 178 ) 179 for i, instanceID := range instanceIDs { 180 p, leavingInstances[i], err = removeInstanceFromPlacement(p, instanceID) 181 if err != nil { 182 return nil, nil, nil, err 183 } 184 } 185 186 newAddingInstances := make([]placement.Instance, len(addingInstances)) 187 for i, instance := range addingInstances { 188 p, newAddingInstances[i], err = addInstanceToPlacement(p, instance, anyType) 189 if err != nil { 190 return nil, nil, nil, err 191 } 192 } 193 return newHelper(p, p.ReplicaFactor(), opts), leavingInstances, newAddingInstances, nil 194 } 195 196 func newHelper(p placement.Placement, targetRF int, opts placement.Options) placementHelper { 197 ph := &helper{ 198 rf: targetRF, 199 instances: make(map[string]placement.Instance, p.NumInstances()), 200 uniqueShards: p.Shards(), 201 maxShardSetID: p.MaxShardSetID(), 202 log: opts.InstrumentOptions().Logger(), 203 opts: opts, 204 } 205 206 for _, instance := range p.Instances() { 207 ph.instances[instance.ID()] = instance 208 } 209 210 ph.scanCurrentLoad() 211 ph.buildTargetLoad() 212 return ph 213 } 214 215 func (ph *helper) scanCurrentLoad() { 216 ph.shardToInstanceMap = make(map[uint32]map[placement.Instance]struct{}, len(ph.uniqueShards)) 217 ph.groupToInstancesMap = make(map[string]map[placement.Instance]struct{}) 218 ph.groupToWeightMap = make(map[string]uint32) 219 totalWeight := uint32(0) 220 for _, instance := range ph.instances { 221 if _, exist := ph.groupToInstancesMap[instance.IsolationGroup()]; !exist { 222 ph.groupToInstancesMap[instance.IsolationGroup()] = make(map[placement.Instance]struct{}) 223 } 224 ph.groupToInstancesMap[instance.IsolationGroup()][instance] = struct{}{} 225 226 if instance.IsLeaving() { 227 // Leaving instances are not counted as usable capacities in the placement. 228 continue 229 } 230 231 ph.groupToWeightMap[instance.IsolationGroup()] = ph.groupToWeightMap[instance.IsolationGroup()] + instance.Weight() 232 totalWeight += instance.Weight() 233 234 for _, s := range instance.Shards().All() { 235 if s.State() == shard.Leaving { 236 continue 237 } 238 ph.assignShardToInstance(s, instance) 239 } 240 } 241 ph.totalWeight = totalWeight 242 } 243 244 func (ph *helper) buildTargetLoad() { 245 overWeightedGroups := 0 246 overWeight := uint32(0) 247 for _, weight := range ph.groupToWeightMap { 248 if isOverWeighted(weight, ph.totalWeight, ph.rf) { 249 overWeightedGroups++ 250 overWeight += weight 251 } 252 } 253 254 targetLoad := make(map[string]int, len(ph.instances)) 255 for _, instance := range ph.instances { 256 if instance.IsLeaving() { 257 // We should not set a target load for leaving instances. 258 continue 259 } 260 igWeight := ph.groupToWeightMap[instance.IsolationGroup()] 261 if isOverWeighted(igWeight, ph.totalWeight, ph.rf) { 262 // If the instance is on a over-sized isolation group, the target load 263 // equals (shardLen / capacity of the isolation group). 264 targetLoad[instance.ID()] = int(math.Ceil(float64(ph.getShardLen()) * float64(instance.Weight()) / float64(igWeight))) 265 } else { 266 // If the instance is on a normal isolation group, get the target load 267 // with aware of other over-sized isolation group. 268 targetLoad[instance.ID()] = ph.getShardLen() * (ph.rf - overWeightedGroups) * int(instance.Weight()) / int(ph.totalWeight-overWeight) 269 } 270 } 271 ph.targetLoad = targetLoad 272 } 273 274 func (ph *helper) Instances() []placement.Instance { 275 res := make([]placement.Instance, 0, len(ph.instances)) 276 for _, instance := range ph.instances { 277 res = append(res, instance) 278 } 279 return res 280 } 281 282 func (ph *helper) getShardLen() int { 283 return len(ph.uniqueShards) 284 } 285 286 func (ph *helper) targetLoadForInstance(id string) int { 287 return ph.targetLoad[id] 288 } 289 290 func (ph *helper) moveOneShard(from, to placement.Instance) bool { 291 // The order matter here: 292 // The Unknown shards were just moved, so free to be moved around. 293 // The Initializing shards were still being initialized on the instance, 294 // so moving them are cheaper than moving those Available shards. 295 return ph.moveOneShardInState(from, to, shard.Unknown) || 296 ph.moveOneShardInState(from, to, shard.Initializing) || 297 ph.moveOneShardInState(from, to, shard.Available) 298 } 299 300 // nolint: unparam 301 func (ph *helper) moveOneShardInState(from, to placement.Instance, state shard.State) bool { 302 for _, s := range from.Shards().ShardsForState(state) { 303 if ph.moveShard(s, from, to) { 304 return true 305 } 306 } 307 return false 308 } 309 310 func (ph *helper) moveShard(candidateShard shard.Shard, from, to placement.Instance) bool { 311 shardID := candidateShard.ID() 312 if !ph.canAssignInstance(shardID, from, to) { 313 return false 314 } 315 316 if candidateShard.State() == shard.Leaving { 317 // should not move a Leaving shard, 318 // Leaving shard will be removed when the Initializing shard is marked as Available 319 return false 320 } 321 322 newShard := shard.NewShard(shardID) 323 324 if from != nil { 325 switch candidateShard.State() { 326 case shard.Unknown, shard.Initializing: 327 from.Shards().Remove(shardID) 328 newShard.SetSourceID(candidateShard.SourceID()) 329 case shard.Available: 330 candidateShard. 331 SetState(shard.Leaving). 332 SetCutoffNanos(ph.opts.ShardCutoffNanosFn()()) 333 newShard.SetSourceID(from.ID()) 334 } 335 336 delete(ph.shardToInstanceMap[shardID], from) 337 } 338 339 curShard, ok := to.Shards().Shard(shardID) 340 if ok && curShard.State() == shard.Leaving { 341 // NB(cw): if the instance already owns the shard in Leaving state, 342 // simply mark it as Available 343 newShard = shard.NewShard(shardID).SetState(shard.Available) 344 // NB(cw): Break the link between new owner of this shard with this Leaving instance 345 instances := ph.shardToInstanceMap[shardID] 346 for instance := range instances { 347 shards := instance.Shards() 348 initShard, ok := shards.Shard(shardID) 349 if ok && initShard.SourceID() == to.ID() { 350 initShard.SetSourceID("") 351 } 352 } 353 354 } 355 356 ph.assignShardToInstance(newShard, to) 357 return true 358 } 359 360 func (ph *helper) CanMoveShard(shard uint32, from placement.Instance, toIsolationGroup string) bool { 361 if from != nil { 362 if from.IsolationGroup() == toIsolationGroup { 363 return true 364 } 365 } 366 for instance := range ph.shardToInstanceMap[shard] { 367 if instance.IsolationGroup() == toIsolationGroup { 368 return false 369 } 370 } 371 return true 372 } 373 374 func (ph *helper) buildInstanceHeap(instances []placement.Instance, availableCapacityAscending bool) (heap.Interface, error) { 375 return newHeap(instances, availableCapacityAscending, ph.targetLoad, ph.groupToWeightMap) 376 } 377 378 func (ph *helper) generatePlacement() placement.Placement { 379 var instances = make([]placement.Instance, 0, len(ph.instances)) 380 381 for _, instance := range ph.instances { 382 if instance.Shards().NumShards() > 0 { 383 instances = append(instances, instance) 384 } 385 } 386 387 maxShardSetID := ph.maxShardSetID 388 for _, instance := range instances { 389 shards := instance.Shards() 390 for _, s := range shards.ShardsForState(shard.Unknown) { 391 shards.Add(shard.NewShard(s.ID()). 392 SetSourceID(s.SourceID()). 393 SetState(shard.Initializing). 394 SetCutoverNanos(ph.opts.ShardCutoverNanosFn()())) 395 } 396 if shardSetID := instance.ShardSetID(); shardSetID >= maxShardSetID { 397 maxShardSetID = shardSetID 398 } 399 } 400 401 return placement.NewPlacement(). 402 SetInstances(instances). 403 SetShards(ph.uniqueShards). 404 SetReplicaFactor(ph.rf). 405 SetIsSharded(true). 406 SetIsMirrored(ph.opts.IsMirrored()). 407 SetCutoverNanos(ph.opts.PlacementCutoverNanosFn()()). 408 SetMaxShardSetID(maxShardSetID) 409 } 410 411 func (ph *helper) placeShards( 412 shards []shard.Shard, 413 from placement.Instance, 414 candidates []placement.Instance, 415 ) error { 416 shardSet := getShardMap(shards) 417 if from != nil { 418 // NB(cw) when removing an adding instance that has not finished bootstrapping its 419 // Initializing shards, prefer to return those Initializing shards back to the leaving instance 420 // to reduce some bootstrapping work in the cluster. 421 ph.returnInitializingShardsToSource(shardSet, from, candidates) 422 } 423 424 instanceHeap, err := ph.buildInstanceHeap(nonLeavingInstances(candidates), true) 425 if err != nil { 426 return err 427 } 428 // if there are shards left to be assigned, distribute them evenly 429 var triedInstances []placement.Instance 430 for _, s := range shardSet { 431 if s.State() == shard.Leaving { 432 continue 433 } 434 moved := false 435 for instanceHeap.Len() > 0 { 436 tryInstance := heap.Pop(instanceHeap).(placement.Instance) 437 triedInstances = append(triedInstances, tryInstance) 438 if ph.moveShard(s, from, tryInstance) { 439 moved = true 440 break 441 } 442 } 443 if !moved { 444 // This should only happen when RF > number of isolation groups. 445 return errNotEnoughIsolationGroups 446 } 447 for _, triedInstance := range triedInstances { 448 heap.Push(instanceHeap, triedInstance) 449 } 450 triedInstances = triedInstances[:0] 451 } 452 return nil 453 } 454 455 func (ph *helper) returnInitializingShards(instance placement.Instance) { 456 shardSet := getShardMap(instance.Shards().All()) 457 ph.returnInitializingShardsToSource(shardSet, instance, ph.Instances()) 458 } 459 460 func (ph *helper) returnInitializingShardsToSource( 461 shardSet map[uint32]shard.Shard, 462 from placement.Instance, 463 candidates []placement.Instance, 464 ) { 465 candidateMap := make(map[string]placement.Instance, len(candidates)) 466 for _, candidate := range candidates { 467 candidateMap[candidate.ID()] = candidate 468 } 469 for _, s := range shardSet { 470 if s.State() != shard.Initializing { 471 continue 472 } 473 sourceID := s.SourceID() 474 if sourceID == "" { 475 continue 476 } 477 sourceInstance, ok := candidateMap[sourceID] 478 if !ok { 479 // NB(cw): This is not an error because the candidates are not 480 // necessarily all the instances in the placement. 481 continue 482 } 483 if sourceInstance.IsLeaving() { 484 continue 485 } 486 if ph.moveShard(s, from, sourceInstance) { 487 delete(shardSet, s.ID()) 488 } 489 } 490 } 491 492 func (ph *helper) mostUnderLoadedInstance() (placement.Instance, bool) { 493 var ( 494 res placement.Instance 495 maxLoadGap int 496 totalLoadSurplus int 497 ) 498 499 for id, instance := range ph.instances { 500 loadGap := ph.targetLoad[id] - loadOnInstance(instance) 501 if loadGap > maxLoadGap { 502 maxLoadGap = loadGap 503 res = instance 504 } 505 if loadGap == maxLoadGap && res != nil && res.ID() > id { 506 res = instance 507 } 508 if loadGap < 0 { 509 totalLoadSurplus -= loadGap 510 } 511 } 512 if maxLoadGap > 0 && totalLoadSurplus != 0 { 513 return res, true 514 } 515 return nil, false 516 } 517 518 func (ph *helper) optimize(t optimizeType) error { 519 var fn assignLoadFn 520 switch t { 521 case safe: 522 fn = ph.assignLoadToInstanceSafe 523 case unsafe: 524 fn = ph.assignLoadToInstanceUnsafe 525 } 526 uniq := make(map[string]struct{}, len(ph.instances)) 527 for { 528 ins, ok := ph.mostUnderLoadedInstance() 529 if !ok { 530 return nil 531 } 532 if _, exist := uniq[ins.ID()]; exist { 533 return nil 534 } 535 536 uniq[ins.ID()] = struct{}{} 537 if err := fn(ins); err != nil { 538 return err 539 } 540 } 541 } 542 543 func (ph *helper) assignLoadToInstanceSafe(addingInstance placement.Instance) error { 544 return ph.assignTargetLoad(addingInstance, func(from, to placement.Instance) bool { 545 return ph.moveOneShardInState(from, to, shard.Unknown) 546 }) 547 } 548 549 func (ph *helper) assignLoadToInstanceUnsafe(addingInstance placement.Instance) error { 550 return ph.assignTargetLoad(addingInstance, func(from, to placement.Instance) bool { 551 return ph.moveOneShard(from, to) 552 }) 553 } 554 555 func (ph *helper) reclaimLeavingShards(instance placement.Instance) { 556 if instance.Shards().NumShardsForState(shard.Leaving) == 0 { 557 // Shortcut if there is nothing to be reclaimed. 558 return 559 } 560 id := instance.ID() 561 for _, i := range ph.instances { 562 for _, s := range i.Shards().ShardsForState(shard.Initializing) { 563 if s.SourceID() == id { 564 // NB(cw) in very rare case, the leaving shards could not be taken back. 565 // For example: in a RF=2 case, instance a and b on ig1, instance c on ig2, 566 // c took shard1 from instance a, before we tried to assign shard1 back to instance a, 567 // b got assigned shard1, now if we try to add instance a back to the topology, a can 568 // no longer take shard1 back. 569 // But it's fine, the algo will fil up those load with other shards from the cluster 570 ph.moveShard(s, i, instance) 571 } 572 } 573 } 574 } 575 576 func (ph *helper) addInstance(addingInstance placement.Instance) error { 577 ph.reclaimLeavingShards(addingInstance) 578 return ph.assignLoadToInstanceUnsafe(addingInstance) 579 } 580 581 func (ph *helper) assignTargetLoad( 582 targetInstance placement.Instance, 583 moveOneShardFn func(from, to placement.Instance) bool, 584 ) error { 585 targetLoad := ph.targetLoadForInstance(targetInstance.ID()) 586 // try to take shards from the most loaded instances until the adding instance reaches target load 587 instanceHeap, err := ph.buildInstanceHeap(nonLeavingInstances(ph.Instances()), false) 588 if err != nil { 589 return err 590 } 591 for targetInstance.Shards().NumShards() < targetLoad && instanceHeap.Len() > 0 { 592 fromInstance := heap.Pop(instanceHeap).(placement.Instance) 593 if moved := moveOneShardFn(fromInstance, targetInstance); moved { 594 heap.Push(instanceHeap, fromInstance) 595 } 596 } 597 return nil 598 } 599 600 func (ph *helper) canAssignInstance(shardID uint32, from, to placement.Instance) bool { 601 s, ok := to.Shards().Shard(shardID) 602 if ok && s.State() != shard.Leaving { 603 // NB(cw): a Leaving shard is not counted to the load of the instance 604 // so the instance should be able to take the ownership back if needed 605 // assuming i1 owns shard 1 as Available, this case can be triggered by: 606 // 1: add i2, now shard 1 is "Leaving" on i1 and "Initializing" on i2 607 // 2: remove i2, now i2 needs to return shard 1 back to i1 608 // and i1 should be able to take it and mark it as "Available" 609 return false 610 } 611 return ph.CanMoveShard(shardID, from, to.IsolationGroup()) 612 } 613 614 func (ph *helper) assignShardToInstance(s shard.Shard, to placement.Instance) { 615 to.Shards().Add(s) 616 617 if _, exist := ph.shardToInstanceMap[s.ID()]; !exist { 618 ph.shardToInstanceMap[s.ID()] = make(map[placement.Instance]struct{}) 619 } 620 ph.shardToInstanceMap[s.ID()][to] = struct{}{} 621 } 622 623 // instanceHeap provides an easy way to get best candidate instance to assign/steal a shard 624 type instanceHeap struct { 625 instances []placement.Instance 626 igToWeightMap map[string]uint32 627 targetLoad map[string]int 628 capacityAscending bool 629 } 630 631 func newHeap( 632 instances []placement.Instance, 633 capacityAscending bool, 634 targetLoad map[string]int, 635 igToWeightMap map[string]uint32, 636 ) (*instanceHeap, error) { 637 h := &instanceHeap{ 638 capacityAscending: capacityAscending, 639 instances: instances, 640 targetLoad: targetLoad, 641 igToWeightMap: igToWeightMap, 642 } 643 heap.Init(h) 644 return h, nil 645 } 646 647 func (h *instanceHeap) targetLoadForInstance(id string) int { 648 return h.targetLoad[id] 649 } 650 651 func (h *instanceHeap) Len() int { 652 return len(h.instances) 653 } 654 655 func (h *instanceHeap) Less(i, j int) bool { 656 instanceI := h.instances[i] 657 instanceJ := h.instances[j] 658 leftLoadOnI := h.targetLoadForInstance(instanceI.ID()) - loadOnInstance(instanceI) 659 leftLoadOnJ := h.targetLoadForInstance(instanceJ.ID()) - loadOnInstance(instanceJ) 660 // If both instance has tokens to be filled, prefer the one from bigger isolation group 661 // since it tends to be more picky in accepting shards 662 if leftLoadOnI > 0 && leftLoadOnJ > 0 && instanceI.IsolationGroup() != instanceJ.IsolationGroup() { 663 var ( 664 igWeightI = h.igToWeightMap[instanceI.IsolationGroup()] 665 igWeightJ = h.igToWeightMap[instanceJ.IsolationGroup()] 666 ) 667 if igWeightI != igWeightJ { 668 return igWeightI > igWeightJ 669 } 670 } 671 // compare left capacity on both instances 672 if leftLoadOnI == leftLoadOnJ { 673 return instanceI.ID() < instanceJ.ID() 674 } 675 if h.capacityAscending { 676 return leftLoadOnI > leftLoadOnJ 677 } 678 return leftLoadOnI < leftLoadOnJ 679 } 680 681 func (h instanceHeap) Swap(i, j int) { 682 h.instances[i], h.instances[j] = h.instances[j], h.instances[i] 683 } 684 685 func (h *instanceHeap) Push(i interface{}) { 686 instance := i.(placement.Instance) 687 h.instances = append(h.instances, instance) 688 } 689 690 func (h *instanceHeap) Pop() interface{} { 691 n := len(h.instances) 692 instance := h.instances[n-1] 693 h.instances = h.instances[0 : n-1] 694 return instance 695 } 696 697 func isOverWeighted(igWeight, totalWeight uint32, rf int) bool { 698 return float64(igWeight)/float64(totalWeight) >= 1.0/float64(rf) 699 } 700 701 func addInstanceToPlacement( 702 p placement.Placement, 703 i placement.Instance, 704 t instanceType, 705 ) (placement.Placement, placement.Instance, error) { 706 if _, exist := p.Instance(i.ID()); exist { 707 return nil, nil, errAddingInstanceAlreadyExist 708 } 709 710 switch t { 711 case anyType: 712 case withShards: 713 if i.Shards().NumShards() == 0 { 714 return p, i, nil 715 } 716 default: 717 return nil, nil, fmt.Errorf("unexpected type %v", t) 718 } 719 720 instance := i.Clone() 721 return p.SetInstances(append(p.Instances(), instance)), instance, nil 722 } 723 724 func removeInstanceFromPlacement(p placement.Placement, id string) (placement.Placement, placement.Instance, error) { 725 leavingInstance, exist := p.Instance(id) 726 if !exist { 727 return nil, nil, fmt.Errorf("instance %s does not exist in placement", id) 728 } 729 return p.SetInstances(removeInstanceFromList(p.Instances(), id)), leavingInstance, nil 730 } 731 732 func getShardMap(shards []shard.Shard) map[uint32]shard.Shard { 733 r := make(map[uint32]shard.Shard, len(shards)) 734 735 for _, s := range shards { 736 r[s.ID()] = s 737 } 738 return r 739 } 740 741 func loadOnInstance(instance placement.Instance) int { 742 return instance.Shards().NumShards() - instance.Shards().NumShardsForState(shard.Leaving) 743 } 744 745 func nonLeavingInstances(instances []placement.Instance) []placement.Instance { 746 r := make([]placement.Instance, 0, len(instances)) 747 for _, instance := range instances { 748 if instance.IsLeaving() { 749 continue 750 } 751 r = append(r, instance) 752 } 753 754 return r 755 } 756 757 func newShards(shardIDs []uint32) []shard.Shard { 758 r := make([]shard.Shard, len(shardIDs)) 759 for i, id := range shardIDs { 760 r[i] = shard.NewShard(id).SetState(shard.Unknown) 761 } 762 return r 763 } 764 765 func removeInstanceFromList(instances []placement.Instance, instanceID string) []placement.Instance { 766 for i, instance := range instances { 767 if instance.ID() == instanceID { 768 last := len(instances) - 1 769 instances[i] = instances[last] 770 return instances[:last] 771 } 772 } 773 return instances 774 } 775 776 func markShardsAvailable(p placement.Placement, instanceID string, shardIDs []uint32, opts placement.Options) (placement.Placement, error) { 777 instance, exist := p.Instance(instanceID) 778 if !exist { 779 return nil, fmt.Errorf("instance %s does not exist in placement", instanceID) 780 } 781 782 shards := instance.Shards() 783 for _, shardID := range shardIDs { 784 s, exist := shards.Shard(shardID) 785 if !exist { 786 return nil, fmt.Errorf("shard %d does not exist in instance %s", shardID, instanceID) 787 } 788 789 if s.State() != shard.Initializing { 790 return nil, fmt.Errorf("could not mark shard %d as available, it's not in Initializing state", s.ID()) 791 } 792 793 isCutoverFn := opts.IsShardCutoverFn() 794 if isCutoverFn != nil { 795 if err := isCutoverFn(s); err != nil { 796 return nil, err 797 } 798 } 799 800 p = p.SetCutoverNanos(opts.PlacementCutoverNanosFn()()) 801 sourceID := s.SourceID() 802 shards.Add(shard.NewShard(shardID).SetState(shard.Available)) 803 804 // There could be no source for cases like initial placement. 805 if sourceID == "" { 806 continue 807 } 808 809 sourceInstance, exist := p.Instance(sourceID) 810 if !exist { 811 return nil, fmt.Errorf("source instance %s for shard %d does not exist in placement", sourceID, shardID) 812 } 813 814 sourceShards := sourceInstance.Shards() 815 leavingShard, exist := sourceShards.Shard(shardID) 816 if !exist { 817 return nil, fmt.Errorf("shard %d does not exist in source instance %s", shardID, sourceID) 818 } 819 820 if leavingShard.State() != shard.Leaving { 821 return nil, fmt.Errorf("shard %d is not leaving instance %s", shardID, sourceID) 822 } 823 824 isCutoffFn := opts.IsShardCutoffFn() 825 if isCutoffFn != nil { 826 if err := isCutoffFn(leavingShard); err != nil { 827 return nil, err 828 } 829 } 830 831 sourceShards.Remove(shardID) 832 if sourceShards.NumShards() == 0 { 833 p = p.SetInstances(removeInstanceFromList(p.Instances(), sourceInstance.ID())) 834 } 835 } 836 837 return p, nil 838 } 839 840 // tryCleanupShardState cleans up the shard states if the user only 841 // wants to keep stable shard state in the placement. 842 func tryCleanupShardState( 843 p placement.Placement, 844 opts placement.Options, 845 ) (placement.Placement, error) { 846 if opts.ShardStateMode() == placement.StableShardStateOnly { 847 p, _, err := markAllShardsAvailable( 848 p, 849 opts.SetIsShardCutoverFn(nil).SetIsShardCutoffFn(nil), 850 ) 851 return p, err 852 } 853 return p, nil 854 } 855 856 func markAllShardsAvailable( 857 p placement.Placement, 858 opts placement.Options, 859 ) (placement.Placement, bool, error) { 860 var ( 861 err error 862 updated = false 863 ) 864 p = p.Clone() 865 for _, instance := range p.Instances() { 866 for _, s := range instance.Shards().All() { 867 if s.State() == shard.Initializing { 868 p, err = markShardsAvailable(p, instance.ID(), []uint32{s.ID()}, opts) 869 if err != nil { 870 return nil, false, err 871 } 872 updated = true 873 } 874 } 875 } 876 return p, updated, nil 877 }