github.com/m3db/m3@v1.5.0/src/cluster/placement/algo/mirrored.go (about) 1 // Copyright (c) 2017 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package algo 22 23 import ( 24 "errors" 25 "fmt" 26 "strconv" 27 28 "github.com/m3db/m3/src/cluster/placement" 29 "github.com/m3db/m3/src/cluster/shard" 30 xerrors "github.com/m3db/m3/src/x/errors" 31 ) 32 33 var ( 34 errIncompatibleWithMirrorAlgo = errors.New("could not apply mirrored algo on the placement") 35 ) 36 37 type mirroredAlgorithm struct { 38 opts placement.Options 39 shardedAlgo placement.Algorithm 40 } 41 42 func newMirroredAlgorithm(opts placement.Options) placement.Algorithm { 43 return mirroredAlgorithm{ 44 opts: opts, 45 // Mirrored algorithm requires full replacement. 46 shardedAlgo: newShardedAlgorithm(opts.SetAllowPartialReplace(false)), 47 } 48 } 49 50 func (a mirroredAlgorithm) IsCompatibleWith(p placement.Placement) error { 51 if !p.IsMirrored() { 52 return errIncompatibleWithMirrorAlgo 53 } 54 55 if !p.IsSharded() { 56 return errIncompatibleWithMirrorAlgo 57 } 58 59 return nil 60 } 61 62 func (a mirroredAlgorithm) InitialPlacement( 63 instances []placement.Instance, 64 shards []uint32, 65 rf int, 66 ) (placement.Placement, error) { 67 mirrorInstances, err := groupInstancesByShardSetID(instances, rf) 68 if err != nil { 69 return nil, err 70 } 71 72 // We use the sharded algorithm to generate a mirror placement with rf equals 1. 73 mirrorPlacement, err := a.shardedAlgo.InitialPlacement(mirrorInstances, shards, 1) 74 if err != nil { 75 return nil, err 76 } 77 78 return placementFromMirror(mirrorPlacement, instances, rf) 79 } 80 81 func (a mirroredAlgorithm) AddReplica(p placement.Placement) (placement.Placement, error) { 82 // TODO(cw): We could support AddReplica(p placement.Placement, instances []placement.Instance) 83 // and apply the shards from the new replica to the adding instances in the future. 84 return nil, errors.New("not supported") 85 } 86 87 func (a mirroredAlgorithm) RemoveInstances( 88 p placement.Placement, 89 instanceIDs []string, 90 ) (placement.Placement, error) { 91 if err := a.IsCompatibleWith(p); err != nil { 92 return nil, err 93 } 94 95 nowNanos := a.opts.NowFn()().UnixNano() 96 // If the instances being removed are all the initializing instances in the placement. 97 // We just need to return these shards back to their sources. 98 if globalChecker.allInitializing(p, instanceIDs, nowNanos) { 99 return a.returnInitializingShards(p, instanceIDs) 100 } 101 102 p, _, err := a.MarkAllShardsAvailable(p) 103 if err != nil { 104 return nil, err 105 } 106 107 removingInstances := make([]placement.Instance, 0, len(instanceIDs)) 108 for _, id := range instanceIDs { 109 instance, ok := p.Instance(id) 110 if !ok { 111 return nil, fmt.Errorf("instance %s does not exist in the placement", id) 112 } 113 removingInstances = append(removingInstances, instance) 114 } 115 116 mirrorPlacement, err := mirrorFromPlacement(p) 117 if err != nil { 118 return nil, err 119 } 120 121 mirrorInstances, err := groupInstancesByShardSetID(removingInstances, p.ReplicaFactor()) 122 if err != nil { 123 return nil, err 124 } 125 126 for _, instance := range mirrorInstances { 127 if mirrorPlacement, err = a.shardedAlgo.RemoveInstances( 128 mirrorPlacement, 129 []string{instance.ID()}, 130 ); err != nil { 131 return nil, err 132 } 133 } 134 return placementFromMirror(mirrorPlacement, p.Instances(), p.ReplicaFactor()) 135 } 136 137 func (a mirroredAlgorithm) AddInstances( 138 p placement.Placement, 139 addingInstances []placement.Instance, 140 ) (placement.Placement, error) { 141 if err := a.IsCompatibleWith(p); err != nil { 142 return nil, err 143 } 144 145 nowNanos := a.opts.NowFn()().UnixNano() 146 // If the instances being added are all the leaving instances in the placement. 147 // We just need to get their shards back. 148 if globalChecker.allLeaving(p, addingInstances, nowNanos) { 149 return a.reclaimLeavingShards(p, addingInstances) 150 } 151 152 p, _, err := a.MarkAllShardsAvailable(p) 153 if err != nil { 154 return nil, err 155 } 156 157 // At this point, all leaving instances in the placement are cleaned up. 158 if addingInstances, err = validAddingInstances(p, addingInstances); err != nil { 159 return nil, err 160 } 161 162 mirrorPlacement, err := mirrorFromPlacement(p) 163 if err != nil { 164 return nil, err 165 } 166 167 mirrorInstances, err := groupInstancesByShardSetID(addingInstances, p.ReplicaFactor()) 168 if err != nil { 169 return nil, err 170 } 171 172 for _, instance := range mirrorInstances { 173 if mirrorPlacement, err = a.shardedAlgo.AddInstances( 174 mirrorPlacement, 175 []placement.Instance{instance}, 176 ); err != nil { 177 return nil, err 178 } 179 } 180 181 return placementFromMirror(mirrorPlacement, append(p.Instances(), addingInstances...), p.ReplicaFactor()) 182 } 183 184 func (a mirroredAlgorithm) ReplaceInstances( 185 p placement.Placement, 186 leavingInstanceIDs []string, 187 addingInstances []placement.Instance, 188 ) (placement.Placement, error) { 189 err := a.IsCompatibleWith(p) 190 if err != nil { 191 return nil, err 192 } 193 194 p = p.Clone() 195 if len(addingInstances) != len(leavingInstanceIDs) { 196 return nil, fmt.Errorf("could not replace %d instances with %d instances for mirrored replace", len(leavingInstanceIDs), len(addingInstances)) 197 } 198 199 nowNanos := a.opts.NowFn()().UnixNano() 200 201 // Revert of pending replace. 202 if localChecker.allLeaving(p, addingInstances, nowNanos) && 203 localChecker.allInitializing(p, leavingInstanceIDs, nowNanos) { 204 if p, err = a.reclaimLeavingShards(p, addingInstances); err != nil { 205 return nil, err 206 } 207 208 return a.returnInitializingShards(p, leavingInstanceIDs) 209 } 210 211 // Mark shards available only for the specified leaving instances and their peers. 212 // This allows multiple replaces that do not overlap by shard ownership. 213 for _, leavingInstanceID := range leavingInstanceIDs { 214 if p, err = a.markInstanceAndItsPeersAvailable(p, leavingInstanceID); err != nil { 215 return nil, err 216 } 217 } 218 219 if !localChecker.allAvailable(p, leavingInstanceIDs, nowNanos) { 220 return nil, fmt.Errorf("replaced instances must have all their shards available") 221 } 222 223 // At this point, all specified leaving instances and their peers in the placement are cleaned up. 224 if addingInstances, err = validAddingInstances(p, addingInstances); err != nil { 225 return nil, err 226 } 227 228 for i := range leavingInstanceIDs { 229 // We want full replacement for each instance. 230 if p, err = a.shardedAlgo.ReplaceInstances( 231 p, 232 leavingInstanceIDs[i:i+1], 233 addingInstances[i:i+1], 234 ); err != nil { 235 return nil, err 236 } 237 } 238 return p, nil 239 } 240 241 func (a mirroredAlgorithm) markInstanceAndItsPeersAvailable( 242 p placement.Placement, 243 instanceID string, 244 ) (placement.Placement, error) { 245 p = p.Clone() 246 instance, exist := p.Instance(instanceID) 247 if !exist { 248 return nil, fmt.Errorf("instance %s does not exist in placement", instanceID) 249 } 250 251 // Find all peers of specified instance - those owning same shardset. 252 // That includes instances that are replaced by this instance or replace this instance. 253 var ownerIDs []string 254 for _, i := range p.Instances() { 255 if i.ShardSetID() == instance.ShardSetID() { 256 ownerIDs = append(ownerIDs, i.ID()) 257 } 258 } 259 260 for _, id := range ownerIDs { 261 instance, exists := p.Instance(id) 262 if !exists { 263 // Instance with leaving shards could already be removed from placement 264 // after initializing shards are marked available (if past cutover time) by below code block. 265 continue 266 } 267 268 for _, s := range instance.Shards().All() { 269 if s.State() == shard.Initializing { 270 var err error 271 // MarkShardsAvailable will properly handle respective leaving shards 272 // of the respective peer leaving instance. 273 p, err = a.shardedAlgo.MarkShardsAvailable(p, id, s.ID()) 274 if err != nil { 275 return nil, xerrors.Wrapf(err, "could not mark shards available of instance %s", id) 276 } 277 } 278 } 279 } 280 281 return p, nil 282 } 283 284 func (a mirroredAlgorithm) MarkShardsAvailable( 285 p placement.Placement, 286 instanceID string, 287 shardIDs ...uint32, 288 ) (placement.Placement, error) { 289 if err := a.IsCompatibleWith(p); err != nil { 290 return nil, err 291 } 292 293 return a.shardedAlgo.MarkShardsAvailable(p, instanceID, shardIDs...) 294 } 295 296 func (a mirroredAlgorithm) MarkAllShardsAvailable( 297 p placement.Placement, 298 ) (placement.Placement, bool, error) { 299 if err := a.IsCompatibleWith(p); err != nil { 300 return nil, false, err 301 } 302 303 return a.shardedAlgo.MarkAllShardsAvailable(p) 304 } 305 306 func (a mirroredAlgorithm) BalanceShards( 307 p placement.Placement, 308 ) (placement.Placement, error) { 309 if err := a.IsCompatibleWith(p); err != nil { 310 return nil, err 311 } 312 313 mirrorPlacement, err := mirrorFromPlacement(p) 314 if err != nil { 315 return nil, err 316 } 317 318 if mirrorPlacement, err = a.shardedAlgo.BalanceShards(mirrorPlacement); err != nil { 319 return nil, err 320 } 321 322 return placementFromMirror(mirrorPlacement, p.Instances(), p.ReplicaFactor()) 323 } 324 325 // returnInitializingShards tries to return initializing shards on the given instances 326 // and retries until no more initializing shards could be returned. 327 func (a mirroredAlgorithm) returnInitializingShards( 328 p placement.Placement, 329 instanceIDs []string, 330 ) (placement.Placement, error) { 331 for { 332 madeProgess := false 333 for _, id := range instanceIDs { 334 _, exist := p.Instance(id) 335 if !exist { 336 continue 337 } 338 ph, instance, err := newRemoveInstanceHelper(p, id, a.opts) 339 if err != nil { 340 return nil, err 341 } 342 numInitShards := instance.Shards().NumShardsForState(shard.Initializing) 343 ph.returnInitializingShards(instance) 344 if instance.Shards().NumShardsForState(shard.Initializing) < numInitShards { 345 // Made some progress on returning shards. 346 madeProgess = true 347 } 348 p = ph.generatePlacement() 349 if instance.Shards().NumShards() > 0 { 350 p = p.SetInstances(append(p.Instances(), instance)) 351 } 352 } 353 if !madeProgess { 354 break 355 } 356 } 357 358 for _, id := range instanceIDs { 359 instance, ok := p.Instance(id) 360 if !ok { 361 continue 362 } 363 numInitializingShards := instance.Shards().NumShardsForState(shard.Initializing) 364 if numInitializingShards != 0 { 365 return nil, fmt.Errorf("there are %d initializing shards could not be returned for instance %s", numInitializingShards, id) 366 } 367 } 368 369 return p, nil 370 } 371 372 // reclaimLeavingShards tries to reclaim leaving shards on the given instances 373 // and retries until no more leaving shards could be reclaimed. 374 func (a mirroredAlgorithm) reclaimLeavingShards( 375 p placement.Placement, 376 addingInstances []placement.Instance, 377 ) (placement.Placement, error) { 378 for { 379 madeProgess := false 380 for _, instance := range addingInstances { 381 ph, instance, err := newAddInstanceHelper(p, instance, a.opts, withAvailableOrLeavingShardsOnly) 382 if err != nil { 383 return nil, err 384 } 385 numLeavingShards := instance.Shards().NumShardsForState(shard.Leaving) 386 ph.reclaimLeavingShards(instance) 387 if instance.Shards().NumShardsForState(shard.Leaving) < numLeavingShards { 388 // Made some progress on reclaiming shards. 389 madeProgess = true 390 } 391 p = ph.generatePlacement() 392 } 393 if !madeProgess { 394 break 395 } 396 } 397 398 for _, instance := range addingInstances { 399 id := instance.ID() 400 instance, ok := p.Instance(id) 401 if !ok { 402 return nil, fmt.Errorf("could not find instance %s in placement after reclaiming leaving shards", id) 403 } 404 numLeavingShards := instance.Shards().NumShardsForState(shard.Leaving) 405 if numLeavingShards != 0 { 406 return nil, fmt.Errorf("there are %d leaving shards could not be reclaimed for instance %s", numLeavingShards, id) 407 } 408 } 409 410 return p, nil 411 } 412 413 func validAddingInstances(p placement.Placement, addingInstances []placement.Instance) ([]placement.Instance, error) { 414 for i, instance := range addingInstances { 415 if _, exist := p.Instance(instance.ID()); exist { 416 return nil, fmt.Errorf("instance %s already exist in the placement", instance.ID()) 417 } 418 if instance.IsLeaving() { 419 // The instance was leaving in placement, after markAllShardsAsAvailable it is now removed 420 // from the placement, so we should treat them as fresh new instances. 421 addingInstances[i] = instance.SetShards(shard.NewShards(nil)) 422 } 423 } 424 return addingInstances, nil 425 } 426 427 func groupInstancesByShardSetID( 428 instances []placement.Instance, 429 rf int, 430 ) ([]placement.Instance, error) { 431 var ( 432 shardSetMap = make(map[uint32]*shardSetMetadata, len(instances)) 433 res = make([]placement.Instance, 0, len(instances)) 434 ) 435 for _, instance := range instances { 436 var ( 437 ssID = instance.ShardSetID() 438 weight = instance.Weight() 439 group = instance.IsolationGroup() 440 shards = instance.Shards() 441 ) 442 meta, ok := shardSetMap[ssID] 443 if !ok { 444 meta = &shardSetMetadata{ 445 weight: weight, 446 groups: make(map[string]struct{}, rf), 447 shards: shards, 448 } 449 shardSetMap[ssID] = meta 450 } 451 if _, ok := meta.groups[group]; ok { 452 return nil, fmt.Errorf("found duplicated isolation group %s for shardset id %d", group, ssID) 453 } 454 455 if meta.weight != weight { 456 return nil, fmt.Errorf("found different weights: %d and %d, for shardset id %d", meta.weight, weight, ssID) 457 } 458 459 if !meta.shards.Equals(shards) { 460 return nil, fmt.Errorf("found different shards: %v and %v, for shardset id %d", meta.shards, shards, ssID) 461 } 462 463 meta.groups[group] = struct{}{} 464 meta.count++ 465 } 466 467 for ssID, meta := range shardSetMap { 468 if meta.count != rf { 469 return nil, fmt.Errorf("found %d count of shard set id %d, expecting %d", meta.count, ssID, rf) 470 } 471 472 // NB(cw) The shard set ID should to be assigned in placement service, 473 // the algorithm does not change the shard set id assigned to each instance. 474 ssIDStr := strconv.Itoa(int(ssID)) 475 res = append( 476 res, 477 placement.NewInstance(). 478 SetID(ssIDStr). 479 SetIsolationGroup(ssIDStr). 480 SetWeight(meta.weight). 481 SetShardSetID(ssID). 482 SetShards(meta.shards.Clone()), 483 ) 484 } 485 486 return res, nil 487 } 488 489 // mirrorFromPlacement zips all instances with the same shardSetID into a virtual instance 490 // and create a placement with those virtual instance and rf=1. 491 func mirrorFromPlacement(p placement.Placement) (placement.Placement, error) { 492 mirrorInstances, err := groupInstancesByShardSetID(p.Instances(), p.ReplicaFactor()) 493 if err != nil { 494 return nil, err 495 } 496 497 return placement.NewPlacement(). 498 SetInstances(mirrorInstances). 499 SetReplicaFactor(1). 500 SetShards(p.Shards()). 501 SetCutoverNanos(p.CutoverNanos()). 502 SetIsSharded(true). 503 SetIsMirrored(true). 504 SetMaxShardSetID(p.MaxShardSetID()), nil 505 } 506 507 // placementFromMirror duplicates the shards for each shard set id and assign 508 // them to the instance with the shard set id. 509 func placementFromMirror( 510 mirror placement.Placement, 511 instances []placement.Instance, 512 rf int, 513 ) (placement.Placement, error) { 514 var ( 515 mirrorInstances = mirror.Instances() 516 shardSetMap = make(map[uint32][]placement.Instance, len(mirrorInstances)) 517 instancesWithShards = make([]placement.Instance, 0, len(instances)) 518 ) 519 for _, instance := range instances { 520 instances, ok := shardSetMap[instance.ShardSetID()] 521 if !ok { 522 instances = make([]placement.Instance, 0, rf) 523 } 524 instances = append(instances, instance) 525 shardSetMap[instance.ShardSetID()] = instances 526 } 527 528 for _, mirrorInstance := range mirrorInstances { 529 instances, err := instancesFromMirror(mirrorInstance, shardSetMap) 530 if err != nil { 531 return nil, err 532 } 533 instancesWithShards = append(instancesWithShards, instances...) 534 } 535 536 return placement.NewPlacement(). 537 SetInstances(instancesWithShards). 538 SetReplicaFactor(rf). 539 SetShards(mirror.Shards()). 540 SetCutoverNanos(mirror.CutoverNanos()). 541 SetIsMirrored(true). 542 SetIsSharded(true). 543 SetMaxShardSetID(mirror.MaxShardSetID()), nil 544 } 545 546 func instancesFromMirror( 547 mirrorInstance placement.Instance, 548 instancesMap map[uint32][]placement.Instance, 549 ) ([]placement.Instance, error) { 550 ssID := mirrorInstance.ShardSetID() 551 instances, ok := instancesMap[ssID] 552 if !ok { 553 return nil, fmt.Errorf("could not find shard set id %d in placement", ssID) 554 } 555 556 shards := mirrorInstance.Shards() 557 for i, instance := range instances { 558 newShards := make([]shard.Shard, shards.NumShards()) 559 for j, s := range shards.All() { 560 // TODO move clone() to shard interface 561 newShard := shard.NewShard(s.ID()).SetState(s.State()).SetCutoffNanos(s.CutoffNanos()).SetCutoverNanos(s.CutoverNanos()) 562 sourceID := s.SourceID() 563 if sourceID != "" { 564 // The sourceID in the mirror placement is shardSetID, need to be converted 565 // to instanceID. 566 shardSetID, err := strconv.Atoi(sourceID) 567 if err != nil { 568 return nil, fmt.Errorf("could not convert source id %s to shard set id", sourceID) 569 } 570 sourceInstances, ok := instancesMap[uint32(shardSetID)] 571 if !ok { 572 return nil, fmt.Errorf("could not find source id %s in placement", sourceID) 573 } 574 575 sourceID = sourceInstances[i].ID() 576 } 577 newShards[j] = newShard.SetSourceID(sourceID) 578 } 579 instances[i] = instance.SetShards(shard.NewShards(newShards)) 580 } 581 return instances, nil 582 } 583 584 type shardSetMetadata struct { 585 weight uint32 586 count int 587 groups map[string]struct{} 588 shards shard.Shards 589 }