github.com/m3db/m3@v1.5.0/src/cluster/placement/placement.go (about) 1 // Copyright (c) 2016 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package placement 22 23 import ( 24 "errors" 25 "fmt" 26 "sort" 27 "strings" 28 29 "github.com/m3db/m3/src/cluster/generated/proto/placementpb" 30 "github.com/m3db/m3/src/cluster/shard" 31 xerrors "github.com/m3db/m3/src/x/errors" 32 ) 33 34 const ( 35 // uninitializedShardSetID represents uninitialized shard set id. 36 uninitializedShardSetID = 0 37 ) 38 39 var ( 40 errNilPlacementProto = errors.New("nil placement proto") 41 errNilPlacementInstanceProto = errors.New("nil placement instance proto") 42 errDuplicatedShards = errors.New("invalid placement, there are duplicated shards in one replica") 43 errUnexpectedShards = errors.New("invalid placement, there are unexpected shard ids on instance") 44 errMirrorNotSharded = errors.New("invalid placement, mirrored placement must be sharded") 45 ) 46 47 type placement struct { 48 instances map[string]Instance 49 instancesByShard map[uint32][]Instance 50 rf int 51 shards []uint32 52 cutoverNanos int64 53 version int 54 maxShardSetID uint32 55 isSharded bool 56 isMirrored bool 57 } 58 59 // NewPlacement returns a ServicePlacement 60 func NewPlacement() Placement { 61 return &placement{} 62 } 63 64 // NewPlacementFromProto creates a new placement from proto. 65 func NewPlacementFromProto(p *placementpb.Placement) (Placement, error) { 66 if p == nil { 67 return nil, errNilPlacementProto 68 } 69 70 shards := make([]uint32, p.NumShards) 71 for i := uint32(0); i < p.NumShards; i++ { 72 shards[i] = i 73 } 74 instances := make([]Instance, 0, len(p.Instances)) 75 for _, instance := range p.Instances { 76 pi, err := NewInstanceFromProto(instance) 77 if err != nil { 78 return nil, err 79 } 80 instances = append(instances, pi) 81 } 82 83 return NewPlacement(). 84 SetInstances(instances). 85 SetShards(shards). 86 SetReplicaFactor(int(p.ReplicaFactor)). 87 SetIsSharded(p.IsSharded). 88 SetCutoverNanos(p.CutoverTime). 89 SetIsMirrored(p.IsMirrored). 90 SetMaxShardSetID(p.MaxShardSetId), nil 91 } 92 93 func (p *placement) InstancesForShard(shard uint32) []Instance { 94 if len(p.instancesByShard) == 0 { 95 return nil 96 } 97 return p.instancesByShard[shard] 98 } 99 100 func (p *placement) Instances() []Instance { 101 result := make([]Instance, 0, p.NumInstances()) 102 for _, instance := range p.instances { 103 result = append(result, instance) 104 } 105 sort.Sort(ByIDAscending(result)) 106 return result 107 } 108 109 func (p *placement) SetInstances(instances []Instance) Placement { 110 instancesMap := make(map[string]Instance, len(instances)) 111 instancesByShard := make(map[uint32][]Instance) 112 for _, instance := range instances { 113 instancesMap[instance.ID()] = instance 114 for _, shard := range instance.Shards().AllIDs() { 115 instancesByShard[shard] = append(instancesByShard[shard], instance) 116 } 117 } 118 119 // Sort the instances by their ids for deterministic ordering. 120 for _, instances := range instancesByShard { 121 sort.Sort(ByIDAscending(instances)) 122 } 123 124 p.instancesByShard = instancesByShard 125 p.instances = instancesMap 126 return p 127 } 128 129 func (p *placement) NumInstances() int { 130 return len(p.instances) 131 } 132 133 func (p *placement) Instance(id string) (Instance, bool) { 134 instance, ok := p.instances[id] 135 return instance, ok 136 } 137 138 func (p *placement) ReplicaFactor() int { 139 return p.rf 140 } 141 142 func (p *placement) SetReplicaFactor(rf int) Placement { 143 p.rf = rf 144 return p 145 } 146 147 func (p *placement) Shards() []uint32 { 148 return p.shards 149 } 150 151 func (p *placement) SetShards(shards []uint32) Placement { 152 p.shards = shards 153 return p 154 } 155 156 func (p *placement) NumShards() int { 157 return len(p.shards) 158 } 159 160 func (p *placement) IsSharded() bool { 161 return p.isSharded 162 } 163 164 func (p *placement) SetIsSharded(v bool) Placement { 165 p.isSharded = v 166 return p 167 } 168 169 func (p *placement) IsMirrored() bool { 170 return p.isMirrored 171 } 172 173 func (p *placement) SetIsMirrored(v bool) Placement { 174 p.isMirrored = v 175 return p 176 } 177 178 func (p *placement) MaxShardSetID() uint32 { 179 return p.maxShardSetID 180 } 181 182 func (p *placement) SetMaxShardSetID(v uint32) Placement { 183 p.maxShardSetID = v 184 return p 185 } 186 187 func (p *placement) CutoverNanos() int64 { 188 return p.cutoverNanos 189 } 190 191 func (p *placement) SetCutoverNanos(cutoverNanos int64) Placement { 192 p.cutoverNanos = cutoverNanos 193 return p 194 } 195 196 func (p *placement) Version() int { 197 return p.version 198 } 199 200 func (p *placement) SetVersion(v int) Placement { 201 p.version = v 202 return p 203 } 204 205 func (p *placement) String() string { 206 return fmt.Sprintf( 207 "Placement[Instances=%s, NumShards=%d, ReplicaFactor=%d, IsSharded=%v, IsMirrored=%v]", 208 p.Instances(), p.NumShards(), p.ReplicaFactor(), p.IsSharded(), p.IsMirrored(), 209 ) 210 } 211 212 func (p *placement) Proto() (*placementpb.Placement, error) { 213 instances := make(map[string]*placementpb.Instance, p.NumInstances()) 214 for _, instance := range p.Instances() { 215 pi, err := instance.Proto() 216 if err != nil { 217 return nil, err 218 } 219 instances[instance.ID()] = pi 220 } 221 222 return &placementpb.Placement{ 223 Instances: instances, 224 ReplicaFactor: uint32(p.ReplicaFactor()), 225 NumShards: uint32(p.NumShards()), 226 IsSharded: p.IsSharded(), 227 CutoverTime: p.CutoverNanos(), 228 IsMirrored: p.IsMirrored(), 229 MaxShardSetId: p.MaxShardSetID(), 230 }, nil 231 } 232 233 func (p *placement) Clone() Placement { 234 return NewPlacement(). 235 SetInstances(Instances(p.Instances()).Clone()). 236 SetShards(p.Shards()). 237 SetReplicaFactor(p.ReplicaFactor()). 238 SetIsSharded(p.IsSharded()). 239 SetIsMirrored(p.IsMirrored()). 240 SetCutoverNanos(p.CutoverNanos()). 241 SetMaxShardSetID(p.MaxShardSetID()). 242 SetVersion(p.Version()) 243 } 244 245 // Validate validates a placement to ensure: 246 // - The shards on each instance are in valid state. 247 // - The total number of shards match rf * num_shards_per_replica. 248 // - Each shard shows up rf times. 249 // - There is one Initializing shard for each Leaving shard. 250 // - The instances with same shard_set_id owns the same shards. 251 func Validate(p Placement) error { 252 if err := validate(p); err != nil { 253 return xerrors.NewInvalidParamsError(err) 254 } 255 return nil 256 } 257 258 func validate(p Placement) error { 259 if p.IsMirrored() && !p.IsSharded() { 260 return errMirrorNotSharded 261 } 262 263 shardCountMap := convertShardSliceToMap(p.Shards()) 264 if len(shardCountMap) != len(p.Shards()) { 265 return errDuplicatedShards 266 } 267 268 expectedTotal := len(p.Shards()) * p.ReplicaFactor() 269 totalCapacity := 0 270 totalLeaving := 0 271 totalInit := 0 272 totalInitWithSourceID := 0 273 instancesLeavingShardsWithMatchingInitShards := make(map[string]map[uint32]string) 274 maxShardSetID := p.MaxShardSetID() 275 instancesByShardSetID := make(map[uint32]Instance, p.NumInstances()) 276 for _, instance := range p.Instances() { 277 if instance.Endpoint() == "" { 278 return fmt.Errorf("instance %s does not contain valid endpoint", instance.String()) 279 } 280 if instance.Shards().NumShards() == 0 && p.IsSharded() { 281 return fmt.Errorf("instance %s contains no shard in a sharded placement", instance.String()) 282 } 283 if instance.Shards().NumShards() != 0 && !p.IsSharded() { 284 return fmt.Errorf("instance %s contains shards in a non-sharded placement", instance.String()) 285 } 286 shardSetID := instance.ShardSetID() 287 if shardSetID > maxShardSetID { 288 return fmt.Errorf("instance %s shard set id %d is larger than max shard set id %d in the placement", instance.String(), shardSetID, maxShardSetID) 289 } 290 for _, s := range instance.Shards().All() { 291 count, exist := shardCountMap[s.ID()] 292 if !exist { 293 return errUnexpectedShards 294 } 295 switch s.State() { 296 case shard.Available: 297 shardCountMap[s.ID()] = count + 1 298 totalCapacity++ 299 case shard.Initializing: 300 totalInit++ 301 shardCountMap[s.ID()] = count + 1 302 totalCapacity++ 303 if sourceID := s.SourceID(); sourceID != "" { 304 totalInitWithSourceID++ 305 306 // Check the instance. 307 leaving, ok := p.Instance(sourceID) 308 if !ok { 309 return fmt.Errorf( 310 "instance %s has initializing shard %d with "+ 311 "source ID %s but no such instance in placement", 312 instance.ID(), s.ID(), sourceID) 313 } 314 315 // Check has leaving shard. 316 leavingShard, ok := leaving.Shards().Shard(s.ID()) 317 if !ok { 318 return fmt.Errorf( 319 "instance %s has initializing shard %d with "+ 320 "source ID %s but leaving instance has no such shard", 321 instance.ID(), s.ID(), sourceID) 322 } 323 324 // Check the shard is leaving. 325 if state := leavingShard.State(); state != shard.Leaving { 326 return fmt.Errorf( 327 "instance %s has initializing shard %d with "+ 328 "source ID %s but leaving instance has shard with state %s", 329 instance.ID(), s.ID(), sourceID, state.String()) 330 } 331 332 // Make sure does not get double matched. 333 matches, ok := instancesLeavingShardsWithMatchingInitShards[sourceID] 334 if !ok { 335 matches = make(map[uint32]string) 336 instancesLeavingShardsWithMatchingInitShards[sourceID] = matches 337 } 338 339 match, ok := matches[s.ID()] 340 if ok { 341 return fmt.Errorf( 342 "instance %s has initializing shard %d with "+ 343 "source ID %s but leaving instance has shard already matched by %s", 344 instance.ID(), s.ID(), sourceID, match) 345 } 346 347 // Track that it's matched. 348 matches[s.ID()] = instance.ID() 349 } 350 case shard.Leaving: 351 totalLeaving++ 352 default: 353 return fmt.Errorf("invalid shard state %v for shard %d", s.State(), s.ID()) 354 } 355 } 356 if shardSetID == uninitializedShardSetID { 357 continue 358 } 359 existingInstance, exists := instancesByShardSetID[shardSetID] 360 if !exists { 361 instancesByShardSetID[shardSetID] = instance 362 } else { 363 // Both existing shard ids and current shard ids are sorted in ascending order. 364 existingShardIDs := existingInstance.Shards().AllIDs() 365 currShardIDs := instance.Shards().AllIDs() 366 if len(existingShardIDs) != len(currShardIDs) { 367 return fmt.Errorf("instance %s and %s have the same shard set id %d but different number of shards", existingInstance.String(), instance.String(), shardSetID) 368 } 369 for i := 0; i < len(existingShardIDs); i++ { 370 if existingShardIDs[i] != currShardIDs[i] { 371 return fmt.Errorf("instance %s and %s have the same shard set id %d but different shards", existingInstance.String(), instance.String(), shardSetID) 372 } 373 } 374 } 375 } 376 377 if !p.IsSharded() { 378 return nil 379 } 380 381 // initializing could be more than leaving for cases like initial placement 382 if totalLeaving > totalInit { 383 return fmt.Errorf("invalid placement, %d shards in Leaving state, more than %d in Initializing state", totalLeaving, totalInit) 384 } 385 386 if totalLeaving != totalInitWithSourceID { 387 return fmt.Errorf("invalid placement, %d shards in Leaving state, not equal %d in Initializing state with source id", totalLeaving, totalInitWithSourceID) 388 } 389 390 if expectedTotal != totalCapacity { 391 return fmt.Errorf("invalid placement, the total available shards in the placement is %d, expecting %d", totalCapacity, expectedTotal) 392 } 393 394 for shard, c := range shardCountMap { 395 if p.ReplicaFactor() != c { 396 return fmt.Errorf("invalid shard count for shard %d: expected %d, actual %d", shard, p.ReplicaFactor(), c) 397 } 398 } 399 return nil 400 } 401 402 func convertShardSliceToMap(ids []uint32) map[uint32]int { 403 shardCounts := make(map[uint32]int) 404 for _, id := range ids { 405 shardCounts[id] = 0 406 } 407 return shardCounts 408 } 409 410 // NewInstance returns a new Instance 411 func NewInstance() Instance { 412 return &instance{shards: shard.NewShards(nil)} 413 } 414 415 // NewEmptyInstance returns a Instance with some basic properties but no shards assigned 416 func NewEmptyInstance(id, isolationGroup, zone, endpoint string, weight uint32) Instance { 417 return &instance{ 418 id: id, 419 isolationGroup: isolationGroup, 420 zone: zone, 421 weight: weight, 422 endpoint: endpoint, 423 shards: shard.NewShards(nil), 424 } 425 } 426 427 // NewInstanceFromProto creates a new placement instance from proto. 428 func NewInstanceFromProto(instance *placementpb.Instance) (Instance, error) { 429 if instance == nil { 430 return nil, errNilPlacementInstanceProto 431 } 432 shards, err := shard.NewShardsFromProto(instance.Shards) 433 if err != nil { 434 return nil, err 435 } 436 debugPort := uint32(0) 437 if instance.Metadata != nil { 438 debugPort = instance.Metadata.DebugPort 439 } 440 441 return NewInstance(). 442 SetID(instance.Id). 443 SetIsolationGroup(instance.IsolationGroup). 444 SetWeight(instance.Weight). 445 SetZone(instance.Zone). 446 SetEndpoint(instance.Endpoint). 447 SetShards(shards). 448 SetShardSetID(instance.ShardSetId). 449 SetHostname(instance.Hostname). 450 SetPort(instance.Port). 451 SetMetadata(InstanceMetadata{ 452 DebugPort: debugPort, 453 }), nil 454 } 455 456 type instance struct { 457 id string 458 isolationGroup string 459 zone string 460 endpoint string 461 hostname string 462 shards shard.Shards 463 port uint32 464 weight uint32 465 shardSetID uint32 466 metadata InstanceMetadata 467 } 468 469 func (i *instance) String() string { 470 return fmt.Sprintf( 471 "Instance[ID=%s, IsolationGroup=%s, Zone=%s, Weight=%d, Endpoint=%s, Hostname=%s, Port=%d, ShardSetID=%d, Shards=%s, Metadata=%+v]", 472 i.id, i.isolationGroup, i.zone, i.weight, i.endpoint, i.hostname, i.port, i.shardSetID, i.shards.String(), i.metadata, 473 ) 474 } 475 476 func (i *instance) ID() string { 477 return i.id 478 } 479 480 func (i *instance) SetID(id string) Instance { 481 i.id = id 482 return i 483 } 484 485 func (i *instance) IsolationGroup() string { 486 return i.isolationGroup 487 } 488 489 func (i *instance) SetIsolationGroup(r string) Instance { 490 i.isolationGroup = r 491 return i 492 } 493 494 func (i *instance) Zone() string { 495 return i.zone 496 } 497 498 func (i *instance) SetZone(z string) Instance { 499 i.zone = z 500 return i 501 } 502 503 func (i *instance) Weight() uint32 { 504 return i.weight 505 } 506 507 func (i *instance) SetWeight(w uint32) Instance { 508 i.weight = w 509 return i 510 } 511 512 func (i *instance) Endpoint() string { 513 return i.endpoint 514 } 515 516 func (i *instance) SetEndpoint(ip string) Instance { 517 i.endpoint = ip 518 return i 519 } 520 521 func (i *instance) Hostname() string { 522 return i.hostname 523 } 524 525 func (i *instance) SetHostname(value string) Instance { 526 i.hostname = value 527 return i 528 } 529 530 func (i *instance) Port() uint32 { 531 return i.port 532 } 533 534 func (i *instance) SetPort(value uint32) Instance { 535 i.port = value 536 return i 537 } 538 539 func (i *instance) ShardSetID() uint32 { 540 return i.shardSetID 541 } 542 543 func (i *instance) SetShardSetID(value uint32) Instance { 544 i.shardSetID = value 545 return i 546 } 547 548 func (i *instance) Shards() shard.Shards { 549 return i.shards 550 } 551 552 func (i *instance) SetShards(s shard.Shards) Instance { 553 i.shards = s 554 return i 555 } 556 557 func (i *instance) Metadata() InstanceMetadata { 558 return i.metadata 559 } 560 561 func (i *instance) SetMetadata(value InstanceMetadata) Instance { 562 i.metadata = value 563 return i 564 } 565 566 func (i *instance) Proto() (*placementpb.Instance, error) { 567 ss, err := i.Shards().Proto() 568 if err != nil { 569 return &placementpb.Instance{}, err 570 } 571 572 return &placementpb.Instance{ 573 Id: i.ID(), 574 IsolationGroup: i.IsolationGroup(), 575 Zone: i.Zone(), 576 Weight: i.Weight(), 577 Endpoint: i.Endpoint(), 578 Shards: ss, 579 ShardSetId: i.ShardSetID(), 580 Hostname: i.Hostname(), 581 Port: i.Port(), 582 Metadata: &placementpb.InstanceMetadata{ 583 DebugPort: i.Metadata().DebugPort, 584 }, 585 }, nil 586 } 587 588 func (i *instance) IsLeaving() bool { 589 return i.allShardsInState(shard.Leaving) 590 } 591 592 func (i *instance) IsInitializing() bool { 593 return i.allShardsInState(shard.Initializing) 594 } 595 596 func (i *instance) IsAvailable() bool { 597 return i.allShardsInState(shard.Available) 598 } 599 600 func (i *instance) allShardsInState(s shard.State) bool { 601 ss := i.Shards() 602 numShards := ss.NumShards() 603 if numShards == 0 { 604 return false 605 } 606 return numShards == ss.NumShardsForState(s) 607 } 608 609 func (i *instance) Clone() Instance { 610 return NewInstance(). 611 SetID(i.ID()). 612 SetIsolationGroup(i.IsolationGroup()). 613 SetZone(i.Zone()). 614 SetWeight(i.Weight()). 615 SetEndpoint(i.Endpoint()). 616 SetHostname(i.Hostname()). 617 SetPort(i.Port()). 618 SetShardSetID(i.ShardSetID()). 619 SetShards(i.Shards().Clone()). 620 SetMetadata(i.Metadata()) 621 } 622 623 // Instances is a slice of instances that can produce a debug string. 624 type Instances []Instance 625 626 func (instances Instances) String() string { 627 if len(instances) == 0 { 628 return "[]" 629 } 630 // 256 should be pretty sufficient for the string representation 631 // of each instance. 632 strs := make([]string, 0, len(instances)*256) 633 strs = append(strs, "[\n") 634 for _, elem := range instances { 635 strs = append(strs, "\t"+elem.String()+",\n") 636 } 637 strs = append(strs, "]") 638 return strings.Join(strs, "") 639 } 640 641 // Clone returns a set of cloned instances. 642 func (instances Instances) Clone() Instances { 643 cloned := make([]Instance, len(instances)) 644 for i, instance := range instances { 645 cloned[i] = instance.Clone() 646 } 647 return cloned 648 } 649 650 // ByIDAscending sorts Instance by ID ascending 651 type ByIDAscending []Instance 652 653 func (s ByIDAscending) Len() int { 654 return len(s) 655 } 656 657 func (s ByIDAscending) Less(i, j int) bool { 658 return strings.Compare(s[i].ID(), s[j].ID()) < 0 659 } 660 661 func (s ByIDAscending) Swap(i, j int) { 662 s[i], s[j] = s[j], s[i] 663 }