github.com/m3db/m3@v1.5.0/src/cluster/placement/types.go (about) 1 // Copyright (c) 2016 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package placement 22 23 import ( 24 "time" 25 26 "github.com/m3db/m3/src/cluster/generated/proto/placementpb" 27 "github.com/m3db/m3/src/cluster/kv" 28 "github.com/m3db/m3/src/cluster/shard" 29 "github.com/m3db/m3/src/x/clock" 30 "github.com/m3db/m3/src/x/instrument" 31 32 "github.com/golang/protobuf/proto" 33 ) 34 35 // Instance represents an instance in a placement. 36 type Instance interface { 37 // String is for debugging. 38 String() string 39 40 // ID is the id of the instance. 41 ID() string 42 43 // SetID sets the id of the instance. 44 SetID(id string) Instance 45 46 // IsolationGroup is the isolation group of the instance, 47 // which usually but not necessarily refers to the rack of the instance. 48 IsolationGroup() string 49 50 // SetIsolationGroup sets the isolation group of the instance. 51 SetIsolationGroup(r string) Instance 52 53 // Zone is the zone of the instance. 54 Zone() string 55 56 // SetZone sets the zone of the instance. 57 SetZone(z string) Instance 58 59 // Weight is the weight of the instance. 60 Weight() uint32 61 62 // SetWeight sets the weight of the instance. 63 SetWeight(w uint32) Instance 64 65 // Endpoint is the endpoint of the instance. 66 Endpoint() string 67 68 // SetEndpoint sets the endpoint of the instance. 69 SetEndpoint(ip string) Instance 70 71 // Shards returns the shards owned by the instance. 72 Shards() shard.Shards 73 74 // SetShards sets the shards owned by the instance. 75 SetShards(s shard.Shards) Instance 76 77 // ShardSetID returns the shard set id. 78 ShardSetID() uint32 79 80 // SetShardSetID sets the shard set id. 81 SetShardSetID(value uint32) Instance 82 83 // Hostname returns the hostname of the instance. 84 Hostname() string 85 86 // SetHostname sets the hostname of the instance. 87 SetHostname(value string) Instance 88 89 // Port returns the port of the instance. 90 Port() uint32 91 92 // SetPort sets the port of the instance. 93 SetPort(value uint32) Instance 94 95 // Metadata returns the metadata of the instance. 96 Metadata() InstanceMetadata 97 98 // SetMetadata sets the metadata of the instance. 99 SetMetadata(value InstanceMetadata) Instance 100 101 // Proto returns the proto representation for the Instance. 102 Proto() (*placementpb.Instance, error) 103 104 // IsLeaving returns whether the instance contains only leaving shards. 105 IsLeaving() bool 106 107 // IsInitializing returns whether the instance contains only initializing shards. 108 IsInitializing() bool 109 110 // IsAvailable returns whether the instance contains only available shards. 111 IsAvailable() bool 112 113 // Clone returns a clone of the Instance. 114 Clone() Instance 115 } 116 117 // InstanceMetadata represents the metadata for a single Instance in the placement. 118 type InstanceMetadata struct { 119 DebugPort uint32 120 } 121 122 // Placement describes how instances are placed. 123 type Placement interface { 124 // InstancesForShard returns the instances for a given shard id. 125 InstancesForShard(shard uint32) []Instance 126 127 // Instances returns all instances in the placement 128 Instances() []Instance 129 130 // SetInstances sets the instances 131 SetInstances(instances []Instance) Placement 132 133 // NumInstances returns the number of instances in the placement 134 NumInstances() int 135 136 // Instance returns the instance for the requested id 137 Instance(id string) (Instance, bool) 138 139 // ReplicaFactor returns the replica factor in the placement 140 ReplicaFactor() int 141 142 // SetReplicaFactor sets the ReplicaFactor 143 SetReplicaFactor(rf int) Placement 144 145 // Shards returns all the unique shard ids for a replica 146 Shards() []uint32 147 148 // SetShards sets the unique shard ids for a replica 149 SetShards(s []uint32) Placement 150 151 // NumShards returns the number of shards in a replica 152 NumShards() int 153 154 // IsSharded returns whether this placement is sharded 155 IsSharded() bool 156 157 // SetIsSharded sets IsSharded 158 SetIsSharded(v bool) Placement 159 160 // CutoverNanos returns the cutover time in nanoseconds. 161 CutoverNanos() int64 162 163 // SetCutoverNanos sets the cutover time in nanoseconds. 164 SetCutoverNanos(cutoverNanos int64) Placement 165 166 // IsMirrored returns whether the placement is mirrored. 167 IsMirrored() bool 168 169 // SetIsMirrored sets IsMirrored. 170 SetIsMirrored(v bool) Placement 171 172 // MaxShardSetID returns the maximum shard set id used before to guarantee unique 173 // shard set id generations across placement changes. 174 MaxShardSetID() uint32 175 176 // SetMaxShardSetID sets the maximum shard set id used before to guarantee unique 177 // shard set id generations across placement changes. 178 SetMaxShardSetID(value uint32) Placement 179 180 // String returns a description of the placement 181 String() string 182 183 // Version returns the version of the placement retrieved from the backing MVCC store. 184 Version() int 185 186 // SetVersion sets the version of the placement object. Since version 187 // is determined by the backing MVCC store, calling this method has no 188 // effect in terms of the updated ServicePlacement that is written back 189 // to the MVCC store. 190 SetVersion(v int) Placement 191 192 // Proto returns the proto representation for the Placement. 193 Proto() (*placementpb.Placement, error) 194 195 // Clone returns a clone of the Placement. 196 Clone() Placement 197 } 198 199 // Watch watches for updates of a placement. 200 type Watch interface { 201 // C returns the notification channel. 202 C() <-chan struct{} 203 204 // Get returns the latest version of the placement. 205 Get() (Placement, error) 206 207 // Close stops watching for placement updates. 208 Close() 209 } 210 211 // Watcher watches for updates of the placement. Unlike above type Watch, 212 // it notifies the client of placement changes via a callback function. 213 type Watcher interface { 214 // Watch starts watching the updates. 215 Watch() error 216 217 // Get returns the latest version of the placement. 218 Get() (Placement, error) 219 220 // Unwatch stops watching the updates. 221 Unwatch() error 222 } 223 224 // OnPlacementChangedFn is called when placement has changed in the store, 225 // or when it is loaded first time when watcher starts. 226 // In the latter case, the prev value is nil. 227 type OnPlacementChangedFn func(prev, curr Placement) 228 229 // WatcherOptions provide a set of placement watcher options. 230 type WatcherOptions interface { 231 // SetInstrumentOptions sets the instrument options. 232 SetInstrumentOptions(value instrument.Options) WatcherOptions 233 234 // InstrumentOptions returns the instrument options. 235 InstrumentOptions() instrument.Options 236 237 // SetStagedPlacementKey sets the kv key to watch for staged placement. 238 SetStagedPlacementKey(value string) WatcherOptions 239 240 // StagedPlacementKey returns the kv key to watch for staged placement. 241 StagedPlacementKey() string 242 243 // SetStagedPlacementStore sets the staged placement store. 244 SetStagedPlacementStore(store kv.Store) WatcherOptions 245 246 // StagedPlacementStore returns the staged placement store. 247 StagedPlacementStore() kv.Store 248 249 // SetInitWatchTimeout sets the initial watch timeout. 250 SetInitWatchTimeout(value time.Duration) WatcherOptions 251 252 // InitWatchTimeout returns the initial watch timeout. 253 InitWatchTimeout() time.Duration 254 255 // SetOnPlacementChangedFn sets the callback function for placement change. 256 SetOnPlacementChangedFn(value OnPlacementChangedFn) WatcherOptions 257 258 // OnPlacementChangedFn returns the callback function for placement change. 259 OnPlacementChangedFn() OnPlacementChangedFn 260 } 261 262 // TimeNanosFn returns the time in the format of Unix nanoseconds. 263 type TimeNanosFn func() int64 264 265 // ShardValidateFn validates the shard. 266 type ShardValidateFn func(s shard.Shard) error 267 268 // ValidateFn validates the placement. 269 type ValidateFn func(p Placement) error 270 271 // Options is the interface for placement options. 272 type Options interface { 273 // AllowPartialReplace allows shards from the leaving instance to be 274 // placed on instances other than the new instances in a replace operation 275 AllowPartialReplace() bool 276 277 // SetAllowPartialReplace sets AllowPartialReplace. 278 SetAllowPartialReplace(allowPartialReplace bool) Options 279 280 // AllowAllZones will enable the placement to contain hosts that 281 // are not contained within the same zone of the actual placement. This is 282 // needed for services that require cross zone communication. 283 AllowAllZones() bool 284 285 // SetAllowAllZones sets AllowAllZones. 286 SetAllowAllZones(allowAllZones bool) Options 287 288 // AddAllCandidates determines whether the placement will attempt to add all 289 // candidates when adding instances or just a single one. 290 AddAllCandidates() bool 291 292 // SetAddAllCandidates sets AddAllCandidates. 293 SetAddAllCandidates(addAllCandidates bool) Options 294 295 // InstanceSelector defines the strategy used to select new instances from a list of 296 // candidates when adding or replacing nodes in the placement. The default is determined 297 // by IsMirrored(); false => selector.NewNonMirroredSelector, true => NewPortMirroredSelector. 298 InstanceSelector() InstanceSelector 299 300 // SetInstanceSelector -- see InstanceSelector. 301 SetInstanceSelector(s InstanceSelector) Options 302 303 // IsSharded describes whether a placement needs to be sharded, 304 // when set to false, no specific shards will be assigned to any instance. 305 IsSharded() bool 306 307 // SetIsSharded sets IsSharded. 308 SetIsSharded(sharded bool) Options 309 310 // ShardStateMode describes the mode to manage shard state in the placement. 311 ShardStateMode() ShardStateMode 312 313 // SetShardStateMode sets ShardStateMode. 314 SetShardStateMode(value ShardStateMode) Options 315 316 // Dryrun will try to perform the placement operation but will not persist the final result. 317 Dryrun() bool 318 319 // SetDryrun sets whether the Dryrun value. 320 SetDryrun(d bool) Options 321 322 // IsMirrored returns whether the shard distribution should be mirrored 323 // to support master/slave model. 324 IsMirrored() bool 325 326 // SetIsMirrored sets IsMirrored. 327 SetIsMirrored(m bool) Options 328 329 // SkipPortMirroring returns whether to ignore the port numbers while selecting 330 // mirroring instances. 331 SkipPortMirroring() bool 332 333 // SetSkipPortMirroring sets whether to ignore the port numbers while selecting 334 // mirroring instances. 335 SetSkipPortMirroring(v bool) Options 336 337 // IsStaged returns whether the placement should keep all the snapshots. 338 IsStaged() bool 339 340 // SetIsStaged sets whether the placement should keep all the snapshots. 341 SetIsStaged(v bool) Options 342 343 // Compress returns whether the placement is compressed when written to storage. 344 Compress() bool 345 346 // SetCompress sets whether the placement is compressed when written to storage. 347 SetCompress(v bool) Options 348 349 // InstrumentOptions is the options for instrument. 350 InstrumentOptions() instrument.Options 351 352 // SetInstrumentOptions sets the instrument options. 353 SetInstrumentOptions(iopts instrument.Options) Options 354 355 // ValidZone returns the zone that added instances must be in in order 356 // to be added to a placement. 357 ValidZone() string 358 359 // SetValidZone sets the zone that added instances must be in in order to 360 // be added to a placement. By default the valid zone will be the zone of 361 // instances already in a placement, however if a placement is empty then 362 // it is necessary to specify the valid zone when adding the first 363 // instance. 364 SetValidZone(z string) Options 365 366 // PlacementCutoverNanosFn returns the TimeNanosFn for placement cutover time. 367 PlacementCutoverNanosFn() TimeNanosFn 368 369 // SetPlacementCutoverNanosFn sets the TimeNanosFn for placement cutover time. 370 SetPlacementCutoverNanosFn(fn TimeNanosFn) Options 371 372 // ShardCutoverNanosFn returns the TimeNanosFn for shard cutover time. 373 ShardCutoverNanosFn() TimeNanosFn 374 375 // SetShardCutoverNanosFn sets the TimeNanosFn for shard cutover time. 376 SetShardCutoverNanosFn(fn TimeNanosFn) Options 377 378 // ShardCutoffNanosFn returns the TimeNanosFn for shard cutoff time. 379 ShardCutoffNanosFn() TimeNanosFn 380 381 // SetShardCutoffNanosFn sets the TimeNanosFn for shard cutoff time. 382 SetShardCutoffNanosFn(fn TimeNanosFn) Options 383 384 // IsShardCutoverFn returns the validation function for shard cutover. 385 IsShardCutoverFn() ShardValidateFn 386 387 // SetIsShardCutoverFn sets the validation function for shard cutover. 388 SetIsShardCutoverFn(fn ShardValidateFn) Options 389 390 // IsShardCutoffFn returns the validation function for shard cutoff. 391 IsShardCutoffFn() ShardValidateFn 392 393 // SetIsShardCutoffFn sets the validation function for shard cutoff. 394 SetIsShardCutoffFn(fn ShardValidateFn) Options 395 396 // ValidateFnBeforeUpdate returns the validate function to be applied before 397 // a placement update. 398 ValidateFnBeforeUpdate() ValidateFn 399 400 // SetValidateFnBeforeUpdate sets the validate function to be applied before 401 // a placement update. 402 SetValidateFnBeforeUpdate(fn ValidateFn) Options 403 404 // NowFn returns the function to get time now. 405 NowFn() clock.NowFn 406 407 // SetNowFn sets the function to get time now. 408 SetNowFn(fn clock.NowFn) Options 409 } 410 411 // ShardStateMode describes the way to manage shard state in the placement. 412 type ShardStateMode int 413 414 const ( 415 // StableShardStateOnly means the placement should only keep stable shard state. 416 StableShardStateOnly ShardStateMode = iota 417 418 // IncludeTransitionalShardStates means the placement will include transitional shard states. 419 IncludeTransitionalShardStates 420 ) 421 422 // Storage provides read and write access to placement. 423 type Storage interface { 424 // Set writes a placement. 425 Set(p Placement) (Placement, error) 426 427 // CheckAndSet writes a placement if the current version 428 // matches the expected version. 429 CheckAndSet(p Placement, version int) (Placement, error) 430 431 // SetIfNotExist writes a placement. 432 SetIfNotExist(p Placement) (Placement, error) 433 434 // Placement reads placement. 435 Placement() (Placement, error) 436 437 // Watch returns a watch for the placement updates. 438 Watch() (Watch, error) 439 440 // Delete deletes the placement. 441 Delete() error 442 443 // SetProto sets the proto as the placement. 444 SetProto(p proto.Message) (int, error) 445 446 // CheckAndSetProto writes a proto if the current version 447 // matches the expected version. 448 CheckAndSetProto(p proto.Message, version int) (int, error) 449 450 // Proto returns the placement proto. 451 Proto() (proto.Message, int, error) 452 453 // PlacementForVersion returns the placement of a specific version. 454 PlacementForVersion(version int) (Placement, error) 455 } 456 457 // Service handles the placement related operations for registered services 458 // all write or update operations will persist the generated placement before returning success. 459 type Service interface { 460 Storage 461 operations 462 } 463 464 // Operator is a purely in-memory version of Service; it applies placement related operations to 465 // a local copy of a placement without persisting anything to backing storage. This can be useful 466 // to apply multiple placement operations in a row before persisting them, e.g.: 467 // 468 // func DoMultipleOps(opts placement.Options, store placement.Storage) { 469 // curPlacement := store.Placement() 470 // op := placement.NewOperator(curPlacement, opts) 471 // op.ReplaceInstances(...) 472 // op.MarkAllShardsAvailable() 473 // store.CheckAndSet(op.Placement()) 474 // } 475 type Operator interface { 476 operations 477 478 Placement() Placement 479 } 480 481 // operations are the methods shared by Service and Operator. This type is private because it's 482 // not intended to be implemented directly; Operator and Service are the correct ways to access 483 // these methods. 484 type operations interface { 485 // BuildInitialPlacement initialize a placement. 486 BuildInitialPlacement(instances []Instance, numShards int, rf int) (Placement, error) 487 488 // AddReplica up the replica factor by 1 in the placement. 489 AddReplica() (Placement, error) 490 491 // AddInstances adds instances from the candidate list to the placement. 492 AddInstances(candidates []Instance) (newPlacement Placement, addedInstances []Instance, err error) 493 494 // RemoveInstances removes instances from the placement. 495 RemoveInstances(leavingInstanceIDs []string) (Placement, error) 496 497 // ReplaceInstances picks instances from the candidate list to replace instances in current placement. 498 ReplaceInstances( 499 leavingInstanceIDs []string, 500 candidates []Instance, 501 ) ( 502 newPlacement Placement, 503 usedInstances []Instance, 504 err error, 505 ) 506 507 // MarkShardsAvailable marks given shards as available. 508 MarkShardsAvailable(instanceID string, shardIDs ...uint32) (Placement, error) 509 510 // MarkInstanceAvailable marks all the shards on a given instance as available. 511 MarkInstanceAvailable(instanceID string) (Placement, error) 512 513 // MarkAllShardsAvailable marks shard states as available where applicable. 514 MarkAllShardsAvailable() (Placement, error) 515 516 // BalanceShards rebalances load in the cluster to achieve the most balanced shard distribution. 517 BalanceShards() (Placement, error) 518 } 519 520 // Algorithm places shards on instances. 521 type Algorithm interface { 522 // InitialPlacement initialize a sharding placement with given replica factor. 523 InitialPlacement(instances []Instance, shards []uint32, rf int) (Placement, error) 524 525 // AddReplica up the replica factor by 1 in the placement. 526 AddReplica(p Placement) (Placement, error) 527 528 // AddInstances adds a list of instance to the placement. 529 AddInstances(p Placement, instances []Instance) (Placement, error) 530 531 // RemoveInstances removes a list of instances from the placement. 532 RemoveInstances(p Placement, leavingInstanceIDs []string) (Placement, error) 533 534 // ReplaceInstances replace a list of instances with new instances. 535 ReplaceInstances( 536 p Placement, 537 leavingInstanecIDs []string, 538 addingInstances []Instance, 539 ) (Placement, error) 540 541 // IsCompatibleWith checks whether the algorithm could be applied to given placement. 542 IsCompatibleWith(p Placement) error 543 544 // MarkShardsAvailable marks given shards as available. 545 MarkShardsAvailable(p Placement, instanceID string, shardIDs ...uint32) (Placement, error) 546 547 // MarkAllShardsAvailable marks shard states as available where applicable. 548 MarkAllShardsAvailable(p Placement) (Placement, bool, error) 549 550 // BalanceShards rebalances load in the cluster to achieve the most balanced shard distribution. 551 BalanceShards(p Placement) (Placement, error) 552 } 553 554 // InstanceSelector selects valid instances for the placement change. 555 type InstanceSelector interface { 556 // SelectInitialInstances selects instances for the initial placement. 557 SelectInitialInstances( 558 candidates []Instance, 559 rf int, 560 ) ([]Instance, error) 561 562 // SelectAddingInstances selects instances to be added to the placement. 563 SelectAddingInstances( 564 candidates []Instance, 565 p Placement, 566 ) ([]Instance, error) 567 568 // SelectReplaceInstances selects instances to replace existing instances in the placement. 569 SelectReplaceInstances( 570 candidates []Instance, 571 leavingInstanceIDs []string, 572 p Placement, 573 ) ([]Instance, error) 574 } 575 576 // DeploymentPlanner generates deployment steps for a placement 577 type DeploymentPlanner interface { 578 // DeploymentSteps returns the deployment steps 579 DeploymentSteps(p Placement) [][]Instance 580 } 581 582 // DeploymentOptions provides options for DeploymentPlanner 583 type DeploymentOptions interface { 584 // MaxStepSize limits the number of instances to be deployed in one step 585 MaxStepSize() int 586 SetMaxStepSize(stepSize int) DeploymentOptions 587 }