github.com/m3db/m3@v1.5.0/src/cluster/placement/types.go (about)

     1  // Copyright (c) 2016 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package placement
    22  
    23  import (
    24  	"time"
    25  
    26  	"github.com/m3db/m3/src/cluster/generated/proto/placementpb"
    27  	"github.com/m3db/m3/src/cluster/kv"
    28  	"github.com/m3db/m3/src/cluster/shard"
    29  	"github.com/m3db/m3/src/x/clock"
    30  	"github.com/m3db/m3/src/x/instrument"
    31  
    32  	"github.com/golang/protobuf/proto"
    33  )
    34  
    35  // Instance represents an instance in a placement.
        //
        // Every Set* method returns an Instance, so setter calls can be chained.
    36  type Instance interface {
    37  	// String returns a string representation of the instance, for debugging.
    38  	String() string
    39  
    40  	// ID returns the id of the instance.
    41  	ID() string
    42  
    43  	// SetID sets the id of the instance.
    44  	SetID(id string) Instance
    45  
    46  	// IsolationGroup returns the isolation group of the instance,
    47  	// which usually but not necessarily refers to the rack of the instance.
    48  	IsolationGroup() string
    49  
    50  	// SetIsolationGroup sets the isolation group of the instance.
    51  	SetIsolationGroup(r string) Instance
    52  
    53  	// Zone returns the zone of the instance.
    54  	Zone() string
    55  
    56  	// SetZone sets the zone of the instance.
    57  	SetZone(z string) Instance
    58  
    59  	// Weight returns the weight of the instance.
    60  	Weight() uint32
    61  
    62  	// SetWeight sets the weight of the instance.
    63  	SetWeight(w uint32) Instance
    64  
    65  	// Endpoint returns the endpoint of the instance.
    66  	Endpoint() string
    67  
    68  	// SetEndpoint sets the endpoint of the instance.
    69  	SetEndpoint(ip string) Instance
    70  
    71  	// Shards returns the shards owned by the instance.
    72  	Shards() shard.Shards
    73  
    74  	// SetShards sets the shards owned by the instance.
    75  	SetShards(s shard.Shards) Instance
    76  
    77  	// ShardSetID returns the shard set id of the instance.
    78  	ShardSetID() uint32
    79  
    80  	// SetShardSetID sets the shard set id of the instance.
    81  	SetShardSetID(value uint32) Instance
    82  
    83  	// Hostname returns the hostname of the instance.
    84  	Hostname() string
    85  
    86  	// SetHostname sets the hostname of the instance.
    87  	SetHostname(value string) Instance
    88  
    89  	// Port returns the port of the instance.
    90  	Port() uint32
    91  
    92  	// SetPort sets the port of the instance.
    93  	SetPort(value uint32) Instance
    94  
    95  	// Metadata returns the metadata of the instance.
    96  	Metadata() InstanceMetadata
    97  
    98  	// SetMetadata sets the metadata of the instance.
    99  	SetMetadata(value InstanceMetadata) Instance
   100  
   101  	// Proto returns the proto representation for the Instance,
        	// along with any error encountered while building it.
   102  	Proto() (*placementpb.Instance, error)
   103  
   104  	// IsLeaving returns whether the instance contains only leaving shards.
   105  	IsLeaving() bool
   106  
   107  	// IsInitializing returns whether the instance contains only initializing shards.
   108  	IsInitializing() bool
   109  
   110  	// IsAvailable returns whether the instance contains only available shards.
   111  	IsAvailable() bool
   112  
   113  	// Clone returns a clone of the Instance.
   114  	Clone() Instance
   115  }
   116  
   117  // InstanceMetadata represents the metadata for a single Instance in the placement.
   118  type InstanceMetadata struct {
        	// DebugPort is the debug port number recorded for the instance.
        	// NOTE(review): what is served on this port is not visible here -- confirm with callers.
   119  	DebugPort uint32
   120  }
   121  
   122  // Placement describes how instances are placed.
        //
        // Every Set* method returns a Placement, so setter calls can be chained.
   123  type Placement interface {
   124  	// InstancesForShard returns the instances for a given shard id.
   125  	InstancesForShard(shard uint32) []Instance
   126  
   127  	// Instances returns all instances in the placement.
   128  	Instances() []Instance
   129  
   130  	// SetInstances sets the instances of the placement.
   131  	SetInstances(instances []Instance) Placement
   132  
   133  	// NumInstances returns the number of instances in the placement.
   134  	NumInstances() int
   135  
   136  	// Instance returns the instance for the requested id; the bool result
        	// presumably reports whether such an instance was found -- confirm.
   137  	Instance(id string) (Instance, bool)
   138  
   139  	// ReplicaFactor returns the replica factor in the placement.
   140  	ReplicaFactor() int
   141  
   142  	// SetReplicaFactor sets the replica factor of the placement.
   143  	SetReplicaFactor(rf int) Placement
   144  
   145  	// Shards returns all the unique shard ids for a replica.
   146  	Shards() []uint32
   147  
   148  	// SetShards sets the unique shard ids for a replica.
   149  	SetShards(s []uint32) Placement
   150  
   151  	// NumShards returns the number of shards in a replica.
   152  	NumShards() int
   153  
   154  	// IsSharded returns whether this placement is sharded.
   155  	IsSharded() bool
   156  
   157  	// SetIsSharded sets whether this placement is sharded.
   158  	SetIsSharded(v bool) Placement
   159  
   160  	// CutoverNanos returns the cutover time in nanoseconds.
   161  	CutoverNanos() int64
   162  
   163  	// SetCutoverNanos sets the cutover time in nanoseconds.
   164  	SetCutoverNanos(cutoverNanos int64) Placement
   165  
   166  	// IsMirrored returns whether the placement is mirrored.
   167  	IsMirrored() bool
   168  
   169  	// SetIsMirrored sets whether the placement is mirrored.
   170  	SetIsMirrored(v bool) Placement
   171  
   172  	// MaxShardSetID returns the maximum shard set id used so far; it is tracked to
   173  	// guarantee unique shard set id generation across placement changes.
   174  	MaxShardSetID() uint32
   175  
   176  	// SetMaxShardSetID sets the maximum shard set id used so far; it is tracked to
   177  	// guarantee unique shard set id generation across placement changes.
   178  	SetMaxShardSetID(value uint32) Placement
   179  
   180  	// String returns a description of the placement.
   181  	String() string
   182  
   183  	// Version returns the version of the placement retrieved from the backing MVCC store.
   184  	Version() int
   185  
   186  	// SetVersion sets the version of the placement object. Since version
   187  	// is determined by the backing MVCC store, calling this method has no
   188  	// effect in terms of the updated ServicePlacement that is written back
   189  	// to the MVCC store.
   190  	SetVersion(v int) Placement
   191  
   192  	// Proto returns the proto representation for the Placement,
        	// along with any error encountered while building it.
   193  	Proto() (*placementpb.Placement, error)
   194  
   195  	// Clone returns a clone of the Placement.
   196  	Clone() Placement
   197  }
   198  
   199  // Watch watches for updates of a placement.
   200  type Watch interface {
   201  	// C returns the receive-only notification channel. Its struct{} elements
        	// carry no data; the placement itself is retrieved with Get.
   202  	C() <-chan struct{}
   203  
   204  	// Get returns the latest version of the placement.
   205  	Get() (Placement, error)
   206  
   207  	// Close stops watching for placement updates.
   208  	Close()
   209  }
   210  
   211  // Watcher watches for updates of the placement. Unlike the Watch type,
   212  // it notifies the client of placement changes via a callback function
        // (presumably the one configured through WatcherOptions.SetOnPlacementChangedFn -- confirm).
   213  type Watcher interface {
   214  	// Watch starts watching the updates.
   215  	Watch() error
   216  
   217  	// Get returns the latest version of the placement.
   218  	Get() (Placement, error)
   219  
   220  	// Unwatch stops watching the updates.
   221  	Unwatch() error
   222  }
   223  
   224  // OnPlacementChangedFn is called when the placement has changed in the store,
   225  // or when it is loaded for the first time as the watcher starts.
   226  // In the latter case, the prev value is nil.
   227  type OnPlacementChangedFn func(prev, curr Placement)
   228  
   229  // WatcherOptions provides a set of placement watcher options.
        //
        // Every Set* method returns a WatcherOptions, so setter calls can be chained.
   230  type WatcherOptions interface {
   231  	// SetInstrumentOptions sets the instrument options.
   232  	SetInstrumentOptions(value instrument.Options) WatcherOptions
   233  
   234  	// InstrumentOptions returns the instrument options.
   235  	InstrumentOptions() instrument.Options
   236  
   237  	// SetStagedPlacementKey sets the kv key to watch for staged placement.
   238  	SetStagedPlacementKey(value string) WatcherOptions
   239  
   240  	// StagedPlacementKey returns the kv key to watch for staged placement.
   241  	StagedPlacementKey() string
   242  
   243  	// SetStagedPlacementStore sets the kv store holding the staged placement.
   244  	SetStagedPlacementStore(store kv.Store) WatcherOptions
   245  
   246  	// StagedPlacementStore returns the kv store holding the staged placement.
   247  	StagedPlacementStore() kv.Store
   248  
   249  	// SetInitWatchTimeout sets the initial watch timeout.
   250  	SetInitWatchTimeout(value time.Duration) WatcherOptions
   251  
   252  	// InitWatchTimeout returns the initial watch timeout.
   253  	InitWatchTimeout() time.Duration
   254  
   255  	// SetOnPlacementChangedFn sets the callback function invoked on placement change.
   256  	SetOnPlacementChangedFn(value OnPlacementChangedFn) WatcherOptions
   257  
   258  	// OnPlacementChangedFn returns the callback function invoked on placement change.
   259  	OnPlacementChangedFn() OnPlacementChangedFn
   260  }
   261  
   262  // TimeNanosFn is a function that returns a time expressed as Unix nanoseconds.
   263  type TimeNanosFn func() int64
   264  
   265  // ShardValidateFn validates the shard and reports the outcome as an error
        // (presumably nil when the shard passes validation -- confirm with implementations).
   266  type ShardValidateFn func(s shard.Shard) error
   267  
   268  // ValidateFn validates the placement and reports the outcome as an error
        // (presumably nil when the placement passes validation -- confirm with implementations).
   269  type ValidateFn func(p Placement) error
   270  
   271  // Options is the interface for placement options.
        //
        // Every Set* method returns an Options, so setter calls can be chained.
   272  type Options interface {
   273  	// AllowPartialReplace allows shards from the leaving instance to be
   274  	// placed on instances other than the new instances in a replace operation.
   275  	AllowPartialReplace() bool
   276  
   277  	// SetAllowPartialReplace sets AllowPartialReplace.
   278  	SetAllowPartialReplace(allowPartialReplace bool) Options
   279  
   280  	// AllowAllZones will enable the placement to contain hosts that
   281  	// are not contained within the same zone of the actual placement. This is
   282  	// needed for services that require cross zone communication.
   283  	AllowAllZones() bool
   284  
   285  	// SetAllowAllZones sets AllowAllZones.
   286  	SetAllowAllZones(allowAllZones bool) Options
   287  
   288  	// AddAllCandidates determines whether the placement will attempt to add all
   289  	// candidates when adding instances or just a single one.
   290  	AddAllCandidates() bool
   291  
   292  	// SetAddAllCandidates sets AddAllCandidates.
   293  	SetAddAllCandidates(addAllCandidates bool) Options
   294  
   295  	// InstanceSelector defines the strategy used to select new instances from a list of
   296  	// candidates when adding or replacing nodes in the placement. The default is determined
   297  	// by IsMirrored(); false => selector.NewNonMirroredSelector, true => NewPortMirroredSelector.
   298  	InstanceSelector() InstanceSelector
   299  
   300  	// SetInstanceSelector sets the instance selection strategy -- see InstanceSelector.
   301  	SetInstanceSelector(s InstanceSelector) Options
   302  
   303  	// IsSharded describes whether a placement needs to be sharded;
   304  	// when set to false, no specific shards will be assigned to any instance.
   305  	IsSharded() bool
   306  
   307  	// SetIsSharded sets IsSharded.
   308  	SetIsSharded(sharded bool) Options
   309  
   310  	// ShardStateMode describes the mode to manage shard state in the placement.
   311  	ShardStateMode() ShardStateMode
   312  
   313  	// SetShardStateMode sets ShardStateMode.
   314  	SetShardStateMode(value ShardStateMode) Options
   315  
   316  	// Dryrun will try to perform the placement operation but will not persist the final result.
   317  	Dryrun() bool
   318  
   319  	// SetDryrun sets the Dryrun value.
   320  	SetDryrun(d bool) Options
   321  
   322  	// IsMirrored returns whether the shard distribution should be mirrored
   323  	// to support master/slave model.
   324  	IsMirrored() bool
   325  
   326  	// SetIsMirrored sets IsMirrored.
   327  	SetIsMirrored(m bool) Options
   328  
   329  	// SkipPortMirroring returns whether to ignore the port numbers while selecting
   330  	// mirroring instances.
   331  	SkipPortMirroring() bool
   332  
   333  	// SetSkipPortMirroring sets whether to ignore the port numbers while selecting
   334  	// mirroring instances.
   335  	SetSkipPortMirroring(v bool) Options
   336  
   337  	// IsStaged returns whether the placement should keep all the snapshots.
   338  	IsStaged() bool
   339  
   340  	// SetIsStaged sets whether the placement should keep all the snapshots.
   341  	SetIsStaged(v bool) Options
   342  
   343  	// Compress returns whether the placement is compressed when written to storage.
   344  	Compress() bool
   345  
   346  	// SetCompress sets whether the placement is compressed when written to storage.
   347  	SetCompress(v bool) Options
   348  
   349  	// InstrumentOptions returns the instrument options.
   350  	InstrumentOptions() instrument.Options
   351  
   352  	// SetInstrumentOptions sets the instrument options.
   353  	SetInstrumentOptions(iopts instrument.Options) Options
   354  
   355  	// ValidZone returns the zone that instances must belong to in order
   356  	// to be added to a placement.
   357  	ValidZone() string
   358  
   359  	// SetValidZone sets the zone that instances must belong to in order to
   360  	// be added to a placement. By default the valid zone will be the zone of
   361  	// instances already in a placement, however if a placement is empty then
   362  	// it is necessary to specify the valid zone when adding the first
   363  	// instance.
   364  	SetValidZone(z string) Options
   365  
   366  	// PlacementCutoverNanosFn returns the TimeNanosFn for placement cutover time.
   367  	PlacementCutoverNanosFn() TimeNanosFn
   368  
   369  	// SetPlacementCutoverNanosFn sets the TimeNanosFn for placement cutover time.
   370  	SetPlacementCutoverNanosFn(fn TimeNanosFn) Options
   371  
   372  	// ShardCutoverNanosFn returns the TimeNanosFn for shard cutover time.
   373  	ShardCutoverNanosFn() TimeNanosFn
   374  
   375  	// SetShardCutoverNanosFn sets the TimeNanosFn for shard cutover time.
   376  	SetShardCutoverNanosFn(fn TimeNanosFn) Options
   377  
   378  	// ShardCutoffNanosFn returns the TimeNanosFn for shard cutoff time.
   379  	ShardCutoffNanosFn() TimeNanosFn
   380  
   381  	// SetShardCutoffNanosFn sets the TimeNanosFn for shard cutoff time.
   382  	SetShardCutoffNanosFn(fn TimeNanosFn) Options
   383  
   384  	// IsShardCutoverFn returns the validation function for shard cutover.
   385  	IsShardCutoverFn() ShardValidateFn
   386  
   387  	// SetIsShardCutoverFn sets the validation function for shard cutover.
   388  	SetIsShardCutoverFn(fn ShardValidateFn) Options
   389  
   390  	// IsShardCutoffFn returns the validation function for shard cutoff.
   391  	IsShardCutoffFn() ShardValidateFn
   392  
   393  	// SetIsShardCutoffFn sets the validation function for shard cutoff.
   394  	SetIsShardCutoffFn(fn ShardValidateFn) Options
   395  
   396  	// ValidateFnBeforeUpdate returns the validate function to be applied before
   397  	// a placement update.
   398  	ValidateFnBeforeUpdate() ValidateFn
   399  
   400  	// SetValidateFnBeforeUpdate sets the validate function to be applied before
   401  	// a placement update.
   402  	SetValidateFnBeforeUpdate(fn ValidateFn) Options
   403  
   404  	// NowFn returns the function used to get the current time.
   405  	NowFn() clock.NowFn
   406  
   407  	// SetNowFn sets the function used to get the current time.
   408  	SetNowFn(fn clock.NowFn) Options
   409  }
   410  
   411  // ShardStateMode describes the way to manage shard state in the placement.
   412  type ShardStateMode int
   413  
   414  const (
   415  	// StableShardStateOnly means the placement should only keep stable shard state.
        	// It is the zero value of ShardStateMode.
   416  	StableShardStateOnly ShardStateMode = iota
   417  
   418  	// IncludeTransitionalShardStates means the placement will include transitional shard states.
        	// Its value is 1 (the next iota).
   419  	IncludeTransitionalShardStates
   420  )
   421  
   422  // Storage provides read and write access to placement.
   423  type Storage interface {
   424  	// Set writes a placement and returns a Placement
        	// (presumably the placement as stored -- confirm against the implementation).
   425  	Set(p Placement) (Placement, error)
   426  
   427  	// CheckAndSet writes a placement if the current version
   428  	// matches the expected version.
   429  	CheckAndSet(p Placement, version int) (Placement, error)
   430  
   431  	// SetIfNotExist writes a placement.
        	// NOTE(review): the name suggests the write only happens when no placement
        	// exists yet -- confirm against the implementation.
   432  	SetIfNotExist(p Placement) (Placement, error)
   433  
   434  	// Placement reads the placement.
   435  	Placement() (Placement, error)
   436  
   437  	// Watch returns a watch for the placement updates.
   438  	Watch() (Watch, error)
   439  
   440  	// Delete deletes the placement.
   441  	Delete() error
   442  
   443  	// SetProto sets the proto as the placement.
        	// NOTE(review): the int result is presumably the resulting version -- confirm.
   444  	SetProto(p proto.Message) (int, error)
   445  
   446  	// CheckAndSetProto writes a proto if the current version
   447  	// matches the expected version.
        	// NOTE(review): the int result is presumably the resulting version -- confirm.
   448  	CheckAndSetProto(p proto.Message, version int) (int, error)
   449  
   450  	// Proto returns the placement proto.
        	// NOTE(review): the int result is presumably the version of that proto -- confirm.
   451  	Proto() (proto.Message, int, error)
   452  
   453  	// PlacementForVersion returns the placement of a specific version.
   454  	PlacementForVersion(version int) (Placement, error)
   455  }
   456  
   457  // Service handles the placement related operations for registered services.
   458  // All write or update operations will persist the generated placement before returning success.
        // It combines the Storage interface with the shared placement operations.
   459  type Service interface {
   460  	Storage
   461  	operations
   462  }
   463  
   464  // Operator is a purely in-memory version of Service; it applies placement related operations to
   465  // a local copy of a placement without persisting anything to backing storage. This can be useful
   466  // to apply multiple placement operations in a row before persisting them, e.g.
        // (error handling of the individual operations is elided for brevity):
   467  //
   468  // func DoMultipleOps(opts placement.Options, store placement.Storage) error {
   469  //    curPlacement, err := store.Placement()
        //    if err != nil {
        //        return err
        //    }
   470  //    op := placement.NewOperator(curPlacement, opts)
   471  //    op.ReplaceInstances(...)
   472  //    op.MarkAllShardsAvailable()
   473  //    _, err = store.CheckAndSet(op.Placement(), curPlacement.Version())
        //    return err
   474  // }
   475  type Operator interface {
   476  	operations
   477  
        	// Placement returns the placement held by the Operator -- presumably the local
        	// copy with every operation applied so far; confirm against the implementation.
   478  	Placement() Placement
   479  }
   480  
   481  // operations are the methods shared by Service and Operator. This type is private because it's
   482  // not intended to be implemented directly; Operator and Service are the correct ways to access
   483  // these methods.
   484  type operations interface {
   485  	// BuildInitialPlacement initializes a placement from the given instances,
        	// number of shards and replica factor.
   486  	BuildInitialPlacement(instances []Instance, numShards int, rf int) (Placement, error)
   487  
   488  	// AddReplica increases the replica factor by 1 in the placement.
   489  	AddReplica() (Placement, error)
   490  
   491  	// AddInstances adds instances from the candidate list to the placement
        	// and returns the new placement together with the instances that were added.
   492  	AddInstances(candidates []Instance) (newPlacement Placement, addedInstances []Instance, err error)
   493  
   494  	// RemoveInstances removes instances from the placement.
   495  	RemoveInstances(leavingInstanceIDs []string) (Placement, error)
   496  
   497  	// ReplaceInstances picks instances from the candidate list to replace instances in current placement
        	// and returns the new placement together with the candidates that were used.
   498  	ReplaceInstances(
   499  		leavingInstanceIDs []string,
   500  		candidates []Instance,
   501  	) (
   502  		newPlacement Placement,
   503  		usedInstances []Instance,
   504  		err error,
   505  	)
   506  
   507  	// MarkShardsAvailable marks the given shards of the given instance as available.
   508  	MarkShardsAvailable(instanceID string, shardIDs ...uint32) (Placement, error)
   509  
   510  	// MarkInstanceAvailable marks all the shards on a given instance as available.
   511  	MarkInstanceAvailable(instanceID string) (Placement, error)
   512  
   513  	// MarkAllShardsAvailable marks shard states as available where applicable.
   514  	MarkAllShardsAvailable() (Placement, error)
   515  
   516  	// BalanceShards rebalances load in the cluster to achieve the most balanced shard distribution.
   517  	BalanceShards() (Placement, error)
   518  }
   519  
   520  // Algorithm places shards on instances.
        //
        // Every method takes the placement to operate on as an argument and returns
        // a Placement and/or an error.
   521  type Algorithm interface {
   522  	// InitialPlacement initializes a sharding placement with the given replica factor.
   523  	InitialPlacement(instances []Instance, shards []uint32, rf int) (Placement, error)
   524  
   525  	// AddReplica increases the replica factor by 1 in the placement.
   526  	AddReplica(p Placement) (Placement, error)
   527  
   528  	// AddInstances adds a list of instances to the placement.
   529  	AddInstances(p Placement, instances []Instance) (Placement, error)
   530  
   531  	// RemoveInstances removes a list of instances from the placement.
   532  	RemoveInstances(p Placement, leavingInstanceIDs []string) (Placement, error)
   533  
   534  	// ReplaceInstances replaces a list of instances with new instances.
   535  	ReplaceInstances(
   536  		p Placement,
   537  		leavingInstanceIDs []string,
   538  		addingInstances []Instance,
   539  	) (Placement, error)
   540  
   541  	// IsCompatibleWith checks whether the algorithm could be applied to the given placement.
   542  	IsCompatibleWith(p Placement) error
   543  
   544  	// MarkShardsAvailable marks the given shards of the given instance as available.
   545  	MarkShardsAvailable(p Placement, instanceID string, shardIDs ...uint32) (Placement, error)
   546  
   547  	// MarkAllShardsAvailable marks shard states as available where applicable.
        	// NOTE(review): the bool result presumably reports whether any shard state
        	// was updated -- confirm against the implementation.
   548  	MarkAllShardsAvailable(p Placement) (Placement, bool, error)
   549  
   550  	// BalanceShards rebalances load in the cluster to achieve the most balanced shard distribution.
   551  	BalanceShards(p Placement) (Placement, error)
   552  }
   553  
   554  // InstanceSelector selects valid instances for the placement change.
        //
        // Each method picks from the given candidates and returns the selected
        // instances, or an error.
   555  type InstanceSelector interface {
   556  	// SelectInitialInstances selects instances for the initial placement,
        	// given the candidates and the replica factor.
   557  	SelectInitialInstances(
   558  		candidates []Instance,
   559  		rf int,
   560  	) ([]Instance, error)
   561  
   562  	// SelectAddingInstances selects instances to be added to the placement.
   563  	SelectAddingInstances(
   564  		candidates []Instance,
   565  		p Placement,
   566  	) ([]Instance, error)
   567  
   568  	// SelectReplaceInstances selects instances to replace existing instances in the placement.
   569  	SelectReplaceInstances(
   570  		candidates []Instance,
   571  		leavingInstanceIDs []string,
   572  		p Placement,
   573  	) ([]Instance, error)
   574  }
   575  
   576  // DeploymentPlanner generates deployment steps for a placement.
   577  type DeploymentPlanner interface {
   578  	// DeploymentSteps returns the deployment steps as a slice of instance slices
        	// (presumably one inner slice per step, holding the instances of that step -- confirm).
   579  	DeploymentSteps(p Placement) [][]Instance
   580  }
   581  
   582  // DeploymentOptions provides options for DeploymentPlanner.
   583  type DeploymentOptions interface {
   584  	// MaxStepSize returns the limit on the number of instances to be deployed in one step.
   585  	MaxStepSize() int
        
        	// SetMaxStepSize sets the limit on the number of instances to be deployed in one step.
   586  	SetMaxStepSize(stepSize int) DeploymentOptions
   587  }