github.com/m3db/m3@v1.5.0/src/cluster/placement/placement.go (about)

     1  // Copyright (c) 2016 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package placement
    22  
    23  import (
    24  	"errors"
    25  	"fmt"
    26  	"sort"
    27  	"strings"
    28  
    29  	"github.com/m3db/m3/src/cluster/generated/proto/placementpb"
    30  	"github.com/m3db/m3/src/cluster/shard"
    31  	xerrors "github.com/m3db/m3/src/x/errors"
    32  )
    33  
const (
	// uninitializedShardSetID represents uninitialized shard set id.
	// validate skips the "same shard set id implies same shards" comparison
	// for instances whose shard set id equals this value.
	uninitializedShardSetID = 0
)
    38  
// Sentinel errors produced while decoding protos and validating placements.
var (
	// errNilPlacementProto is returned by NewPlacementFromProto for a nil proto.
	errNilPlacementProto         = errors.New("nil placement proto")
	// errNilPlacementInstanceProto is returned by NewInstanceFromProto for a nil proto.
	errNilPlacementInstanceProto = errors.New("nil placement instance proto")
	// errDuplicatedShards is returned by validate when the placement's shard
	// id list contains the same id more than once.
	errDuplicatedShards          = errors.New("invalid placement, there are duplicated shards in one replica")
	// errUnexpectedShards is returned by validate when an instance owns a
	// shard id that is not in the placement's shard id list.
	errUnexpectedShards          = errors.New("invalid placement, there are unexpected shard ids on instance")
	// errMirrorNotSharded is returned by validate when a placement is
	// mirrored but not sharded.
	errMirrorNotSharded          = errors.New("invalid placement, mirrored placement must be sharded")
)
    46  
// placement is the in-memory Placement implementation. Its setters mutate
// the receiver and return it so calls can be chained.
type placement struct {
	// instances indexes the placement's instances by instance ID.
	instances        map[string]Instance
	// instancesByShard maps a shard id to the instances owning that shard;
	// SetInstances keeps each slice sorted by instance ID.
	instancesByShard map[uint32][]Instance
	// rf is the replica factor.
	rf               int
	// shards is the list of shard ids in the placement.
	shards           []uint32
	// cutoverNanos is the cutover time; the proto field is CutoverTime.
	cutoverNanos     int64
	// version is the placement version. It is not part of the proto
	// representation built by Proto.
	version          int
	// maxShardSetID is the upper bound validate enforces on every
	// instance's shard set id.
	maxShardSetID    uint32
	// isSharded reports whether instances are expected to own shards.
	isSharded        bool
	// isMirrored reports whether the placement is mirrored; validate
	// requires mirrored placements to be sharded.
	isMirrored       bool
}
    58  
// NewPlacement returns an empty Placement: no instances, no shards and all
// other properties at their zero value. Populate it with the Set* methods.
func NewPlacement() Placement {
	return &placement{}
}
    63  
    64  // NewPlacementFromProto creates a new placement from proto.
    65  func NewPlacementFromProto(p *placementpb.Placement) (Placement, error) {
    66  	if p == nil {
    67  		return nil, errNilPlacementProto
    68  	}
    69  
    70  	shards := make([]uint32, p.NumShards)
    71  	for i := uint32(0); i < p.NumShards; i++ {
    72  		shards[i] = i
    73  	}
    74  	instances := make([]Instance, 0, len(p.Instances))
    75  	for _, instance := range p.Instances {
    76  		pi, err := NewInstanceFromProto(instance)
    77  		if err != nil {
    78  			return nil, err
    79  		}
    80  		instances = append(instances, pi)
    81  	}
    82  
    83  	return NewPlacement().
    84  		SetInstances(instances).
    85  		SetShards(shards).
    86  		SetReplicaFactor(int(p.ReplicaFactor)).
    87  		SetIsSharded(p.IsSharded).
    88  		SetCutoverNanos(p.CutoverTime).
    89  		SetIsMirrored(p.IsMirrored).
    90  		SetMaxShardSetID(p.MaxShardSetId), nil
    91  }
    92  
    93  func (p *placement) InstancesForShard(shard uint32) []Instance {
    94  	if len(p.instancesByShard) == 0 {
    95  		return nil
    96  	}
    97  	return p.instancesByShard[shard]
    98  }
    99  
   100  func (p *placement) Instances() []Instance {
   101  	result := make([]Instance, 0, p.NumInstances())
   102  	for _, instance := range p.instances {
   103  		result = append(result, instance)
   104  	}
   105  	sort.Sort(ByIDAscending(result))
   106  	return result
   107  }
   108  
   109  func (p *placement) SetInstances(instances []Instance) Placement {
   110  	instancesMap := make(map[string]Instance, len(instances))
   111  	instancesByShard := make(map[uint32][]Instance)
   112  	for _, instance := range instances {
   113  		instancesMap[instance.ID()] = instance
   114  		for _, shard := range instance.Shards().AllIDs() {
   115  			instancesByShard[shard] = append(instancesByShard[shard], instance)
   116  		}
   117  	}
   118  
   119  	// Sort the instances by their ids for deterministic ordering.
   120  	for _, instances := range instancesByShard {
   121  		sort.Sort(ByIDAscending(instances))
   122  	}
   123  
   124  	p.instancesByShard = instancesByShard
   125  	p.instances = instancesMap
   126  	return p
   127  }
   128  
// NumInstances returns the number of instances in the placement.
func (p *placement) NumInstances() int {
	return len(p.instances)
}
   132  
// Instance looks up an instance by its ID. The boolean is false when the
// placement holds no instance with that ID.
func (p *placement) Instance(id string) (Instance, bool) {
	instance, ok := p.instances[id]
	return instance, ok
}
   137  
// ReplicaFactor returns the placement's replica factor.
func (p *placement) ReplicaFactor() int {
	return p.rf
}
   141  
// SetReplicaFactor sets the replica factor and returns the placement for
// chaining.
func (p *placement) SetReplicaFactor(rf int) Placement {
	p.rf = rf
	return p
}
   146  
// Shards returns the placement's shard ids. The returned slice is the
// placement's internal one, not a copy.
func (p *placement) Shards() []uint32 {
	return p.shards
}
   150  
// SetShards sets the placement's shard ids and returns the placement for
// chaining. The slice is stored as-is, without copying.
func (p *placement) SetShards(shards []uint32) Placement {
	p.shards = shards
	return p
}
   155  
// NumShards returns the number of shard ids in the placement.
func (p *placement) NumShards() int {
	return len(p.shards)
}
   159  
// IsSharded reports whether the placement is sharded.
func (p *placement) IsSharded() bool {
	return p.isSharded
}
   163  
// SetIsSharded sets whether the placement is sharded and returns the
// placement for chaining.
func (p *placement) SetIsSharded(v bool) Placement {
	p.isSharded = v
	return p
}
   168  
// IsMirrored reports whether the placement is mirrored.
func (p *placement) IsMirrored() bool {
	return p.isMirrored
}
   172  
// SetIsMirrored sets whether the placement is mirrored and returns the
// placement for chaining.
func (p *placement) SetIsMirrored(v bool) Placement {
	p.isMirrored = v
	return p
}
   177  
// MaxShardSetID returns the maximum shard set id recorded on the placement.
func (p *placement) MaxShardSetID() uint32 {
	return p.maxShardSetID
}
   181  
// SetMaxShardSetID sets the maximum shard set id and returns the placement
// for chaining.
func (p *placement) SetMaxShardSetID(v uint32) Placement {
	p.maxShardSetID = v
	return p
}
   186  
// CutoverNanos returns the placement's cutover time.
func (p *placement) CutoverNanos() int64 {
	return p.cutoverNanos
}
   190  
// SetCutoverNanos sets the placement's cutover time and returns the
// placement for chaining.
func (p *placement) SetCutoverNanos(cutoverNanos int64) Placement {
	p.cutoverNanos = cutoverNanos
	return p
}
   195  
// Version returns the placement's version.
func (p *placement) Version() int {
	return p.version
}
   199  
// SetVersion sets the placement's version and returns the placement for
// chaining.
func (p *placement) SetVersion(v int) Placement {
	p.version = v
	return p
}
   204  
   205  func (p *placement) String() string {
   206  	return fmt.Sprintf(
   207  		"Placement[Instances=%s, NumShards=%d, ReplicaFactor=%d, IsSharded=%v, IsMirrored=%v]",
   208  		p.Instances(), p.NumShards(), p.ReplicaFactor(), p.IsSharded(), p.IsMirrored(),
   209  	)
   210  }
   211  
   212  func (p *placement) Proto() (*placementpb.Placement, error) {
   213  	instances := make(map[string]*placementpb.Instance, p.NumInstances())
   214  	for _, instance := range p.Instances() {
   215  		pi, err := instance.Proto()
   216  		if err != nil {
   217  			return nil, err
   218  		}
   219  		instances[instance.ID()] = pi
   220  	}
   221  
   222  	return &placementpb.Placement{
   223  		Instances:     instances,
   224  		ReplicaFactor: uint32(p.ReplicaFactor()),
   225  		NumShards:     uint32(p.NumShards()),
   226  		IsSharded:     p.IsSharded(),
   227  		CutoverTime:   p.CutoverNanos(),
   228  		IsMirrored:    p.IsMirrored(),
   229  		MaxShardSetId: p.MaxShardSetID(),
   230  	}, nil
   231  }
   232  
   233  func (p *placement) Clone() Placement {
   234  	return NewPlacement().
   235  		SetInstances(Instances(p.Instances()).Clone()).
   236  		SetShards(p.Shards()).
   237  		SetReplicaFactor(p.ReplicaFactor()).
   238  		SetIsSharded(p.IsSharded()).
   239  		SetIsMirrored(p.IsMirrored()).
   240  		SetCutoverNanos(p.CutoverNanos()).
   241  		SetMaxShardSetID(p.MaxShardSetID()).
   242  		SetVersion(p.Version())
   243  }
   244  
   245  // Validate validates a placement to ensure:
   246  // - The shards on each instance are in valid state.
   247  // - The total number of shards match rf * num_shards_per_replica.
   248  // - Each shard shows up rf times.
   249  // - There is one Initializing shard for each Leaving shard.
   250  // - The instances with same shard_set_id owns the same shards.
   251  func Validate(p Placement) error {
   252  	if err := validate(p); err != nil {
   253  		return xerrors.NewInvalidParamsError(err)
   254  	}
   255  	return nil
   256  }
   257  
// validate checks the invariants of a placement and returns the first
// violation found as a plain error (Validate wraps it), or nil.
//
// Checks applied to every placement:
//   - a mirrored placement must be sharded;
//   - the placement's shard id list has no duplicates;
//   - every instance has a non-empty endpoint, owns shards if and only if the
//     placement is sharded, and has a shard set id no larger than the
//     placement's max shard set id;
//   - every shard on an instance is in the placement's shard id list and is
//     Available, Initializing or Leaving;
//   - an Initializing shard with a source ID points at an existing instance
//     that holds the same shard in Leaving state, and no Leaving shard is
//     claimed by two Initializing shards;
//   - instances sharing a non-zero shard set id own the same shard ids.
//
// Checks applied only to sharded placements:
//   - Leaving shards do not outnumber Initializing shards, and equal the
//     number of Initializing shards that carry a source ID;
//   - Available plus Initializing shards total len(shards) * replica factor,
//     and each shard id is counted exactly replica-factor times.
func validate(p Placement) error {
	if p.IsMirrored() && !p.IsSharded() {
		return errMirrorNotSharded
	}

	// One zero-initialized counter per shard id. Fewer map entries than
	// slice elements means some id appears more than once.
	shardCountMap := convertShardSliceToMap(p.Shards())
	if len(shardCountMap) != len(p.Shards()) {
		return errDuplicatedShards
	}

	expectedTotal := len(p.Shards()) * p.ReplicaFactor()
	totalCapacity := 0
	totalLeaving := 0
	totalInit := 0
	totalInitWithSourceID := 0
	// Source instance ID -> shard id -> ID of the instance whose
	// Initializing shard has already claimed that Leaving shard.
	instancesLeavingShardsWithMatchingInitShards := make(map[string]map[uint32]string)
	maxShardSetID := p.MaxShardSetID()
	// First instance seen for each non-zero shard set id; later instances
	// with the same id are compared against it.
	instancesByShardSetID := make(map[uint32]Instance, p.NumInstances())
	for _, instance := range p.Instances() {
		if instance.Endpoint() == "" {
			return fmt.Errorf("instance %s does not contain valid endpoint", instance.String())
		}
		if instance.Shards().NumShards() == 0 && p.IsSharded() {
			return fmt.Errorf("instance %s contains no shard in a sharded placement", instance.String())
		}
		if instance.Shards().NumShards() != 0 && !p.IsSharded() {
			return fmt.Errorf("instance %s contains shards in a non-sharded placement", instance.String())
		}
		shardSetID := instance.ShardSetID()
		if shardSetID > maxShardSetID {
			return fmt.Errorf("instance %s shard set id %d is larger than max shard set id %d in the placement", instance.String(), shardSetID, maxShardSetID)
		}
		for _, s := range instance.Shards().All() {
			count, exist := shardCountMap[s.ID()]
			if !exist {
				return errUnexpectedShards
			}
			// Available and Initializing shards count towards the shard's
			// replica count and the total capacity; Leaving shards do not.
			switch s.State() {
			case shard.Available:
				shardCountMap[s.ID()] = count + 1
				totalCapacity++
			case shard.Initializing:
				totalInit++
				shardCountMap[s.ID()] = count + 1
				totalCapacity++
				if sourceID := s.SourceID(); sourceID != "" {
					totalInitWithSourceID++

					// Check the instance.
					leaving, ok := p.Instance(sourceID)
					if !ok {
						return fmt.Errorf(
							"instance %s has initializing shard %d with "+
								"source ID %s but no such instance in placement",
							instance.ID(), s.ID(), sourceID)
					}

					// Check has leaving shard.
					leavingShard, ok := leaving.Shards().Shard(s.ID())
					if !ok {
						return fmt.Errorf(
							"instance %s has initializing shard %d with "+
								"source ID %s but leaving instance has no such shard",
							instance.ID(), s.ID(), sourceID)
					}

					// Check the shard is leaving.
					if state := leavingShard.State(); state != shard.Leaving {
						return fmt.Errorf(
							"instance %s has initializing shard %d with "+
								"source ID %s but leaving instance has shard with state %s",
							instance.ID(), s.ID(), sourceID, state.String())
					}

					// Make sure does not get double matched.
					matches, ok := instancesLeavingShardsWithMatchingInitShards[sourceID]
					if !ok {
						matches = make(map[uint32]string)
						instancesLeavingShardsWithMatchingInitShards[sourceID] = matches
					}

					match, ok := matches[s.ID()]
					if ok {
						return fmt.Errorf(
							"instance %s has initializing shard %d with "+
								"source ID %s but leaving instance has shard already matched by %s",
							instance.ID(), s.ID(), sourceID, match)
					}

					// Track that it's matched.
					matches[s.ID()] = instance.ID()
				}
			case shard.Leaving:
				totalLeaving++
			default:
				return fmt.Errorf("invalid shard state %v for shard %d", s.State(), s.ID())
			}
		}
		// Instances without an assigned shard set id are not compared
		// against each other.
		if shardSetID == uninitializedShardSetID {
			continue
		}
		existingInstance, exists := instancesByShardSetID[shardSetID]
		if !exists {
			instancesByShardSetID[shardSetID] = instance
		} else {
			// Both existing shard ids and current shard ids are sorted in ascending order.
			// NOTE(review): the positional comparison below relies on AllIDs
			// returning sorted ids — confirm against the shard package.
			existingShardIDs := existingInstance.Shards().AllIDs()
			currShardIDs := instance.Shards().AllIDs()
			if len(existingShardIDs) != len(currShardIDs) {
				return fmt.Errorf("instance %s and %s have the same shard set id %d but different number of shards", existingInstance.String(), instance.String(), shardSetID)
			}
			for i := 0; i < len(existingShardIDs); i++ {
				if existingShardIDs[i] != currShardIDs[i] {
					return fmt.Errorf("instance %s and %s have the same shard set id %d but different shards", existingInstance.String(), instance.String(), shardSetID)
				}
			}
		}
	}

	// The remaining checks reason about shard counts and only apply to
	// sharded placements.
	if !p.IsSharded() {
		return nil
	}

	// initializing could be more than leaving for cases like initial placement
	if totalLeaving > totalInit {
		return fmt.Errorf("invalid placement, %d shards in Leaving state, more than %d in Initializing state", totalLeaving, totalInit)
	}

	// Every Leaving shard must be paired with an Initializing shard that
	// names a source instance.
	if totalLeaving != totalInitWithSourceID {
		return fmt.Errorf("invalid placement, %d shards in Leaving state, not equal %d in Initializing state with source id", totalLeaving, totalInitWithSourceID)
	}

	if expectedTotal != totalCapacity {
		return fmt.Errorf("invalid placement, the total available shards in the placement is %d, expecting %d", totalCapacity, expectedTotal)
	}

	// The total matching is not enough: each individual shard must be
	// counted exactly replica-factor times.
	for shard, c := range shardCountMap {
		if p.ReplicaFactor() != c {
			return fmt.Errorf("invalid shard count for shard %d: expected %d, actual %d", shard, p.ReplicaFactor(), c)
		}
	}
	return nil
}
   401  
   402  func convertShardSliceToMap(ids []uint32) map[uint32]int {
   403  	shardCounts := make(map[uint32]int)
   404  	for _, id := range ids {
   405  		shardCounts[id] = 0
   406  	}
   407  	return shardCounts
   408  }
   409  
// NewInstance returns a new Instance whose shards are initialized with
// shard.NewShards(nil) and whose other properties are left at their zero
// value.
func NewInstance() Instance {
	return &instance{shards: shard.NewShards(nil)}
}
   414  
   415  // NewEmptyInstance returns a Instance with some basic properties but no shards assigned
   416  func NewEmptyInstance(id, isolationGroup, zone, endpoint string, weight uint32) Instance {
   417  	return &instance{
   418  		id:             id,
   419  		isolationGroup: isolationGroup,
   420  		zone:           zone,
   421  		weight:         weight,
   422  		endpoint:       endpoint,
   423  		shards:         shard.NewShards(nil),
   424  	}
   425  }
   426  
   427  // NewInstanceFromProto creates a new placement instance from proto.
   428  func NewInstanceFromProto(instance *placementpb.Instance) (Instance, error) {
   429  	if instance == nil {
   430  		return nil, errNilPlacementInstanceProto
   431  	}
   432  	shards, err := shard.NewShardsFromProto(instance.Shards)
   433  	if err != nil {
   434  		return nil, err
   435  	}
   436  	debugPort := uint32(0)
   437  	if instance.Metadata != nil {
   438  		debugPort = instance.Metadata.DebugPort
   439  	}
   440  
   441  	return NewInstance().
   442  		SetID(instance.Id).
   443  		SetIsolationGroup(instance.IsolationGroup).
   444  		SetWeight(instance.Weight).
   445  		SetZone(instance.Zone).
   446  		SetEndpoint(instance.Endpoint).
   447  		SetShards(shards).
   448  		SetShardSetID(instance.ShardSetId).
   449  		SetHostname(instance.Hostname).
   450  		SetPort(instance.Port).
   451  		SetMetadata(InstanceMetadata{
   452  			DebugPort: debugPort,
   453  		}), nil
   454  }
   455  
// instance is the in-memory Instance implementation. Its setters mutate the
// receiver and return it so calls can be chained.
type instance struct {
	// id identifies the instance; placements index instances by it.
	id             string
	// isolationGroup is the instance's isolation group.
	isolationGroup string
	// zone is the instance's zone.
	zone           string
	// endpoint is the instance's endpoint; validate rejects an empty one.
	endpoint       string
	// hostname is the instance's hostname.
	hostname       string
	// shards holds the shards owned by the instance.
	shards         shard.Shards
	// port is the instance's port.
	port           uint32
	// weight is the instance's weight.
	weight         uint32
	// shardSetID is the instance's shard set id; zero means uninitialized.
	shardSetID     uint32
	// metadata carries additional instance metadata such as the debug port.
	metadata       InstanceMetadata
}
   468  
   469  func (i *instance) String() string {
   470  	return fmt.Sprintf(
   471  		"Instance[ID=%s, IsolationGroup=%s, Zone=%s, Weight=%d, Endpoint=%s, Hostname=%s, Port=%d, ShardSetID=%d, Shards=%s, Metadata=%+v]",
   472  		i.id, i.isolationGroup, i.zone, i.weight, i.endpoint, i.hostname, i.port, i.shardSetID, i.shards.String(), i.metadata,
   473  	)
   474  }
   475  
// ID returns the instance's ID.
func (i *instance) ID() string {
	return i.id
}
   479  
// SetID sets the instance's ID and returns the instance for chaining.
func (i *instance) SetID(id string) Instance {
	i.id = id
	return i
}
   484  
// IsolationGroup returns the instance's isolation group.
func (i *instance) IsolationGroup() string {
	return i.isolationGroup
}
   488  
// SetIsolationGroup sets the instance's isolation group and returns the
// instance for chaining.
func (i *instance) SetIsolationGroup(r string) Instance {
	i.isolationGroup = r
	return i
}
   493  
// Zone returns the instance's zone.
func (i *instance) Zone() string {
	return i.zone
}
   497  
// SetZone sets the instance's zone and returns the instance for chaining.
func (i *instance) SetZone(z string) Instance {
	i.zone = z
	return i
}
   502  
// Weight returns the instance's weight.
func (i *instance) Weight() uint32 {
	return i.weight
}
   506  
// SetWeight sets the instance's weight and returns the instance for
// chaining.
func (i *instance) SetWeight(w uint32) Instance {
	i.weight = w
	return i
}
   511  
// Endpoint returns the instance's endpoint.
func (i *instance) Endpoint() string {
	return i.endpoint
}
   515  
// SetEndpoint sets the instance's endpoint and returns the instance for
// chaining.
func (i *instance) SetEndpoint(ip string) Instance {
	i.endpoint = ip
	return i
}
   520  
// Hostname returns the instance's hostname.
func (i *instance) Hostname() string {
	return i.hostname
}
   524  
// SetHostname sets the instance's hostname and returns the instance for
// chaining.
func (i *instance) SetHostname(value string) Instance {
	i.hostname = value
	return i
}
   529  
// Port returns the instance's port.
func (i *instance) Port() uint32 {
	return i.port
}
   533  
// SetPort sets the instance's port and returns the instance for chaining.
func (i *instance) SetPort(value uint32) Instance {
	i.port = value
	return i
}
   538  
// ShardSetID returns the instance's shard set id; zero means uninitialized.
func (i *instance) ShardSetID() uint32 {
	return i.shardSetID
}
   542  
// SetShardSetID sets the instance's shard set id and returns the instance
// for chaining.
func (i *instance) SetShardSetID(value uint32) Instance {
	i.shardSetID = value
	return i
}
   547  
// Shards returns the shards owned by the instance. The value returned is
// the one held by the instance, not a copy.
func (i *instance) Shards() shard.Shards {
	return i.shards
}
   551  
// SetShards replaces the shards owned by the instance and returns the
// instance for chaining. The value is stored as-is, without cloning.
func (i *instance) SetShards(s shard.Shards) Instance {
	i.shards = s
	return i
}
   556  
// Metadata returns the instance's metadata.
func (i *instance) Metadata() InstanceMetadata {
	return i.metadata
}
   560  
// SetMetadata sets the instance's metadata and returns the instance for
// chaining.
func (i *instance) SetMetadata(value InstanceMetadata) Instance {
	i.metadata = value
	return i
}
   565  
   566  func (i *instance) Proto() (*placementpb.Instance, error) {
   567  	ss, err := i.Shards().Proto()
   568  	if err != nil {
   569  		return &placementpb.Instance{}, err
   570  	}
   571  
   572  	return &placementpb.Instance{
   573  		Id:             i.ID(),
   574  		IsolationGroup: i.IsolationGroup(),
   575  		Zone:           i.Zone(),
   576  		Weight:         i.Weight(),
   577  		Endpoint:       i.Endpoint(),
   578  		Shards:         ss,
   579  		ShardSetId:     i.ShardSetID(),
   580  		Hostname:       i.Hostname(),
   581  		Port:           i.Port(),
   582  		Metadata: &placementpb.InstanceMetadata{
   583  			DebugPort: i.Metadata().DebugPort,
   584  		},
   585  	}, nil
   586  }
   587  
// IsLeaving reports whether the instance owns at least one shard and all of
// its shards are in Leaving state.
func (i *instance) IsLeaving() bool {
	return i.allShardsInState(shard.Leaving)
}
   591  
// IsInitializing reports whether the instance owns at least one shard and
// all of its shards are in Initializing state.
func (i *instance) IsInitializing() bool {
	return i.allShardsInState(shard.Initializing)
}
   595  
// IsAvailable reports whether the instance owns at least one shard and all
// of its shards are in Available state.
func (i *instance) IsAvailable() bool {
	return i.allShardsInState(shard.Available)
}
   599  
   600  func (i *instance) allShardsInState(s shard.State) bool {
   601  	ss := i.Shards()
   602  	numShards := ss.NumShards()
   603  	if numShards == 0 {
   604  		return false
   605  	}
   606  	return numShards == ss.NumShardsForState(s)
   607  }
   608  
   609  func (i *instance) Clone() Instance {
   610  	return NewInstance().
   611  		SetID(i.ID()).
   612  		SetIsolationGroup(i.IsolationGroup()).
   613  		SetZone(i.Zone()).
   614  		SetWeight(i.Weight()).
   615  		SetEndpoint(i.Endpoint()).
   616  		SetHostname(i.Hostname()).
   617  		SetPort(i.Port()).
   618  		SetShardSetID(i.ShardSetID()).
   619  		SetShards(i.Shards().Clone()).
   620  		SetMetadata(i.Metadata())
   621  }
   622  
   623  // Instances is a slice of instances that can produce a debug string.
   624  type Instances []Instance
   625  
   626  func (instances Instances) String() string {
   627  	if len(instances) == 0 {
   628  		return "[]"
   629  	}
   630  	// 256 should be pretty sufficient for the string representation
   631  	// of each instance.
   632  	strs := make([]string, 0, len(instances)*256)
   633  	strs = append(strs, "[\n")
   634  	for _, elem := range instances {
   635  		strs = append(strs, "\t"+elem.String()+",\n")
   636  	}
   637  	strs = append(strs, "]")
   638  	return strings.Join(strs, "")
   639  }
   640  
   641  // Clone returns a set of cloned instances.
   642  func (instances Instances) Clone() Instances {
   643  	cloned := make([]Instance, len(instances))
   644  	for i, instance := range instances {
   645  		cloned[i] = instance.Clone()
   646  	}
   647  	return cloned
   648  }
   649  
   650  // ByIDAscending sorts Instance by ID ascending
   651  type ByIDAscending []Instance
   652  
   653  func (s ByIDAscending) Len() int {
   654  	return len(s)
   655  }
   656  
   657  func (s ByIDAscending) Less(i, j int) bool {
   658  	return strings.Compare(s[i].ID(), s[j].ID()) < 0
   659  }
   660  
   661  func (s ByIDAscending) Swap(i, j int) {
   662  	s[i], s[j] = s[j], s[i]
   663  }