github.com/m3db/m3@v1.5.0/src/cluster/placement/algo/mirrored.go

// Copyright (c) 2017 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package algo

import (
	"errors"
	"fmt"
	"strconv"

	"github.com/m3db/m3/src/cluster/placement"
	"github.com/m3db/m3/src/cluster/shard"
	xerrors "github.com/m3db/m3/src/x/errors"
)

var (
	errIncompatibleWithMirrorAlgo = errors.New("could not apply mirrored algo on the placement")
)

type mirroredAlgorithm struct {
	opts        placement.Options
	shardedAlgo placement.Algorithm
}

func newMirroredAlgorithm(opts placement.Options) placement.Algorithm {
	return mirroredAlgorithm{
		opts: opts,
		// Mirrored algorithm requires full replacement.
		shardedAlgo: newShardedAlgorithm(opts.SetAllowPartialReplace(false)),
	}
}
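
// A minimal usage sketch (illustrative only; the option and instance values
// below are assumptions, not taken from this file):
//
//	algo := newMirroredAlgorithm(placement.NewOptions().SetIsMirrored(true))
//	p, err := algo.InitialPlacement(instances, []uint32{0, 1, 2, 3}, 2)
//	if err != nil {
//		// handle incompatible input, e.g. mismatched shard set groups
//	}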

func (a mirroredAlgorithm) IsCompatibleWith(p placement.Placement) error {
	if !p.IsMirrored() {
		return errIncompatibleWithMirrorAlgo
	}

	if !p.IsSharded() {
		return errIncompatibleWithMirrorAlgo
	}

	return nil
}

func (a mirroredAlgorithm) InitialPlacement(
	instances []placement.Instance,
	shards []uint32,
	rf int,
) (placement.Placement, error) {
	mirrorInstances, err := groupInstancesByShardSetID(instances, rf)
	if err != nil {
		return nil, err
	}

	// We use the sharded algorithm to generate a mirror placement with rf equal to 1.
	mirrorPlacement, err := a.shardedAlgo.InitialPlacement(mirrorInstances, shards, 1)
	if err != nil {
		return nil, err
	}

	return placementFromMirror(mirrorPlacement, instances, rf)
}
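
// Illustrative example (hypothetical instances, continuing the sketch above):
// with rf=2, two mirrored pairs group into two virtual instances, the sharded
// algorithm places each shard exactly once across them, and placementFromMirror
// copies each virtual instance's shards onto both real instances of its pair:
//
//	a1 := placement.NewInstance().SetID("A").SetIsolationGroup("g1").SetWeight(1).SetShardSetID(1)
//	a2 := placement.NewInstance().SetID("B").SetIsolationGroup("g2").SetWeight(1).SetShardSetID(1)
//	b1 := placement.NewInstance().SetID("C").SetIsolationGroup("g1").SetWeight(1).SetShardSetID(2)
//	b2 := placement.NewInstance().SetID("D").SetIsolationGroup("g2").SetWeight(1).SetShardSetID(2)
//	p, err := algo.InitialPlacement([]placement.Instance{a1, a2, b1, b2}, []uint32{0, 1, 2, 3}, 2)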

func (a mirroredAlgorithm) AddReplica(p placement.Placement) (placement.Placement, error) {
	// TODO(cw): We could support AddReplica(p placement.Placement, instances []placement.Instance)
	// and apply the shards from the new replica to the adding instances in the future.
	return nil, errors.New("not supported")
}

func (a mirroredAlgorithm) RemoveInstances(
	p placement.Placement,
	instanceIDs []string,
) (placement.Placement, error) {
	if err := a.IsCompatibleWith(p); err != nil {
		return nil, err
	}

	nowNanos := a.opts.NowFn()().UnixNano()
	// If the instances being removed are all initializing instances in the
	// placement, we just need to return their shards back to the source instances.
	if globalChecker.allInitializing(p, instanceIDs, nowNanos) {
		return a.returnInitializingShards(p, instanceIDs)
	}

	p, _, err := a.MarkAllShardsAvailable(p)
	if err != nil {
		return nil, err
	}

	removingInstances := make([]placement.Instance, 0, len(instanceIDs))
	for _, id := range instanceIDs {
		instance, ok := p.Instance(id)
		if !ok {
			return nil, fmt.Errorf("instance %s does not exist in the placement", id)
		}
		removingInstances = append(removingInstances, instance)
	}

	mirrorPlacement, err := mirrorFromPlacement(p)
	if err != nil {
		return nil, err
	}

	mirrorInstances, err := groupInstancesByShardSetID(removingInstances, p.ReplicaFactor())
	if err != nil {
		return nil, err
	}

	for _, instance := range mirrorInstances {
		if mirrorPlacement, err = a.shardedAlgo.RemoveInstances(
			mirrorPlacement,
			[]string{instance.ID()},
		); err != nil {
			return nil, err
		}
	}
	return placementFromMirror(mirrorPlacement, p.Instances(), p.ReplicaFactor())
}
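
// Sketch of a mirrored removal (continuing the hypothetical IDs above): all
// replicas of a shard set must be removed together, e.g.
//
//	p, err = algo.RemoveInstances(p, []string{"A", "B"})
//
// where "A" and "B" share one shard set ID. Removing only "A" would fail in
// groupInstancesByShardSetID, since its shard set would have one instance
// instead of rf.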

func (a mirroredAlgorithm) AddInstances(
	p placement.Placement,
	addingInstances []placement.Instance,
) (placement.Placement, error) {
	if err := a.IsCompatibleWith(p); err != nil {
		return nil, err
	}

	nowNanos := a.opts.NowFn()().UnixNano()
	// If the instances being added are all leaving instances in the placement,
	// we just need to reclaim their shards.
	if globalChecker.allLeaving(p, addingInstances, nowNanos) {
		return a.reclaimLeavingShards(p, addingInstances)
	}

	p, _, err := a.MarkAllShardsAvailable(p)
	if err != nil {
		return nil, err
	}

	// At this point, all leaving instances in the placement are cleaned up.
	if addingInstances, err = validAddingInstances(p, addingInstances); err != nil {
		return nil, err
	}

	mirrorPlacement, err := mirrorFromPlacement(p)
	if err != nil {
		return nil, err
	}

	mirrorInstances, err := groupInstancesByShardSetID(addingInstances, p.ReplicaFactor())
	if err != nil {
		return nil, err
	}

	for _, instance := range mirrorInstances {
		if mirrorPlacement, err = a.shardedAlgo.AddInstances(
			mirrorPlacement,
			[]placement.Instance{instance},
		); err != nil {
			return nil, err
		}
	}

	return placementFromMirror(mirrorPlacement, append(p.Instances(), addingInstances...), p.ReplicaFactor())
}
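
// Sketch of a mirrored expansion (hypothetical instances e and f sharing a
// shard set ID assumed to be assigned by the placement service):
//
//	p, err = algo.AddInstances(p, []placement.Instance{e, f})
//
// As with removal, the adding instances must form complete shard set groups
// of size rf, or groupInstancesByShardSetID rejects them.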

func (a mirroredAlgorithm) ReplaceInstances(
	p placement.Placement,
	leavingInstanceIDs []string,
	addingInstances []placement.Instance,
) (placement.Placement, error) {
	err := a.IsCompatibleWith(p)
	if err != nil {
		return nil, err
	}

	p = p.Clone()
	if len(addingInstances) != len(leavingInstanceIDs) {
		return nil, fmt.Errorf("could not replace %d instances with %d instances for mirrored replace", len(leavingInstanceIDs), len(addingInstances))
	}

	nowNanos := a.opts.NowFn()().UnixNano()

	// Revert a pending replace: if the adding instances are all leaving and
	// the leaving instances are all initializing, swap the shards back.
	if localChecker.allLeaving(p, addingInstances, nowNanos) &&
		localChecker.allInitializing(p, leavingInstanceIDs, nowNanos) {
		if p, err = a.reclaimLeavingShards(p, addingInstances); err != nil {
			return nil, err
		}

		return a.returnInitializingShards(p, leavingInstanceIDs)
	}

	// Mark shards available only for the specified leaving instances and their peers.
	// This allows multiple replaces that do not overlap by shard ownership.
	for _, leavingInstanceID := range leavingInstanceIDs {
		if p, err = a.markInstanceAndItsPeersAvailable(p, leavingInstanceID); err != nil {
			return nil, err
		}
	}

	if !localChecker.allAvailable(p, leavingInstanceIDs, nowNanos) {
		return nil, fmt.Errorf("replaced instances must have all their shards available")
	}

	// At this point, all specified leaving instances and their peers in the placement are cleaned up.
	if addingInstances, err = validAddingInstances(p, addingInstances); err != nil {
		return nil, err
	}

	for i := range leavingInstanceIDs {
		// We want full replacement for each instance.
		if p, err = a.shardedAlgo.ReplaceInstances(
			p,
			leavingInstanceIDs[i:i+1],
			addingInstances[i:i+1],
		); err != nil {
			return nil, err
		}
	}
	return p, nil
}
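
// Sketch of pairwise replacement (hypothetical IDs, continuing the sketch
// above): leaving IDs and adding instances are matched by index, and each pair
// goes through the sharded algorithm as a full replacement:
//
//	p, err = algo.ReplaceInstances(p, []string{"A", "C"}, []placement.Instance{e, f})
//	// "A" is replaced by e and "C" by f.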

func (a mirroredAlgorithm) markInstanceAndItsPeersAvailable(
	p placement.Placement,
	instanceID string,
) (placement.Placement, error) {
	p = p.Clone()
	instance, exist := p.Instance(instanceID)
	if !exist {
		return nil, fmt.Errorf("instance %s does not exist in placement", instanceID)
	}

	// Find all peers of the specified instance, that is, those owning the same
	// shard set. This includes instances that are replaced by this instance or
	// that replace this instance.
	var ownerIDs []string
	for _, i := range p.Instances() {
		if i.ShardSetID() == instance.ShardSetID() {
			ownerIDs = append(ownerIDs, i.ID())
		}
	}

	for _, id := range ownerIDs {
		instance, exists := p.Instance(id)
		if !exists {
			// An instance with leaving shards could already have been removed from
			// the placement after its initializing shards were marked available
			// (if past cutover time) by the code block below.
			continue
		}

		for _, s := range instance.Shards().All() {
			if s.State() == shard.Initializing {
				var err error
				// MarkShardsAvailable will properly handle the respective leaving
				// shards of the peer leaving instance.
				p, err = a.shardedAlgo.MarkShardsAvailable(p, id, s.ID())
				if err != nil {
					return nil, xerrors.Wrapf(err, "could not mark shards available of instance %s", id)
				}
			}
		}
	}

	return p, nil
}

func (a mirroredAlgorithm) MarkShardsAvailable(
	p placement.Placement,
	instanceID string,
	shardIDs ...uint32,
) (placement.Placement, error) {
	if err := a.IsCompatibleWith(p); err != nil {
		return nil, err
	}

	return a.shardedAlgo.MarkShardsAvailable(p, instanceID, shardIDs...)
}

func (a mirroredAlgorithm) MarkAllShardsAvailable(
	p placement.Placement,
) (placement.Placement, bool, error) {
	if err := a.IsCompatibleWith(p); err != nil {
		return nil, false, err
	}

	return a.shardedAlgo.MarkAllShardsAvailable(p)
}

func (a mirroredAlgorithm) BalanceShards(
	p placement.Placement,
) (placement.Placement, error) {
	if err := a.IsCompatibleWith(p); err != nil {
		return nil, err
	}

	mirrorPlacement, err := mirrorFromPlacement(p)
	if err != nil {
		return nil, err
	}

	if mirrorPlacement, err = a.shardedAlgo.BalanceShards(mirrorPlacement); err != nil {
		return nil, err
	}

	return placementFromMirror(mirrorPlacement, p.Instances(), p.ReplicaFactor())
}

// returnInitializingShards tries to return initializing shards on the given
// instances and retries until no more initializing shards can be returned,
// since returning shards from one instance may unblock returns on another.
func (a mirroredAlgorithm) returnInitializingShards(
	p placement.Placement,
	instanceIDs []string,
) (placement.Placement, error) {
	for {
		madeProgress := false
		for _, id := range instanceIDs {
			_, exist := p.Instance(id)
			if !exist {
				continue
			}
			ph, instance, err := newRemoveInstanceHelper(p, id, a.opts)
			if err != nil {
				return nil, err
			}
			numInitShards := instance.Shards().NumShardsForState(shard.Initializing)
			ph.returnInitializingShards(instance)
			if instance.Shards().NumShardsForState(shard.Initializing) < numInitShards {
				// Made some progress on returning shards.
				madeProgress = true
			}
			p = ph.generatePlacement()
			if instance.Shards().NumShards() > 0 {
				p = p.SetInstances(append(p.Instances(), instance))
			}
		}
		if !madeProgress {
			break
		}
	}

	for _, id := range instanceIDs {
		instance, ok := p.Instance(id)
		if !ok {
			continue
		}
		numInitializingShards := instance.Shards().NumShardsForState(shard.Initializing)
		if numInitializingShards != 0 {
			return nil, fmt.Errorf("%d initializing shards could not be returned for instance %s", numInitializingShards, id)
		}
	}

	return p, nil
}

// reclaimLeavingShards tries to reclaim leaving shards on the given instances
// and retries until no more leaving shards can be reclaimed.
func (a mirroredAlgorithm) reclaimLeavingShards(
	p placement.Placement,
	addingInstances []placement.Instance,
) (placement.Placement, error) {
	for {
		madeProgress := false
		for _, instance := range addingInstances {
			ph, instance, err := newAddInstanceHelper(p, instance, a.opts, withAvailableOrLeavingShardsOnly)
			if err != nil {
				return nil, err
			}
			numLeavingShards := instance.Shards().NumShardsForState(shard.Leaving)
			ph.reclaimLeavingShards(instance)
			if instance.Shards().NumShardsForState(shard.Leaving) < numLeavingShards {
				// Made some progress on reclaiming shards.
				madeProgress = true
			}
			p = ph.generatePlacement()
		}
		if !madeProgress {
			break
		}
	}

	for _, instance := range addingInstances {
		id := instance.ID()
		instance, ok := p.Instance(id)
		if !ok {
			return nil, fmt.Errorf("could not find instance %s in placement after reclaiming leaving shards", id)
		}
		numLeavingShards := instance.Shards().NumShardsForState(shard.Leaving)
		if numLeavingShards != 0 {
			return nil, fmt.Errorf("%d leaving shards could not be reclaimed for instance %s", numLeavingShards, id)
		}
	}

	return p, nil
}

func validAddingInstances(p placement.Placement, addingInstances []placement.Instance) ([]placement.Instance, error) {
	for i, instance := range addingInstances {
		if _, exist := p.Instance(instance.ID()); exist {
			return nil, fmt.Errorf("instance %s already exists in the placement", instance.ID())
		}
		if instance.IsLeaving() {
			// The instance was leaving in the placement; after MarkAllShardsAvailable
			// it has been removed from the placement, so we should treat it as a
			// fresh new instance.
			addingInstances[i] = instance.SetShards(shard.NewShards(nil))
		}
	}
	return addingInstances, nil
}

func groupInstancesByShardSetID(
	instances []placement.Instance,
	rf int,
) ([]placement.Instance, error) {
	var (
		shardSetMap = make(map[uint32]*shardSetMetadata, len(instances))
		res         = make([]placement.Instance, 0, len(instances))
	)
	for _, instance := range instances {
		var (
			ssID   = instance.ShardSetID()
			weight = instance.Weight()
			group  = instance.IsolationGroup()
			shards = instance.Shards()
		)
		meta, ok := shardSetMap[ssID]
		if !ok {
			meta = &shardSetMetadata{
				weight: weight,
				groups: make(map[string]struct{}, rf),
				shards: shards,
			}
			shardSetMap[ssID] = meta
		}
		if _, ok := meta.groups[group]; ok {
			return nil, fmt.Errorf("found duplicated isolation group %s for shardset id %d", group, ssID)
		}

		if meta.weight != weight {
			return nil, fmt.Errorf("found different weights: %d and %d, for shardset id %d", meta.weight, weight, ssID)
		}

		if !meta.shards.Equals(shards) {
			return nil, fmt.Errorf("found different shards: %v and %v, for shardset id %d", meta.shards, shards, ssID)
		}

		meta.groups[group] = struct{}{}
		meta.count++
	}

	for ssID, meta := range shardSetMap {
		if meta.count != rf {
			return nil, fmt.Errorf("found %d count of shard set id %d, expecting %d", meta.count, ssID, rf)
		}

		// NB(cw) The shard set ID should be assigned by the placement service;
		// the algorithm does not change the shard set id assigned to each instance.
		ssIDStr := strconv.Itoa(int(ssID))
		res = append(
			res,
			placement.NewInstance().
				SetID(ssIDStr).
				SetIsolationGroup(ssIDStr).
				SetWeight(meta.weight).
				SetShardSetID(ssID).
				SetShards(meta.shards.Clone()),
		)
	}

	return res, nil
}
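
// For example (a hedged sketch, hypothetical values): two instances with shard
// set ID 7, weight 2, isolation groups "r1" and "r2", each owning the same
// shards, group into a single virtual instance equivalent to
//
//	placement.NewInstance().
//		SetID("7").
//		SetIsolationGroup("7").
//		SetWeight(2).
//		SetShardSetID(7).
//		SetShards(sharedShards.Clone())
//
// where sharedShards stands for the shards common to the pair.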

// mirrorFromPlacement zips all instances with the same shardSetID into a
// virtual instance and creates a placement with those virtual instances and rf=1.
func mirrorFromPlacement(p placement.Placement) (placement.Placement, error) {
	mirrorInstances, err := groupInstancesByShardSetID(p.Instances(), p.ReplicaFactor())
	if err != nil {
		return nil, err
	}

	return placement.NewPlacement().
		SetInstances(mirrorInstances).
		SetReplicaFactor(1).
		SetShards(p.Shards()).
		SetCutoverNanos(p.CutoverNanos()).
		SetIsSharded(true).
		SetIsMirrored(true).
		SetMaxShardSetID(p.MaxShardSetID()), nil
}

// placementFromMirror duplicates the shards for each shard set id and assigns
// them to the instances with that shard set id.
func placementFromMirror(
	mirror placement.Placement,
	instances []placement.Instance,
	rf int,
) (placement.Placement, error) {
	var (
		mirrorInstances     = mirror.Instances()
		shardSetMap         = make(map[uint32][]placement.Instance, len(mirrorInstances))
		instancesWithShards = make([]placement.Instance, 0, len(instances))
	)
	for _, instance := range instances {
		instances, ok := shardSetMap[instance.ShardSetID()]
		if !ok {
			instances = make([]placement.Instance, 0, rf)
		}
		instances = append(instances, instance)
		shardSetMap[instance.ShardSetID()] = instances
	}

	for _, mirrorInstance := range mirrorInstances {
		instances, err := instancesFromMirror(mirrorInstance, shardSetMap)
		if err != nil {
			return nil, err
		}
		instancesWithShards = append(instancesWithShards, instances...)
	}

	return placement.NewPlacement().
		SetInstances(instancesWithShards).
		SetReplicaFactor(rf).
		SetShards(mirror.Shards()).
		SetCutoverNanos(mirror.CutoverNanos()).
		SetIsMirrored(true).
		SetIsSharded(true).
		SetMaxShardSetID(mirror.MaxShardSetID()), nil
}

func instancesFromMirror(
	mirrorInstance placement.Instance,
	instancesMap map[uint32][]placement.Instance,
) ([]placement.Instance, error) {
	ssID := mirrorInstance.ShardSetID()
	instances, ok := instancesMap[ssID]
	if !ok {
		return nil, fmt.Errorf("could not find shard set id %d in placement", ssID)
	}

	shards := mirrorInstance.Shards()
	for i, instance := range instances {
		newShards := make([]shard.Shard, shards.NumShards())
		for j, s := range shards.All() {
			// TODO move clone() to shard interface
			newShard := shard.NewShard(s.ID()).
				SetState(s.State()).
				SetCutoffNanos(s.CutoffNanos()).
				SetCutoverNanos(s.CutoverNanos())
			sourceID := s.SourceID()
			if sourceID != "" {
				// The sourceID in the mirror placement is a shardSetID and needs
				// to be converted to an instanceID.
				shardSetID, err := strconv.Atoi(sourceID)
				if err != nil {
					return nil, fmt.Errorf("could not convert source id %s to shard set id", sourceID)
				}
				sourceInstances, ok := instancesMap[uint32(shardSetID)]
				if !ok {
					return nil, fmt.Errorf("could not find source id %s in placement", sourceID)
				}

				sourceID = sourceInstances[i].ID()
			}
			newShards[j] = newShard.SetSourceID(sourceID)
		}
		instances[i] = instance.SetShards(shard.NewShards(newShards))
	}
	return instances, nil
}
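
// Source ID conversion example (illustrative, hypothetical values): if a
// mirror shard carries SourceID "3" (a shard set ID) and shard set 3 expands
// to instances ["C", "D"], then replica 0 of the shard gets SourceID "C" and
// replica 1 gets "D", pairing each real instance with the source instance at
// the same index.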

type shardSetMetadata struct {
	weight uint32
	count  int
	groups map[string]struct{}
	shards shard.Shards
}