github.com/m3db/m3@v1.5.0/src/cluster/placement/selector/mirrored.go (about)

     1  // Copyright (c) 2017 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package selector
    22  
    23  import (
    24  	"container/heap"
    25  	"errors"
    26  	"fmt"
    27  	"math"
    28  
    29  	"github.com/m3db/m3/src/cluster/placement"
    30  
    31  	"go.uber.org/zap"
    32  )
    33  
    34  var (
    35  	errNoValidMirrorInstance = errors.New("no valid instance for mirror placement in the candidate list")
    36  )
    37  
// mirroredPortSelector groups instances by their port--see NewPortMirroredSelector for details.
//
// opts supplies the placement options read by the selection methods
// (SkipPortMirroring, AddAllCandidates) and is also passed to getValidCandidates.
// logger receives warnings about hosts that could not be grouped or matched.
type mirroredPortSelector struct {
	opts   placement.Options
	logger *zap.Logger
}
    43  
    44  // NewPortMirroredSelector returns a placement.InstanceSelector which creates groups of instances
    45  // by their port number and assigns a shardset to each group, taking isolation groups into account
    46  // while creating groups. This is the default behavior used by NewInstanceSelector if IsMirrored
    47  // is true.
    48  func NewPortMirroredSelector(opts placement.Options) placement.InstanceSelector {
    49  	return &mirroredPortSelector{
    50  		opts:   opts,
    51  		logger: opts.InstrumentOptions().Logger(),
    52  	}
    53  }
    54  
    55  // SelectInitialInstances tries to make as many groups as possible from
    56  // the candidate instances to make the initial placement.
    57  func (f *mirroredPortSelector) SelectInitialInstances(
    58  	candidates []placement.Instance,
    59  	rf int,
    60  ) ([]placement.Instance, error) {
    61  	candidates, err := getValidCandidates(
    62  		placement.NewPlacement(),
    63  		candidates,
    64  		f.opts,
    65  	)
    66  	if err != nil {
    67  		return nil, err
    68  	}
    69  
    70  	weightToHostMap, err := groupHostsByWeight(candidates)
    71  	if err != nil {
    72  		return nil, err
    73  	}
    74  
    75  	var groups = make([][]placement.Instance, 0, len(candidates))
    76  	for _, hosts := range weightToHostMap {
    77  		groupedHosts, ungrouped := groupHostsWithIsolationGroupCheck(hosts, rf)
    78  		if len(ungrouped) != 0 {
    79  			for _, host := range ungrouped {
    80  				f.logger.Warn("could not group",
    81  					zap.String("host", host.name),
    82  					zap.String("isolationGroup", host.isolationGroup),
    83  					zap.Uint32("weight", host.weight))
    84  			}
    85  		}
    86  		if len(groupedHosts) == 0 {
    87  			continue
    88  		}
    89  
    90  		groupedInstances, err := groupInstancesByHostPort(groupedHosts, f.opts.SkipPortMirroring())
    91  		if err != nil {
    92  			return nil, err
    93  		}
    94  
    95  		groups = append(groups, groupedInstances...)
    96  	}
    97  
    98  	if len(groups) == 0 {
    99  		return nil, errNoValidMirrorInstance
   100  	}
   101  
   102  	return assignShardsetsToGroupedInstances(groups, placement.NewPlacement()), nil
   103  }
   104  
   105  // SelectAddingInstances tries to make just one group of hosts from
   106  // the candidate instances to be added to the placement.
   107  func (f *mirroredPortSelector) SelectAddingInstances(
   108  	candidates []placement.Instance,
   109  	p placement.Placement,
   110  ) ([]placement.Instance, error) {
   111  	candidates, err := getValidCandidates(p, candidates, f.opts)
   112  	if err != nil {
   113  		return nil, err
   114  	}
   115  
   116  	weightToHostMap, err := groupHostsByWeight(candidates)
   117  	if err != nil {
   118  		return nil, err
   119  	}
   120  
   121  	var groups = make([][]placement.Instance, 0, len(candidates))
   122  	for _, hosts := range weightToHostMap {
   123  		groupedHosts, _ := groupHostsWithIsolationGroupCheck(hosts, p.ReplicaFactor())
   124  		if len(groupedHosts) == 0 {
   125  			continue
   126  		}
   127  
   128  		if !f.opts.AddAllCandidates() {
   129  			// When AddAllCandidates option is disabled, we will only add
   130  			// one pair of hosts into the placement.
   131  			groups, err = groupInstancesByHostPort(groupedHosts[:1], f.opts.SkipPortMirroring())
   132  			if err != nil {
   133  				return nil, err
   134  			}
   135  
   136  			break
   137  		}
   138  
   139  		newGroups, err := groupInstancesByHostPort(groupedHosts, f.opts.SkipPortMirroring())
   140  		if err != nil {
   141  			return nil, err
   142  		}
   143  		groups = append(groups, newGroups...)
   144  	}
   145  
   146  	if len(groups) == 0 {
   147  		return nil, errNoValidMirrorInstance
   148  	}
   149  
   150  	return assignShardsetsToGroupedInstances(groups, p), nil
   151  }
   152  
   153  // SelectReplaceInstances for mirror supports replacing multiple instances from one host.
   154  // Two main use cases:
   155  // 1, find a new host from a pool of hosts to replace a host in the placement.
   156  // 2, back out of a replacement, both leaving and adding host are still in the placement.
   157  func (f *mirroredPortSelector) SelectReplaceInstances(
   158  	candidates []placement.Instance,
   159  	leavingInstanceIDs []string,
   160  	p placement.Placement,
   161  ) ([]placement.Instance, error) {
   162  	candidates, err := getValidCandidates(p, candidates, f.opts)
   163  	if err != nil {
   164  		return nil, err
   165  	}
   166  
   167  	leavingInstances, err := getLeavingInstances(p, leavingInstanceIDs)
   168  	if err != nil {
   169  		return nil, err
   170  	}
   171  
   172  	// Validate leaving instances.
   173  	var (
   174  		h     host
   175  		ssIDs = make(map[uint32]struct{}, len(leavingInstances))
   176  	)
   177  	for _, instance := range leavingInstances {
   178  		if h.name == "" {
   179  			h = newHost(instance.Hostname(), instance.IsolationGroup(), instance.Weight())
   180  		}
   181  
   182  		err := h.addInstance(instance.Port(), instance)
   183  		if err != nil {
   184  			return nil, err
   185  		}
   186  		ssIDs[instance.ShardSetID()] = struct{}{}
   187  	}
   188  
   189  	weightToHostMap, err := groupHostsByWeight(candidates)
   190  	if err != nil {
   191  		return nil, err
   192  	}
   193  
   194  	hosts, ok := weightToHostMap[h.weight]
   195  	if !ok {
   196  		return nil, fmt.Errorf("could not find instances with weight %d in the candidate list", h.weight)
   197  	}
   198  
   199  	// Find out the isolation groups that are already in the same shard set id with the leaving instances.
   200  	var conflictIGs = make(map[string]struct{})
   201  	for _, instance := range p.Instances() {
   202  		if _, ok := ssIDs[instance.ShardSetID()]; !ok {
   203  			continue
   204  		}
   205  		if instance.Hostname() == h.name {
   206  			continue
   207  		}
   208  		if instance.IsLeaving() {
   209  			continue
   210  		}
   211  
   212  		conflictIGs[instance.IsolationGroup()] = struct{}{}
   213  	}
   214  
   215  	var replacementGroups []mirroredReplacementGroup
   216  	for _, candidateHost := range hosts {
   217  		if candidateHost.name == h.name {
   218  			continue
   219  		}
   220  
   221  		if _, ok := conflictIGs[candidateHost.isolationGroup]; ok {
   222  			continue
   223  		}
   224  
   225  		groups, err := groupInstancesByHostPort([][]host{{h, candidateHost}}, f.opts.SkipPortMirroring())
   226  		if err != nil {
   227  			f.logger.Warn("could not match up candidate host with target host",
   228  				zap.String("candidate", candidateHost.name),
   229  				zap.String("target", h.name),
   230  				zap.Error(err))
   231  			continue
   232  		}
   233  
   234  		for _, group := range groups {
   235  			if len(group) != 2 {
   236  				return nil, fmt.Errorf(
   237  					"unexpected length of instance group for replacement: %d",
   238  					len(group),
   239  				)
   240  			}
   241  
   242  			replacementGroup := mirroredReplacementGroup{}
   243  
   244  			// search for leaving + replacement in the group (don't assume anything about the order)
   245  			for _, inst := range group {
   246  				if inst.Hostname() == h.name {
   247  					replacementGroup.Leaving = inst
   248  				} else if inst.Hostname() == candidateHost.name {
   249  					replacementGroup.Replacement = inst
   250  				}
   251  			}
   252  			if replacementGroup.Replacement == nil {
   253  				return nil, fmt.Errorf(
   254  					"programming error: failed to find replacement instance for host %s in group",
   255  					candidateHost.name,
   256  				)
   257  			}
   258  			if replacementGroup.Leaving == nil {
   259  				return nil, fmt.Errorf(
   260  					"programming error: failed to find leaving instance for host %s in group",
   261  					h.name,
   262  				)
   263  			}
   264  
   265  			replacementGroups = append(
   266  				replacementGroups,
   267  				replacementGroup,
   268  			)
   269  		}
   270  
   271  		// Successfully grouped candidate with the host in placement.
   272  		break
   273  	}
   274  
   275  	if len(replacementGroups) == 0 {
   276  		return nil, errNoValidMirrorInstance
   277  	}
   278  
   279  	return assignShardsetIDsToReplacements(leavingInstanceIDs, replacementGroups)
   280  }
   281  
   282  // assignShardsetIDsToReplacements assigns the shardset of each leaving instance to each replacement
   283  // instance. The output is ordered in the order of leavingInstanceIDs.
   284  func assignShardsetIDsToReplacements(
   285  	leavingInstanceIDs []string,
   286  	groups []mirroredReplacementGroup,
   287  ) ([]placement.Instance, error) {
   288  	if len(groups) != len(leavingInstanceIDs) {
   289  		return nil, fmt.Errorf(
   290  			"failed to find %d replacement instances to replace %d leaving instances",
   291  			len(groups), len(leavingInstanceIDs),
   292  		)
   293  	}
   294  	// The groups returned from the groupInstances() might not be the same order as
   295  	// the instances in leavingInstanceIDs. We need to reorder them to the same order
   296  	// as leavingInstanceIDs.
   297  	var res = make([]placement.Instance, len(groups))
   298  	for _, group := range groups {
   299  		idx := findIndex(leavingInstanceIDs, group.Leaving.ID())
   300  		if idx == -1 {
   301  			return nil, fmt.Errorf(
   302  				"could not find instance id: '%s' in leaving instances", group.Leaving.ID())
   303  		}
   304  
   305  		res[idx] = group.Replacement.SetShardSetID(group.Leaving.ShardSetID())
   306  	}
   307  	return res, nil
   308  }
   309  
   310  func getLeavingInstances(
   311  	p placement.Placement,
   312  	leavingInstanceIDs []string,
   313  ) ([]placement.Instance, error) {
   314  	leavingInstances := make([]placement.Instance, 0, len(leavingInstanceIDs))
   315  	for _, id := range leavingInstanceIDs {
   316  		leavingInstance, exist := p.Instance(id)
   317  		if !exist {
   318  			return nil, errInstanceAbsent
   319  		}
   320  		leavingInstances = append(leavingInstances, leavingInstance)
   321  	}
   322  	return leavingInstances, nil
   323  }
   324  
   325  func findIndex(ids []string, id string) int {
   326  	for i := range ids {
   327  		if ids[i] == id {
   328  			return i
   329  		}
   330  	}
   331  	// Unexpected.
   332  	return -1
   333  }
   334  
   335  func groupHostsByWeight(candidates []placement.Instance) (map[uint32][]host, error) {
   336  	var (
   337  		uniqueHosts      = make(map[string]host, len(candidates))
   338  		weightToHostsMap = make(map[uint32][]host, len(candidates))
   339  	)
   340  	for _, instance := range candidates {
   341  		hostname := instance.Hostname()
   342  		weight := instance.Weight()
   343  		h, ok := uniqueHosts[hostname]
   344  		if !ok {
   345  			h = newHost(hostname, instance.IsolationGroup(), weight)
   346  			uniqueHosts[hostname] = h
   347  			weightToHostsMap[weight] = append(weightToHostsMap[weight], h)
   348  		}
   349  		err := h.addInstance(instance.Port(), instance)
   350  		if err != nil {
   351  			return nil, err
   352  		}
   353  	}
   354  	return weightToHostsMap, nil
   355  }
   356  
   357  // groupHostsWithIsolationGroupCheck looks at the isolation groups of the given hosts
   358  // and try to make as many groups as possible. The hosts in each group
   359  // must come from different isolation groups.
   360  func groupHostsWithIsolationGroupCheck(hosts []host, rf int) (groups [][]host, ungrouped []host) {
   361  	if len(hosts) < rf {
   362  		// When the number of hosts is less than rf, no groups can be made.
   363  		return nil, hosts
   364  	}
   365  
   366  	var (
   367  		uniqIGs = make(map[string]*group, len(hosts))
   368  		rh      = groupsByNumHost(make([]*group, 0, len(hosts)))
   369  	)
   370  	for _, h := range hosts {
   371  		r, ok := uniqIGs[h.isolationGroup]
   372  		if !ok {
   373  			r = &group{
   374  				isolationGroup: h.isolationGroup,
   375  				hosts:          make([]host, 0, rf),
   376  			}
   377  
   378  			uniqIGs[h.isolationGroup] = r
   379  			rh = append(rh, r)
   380  		}
   381  		r.hosts = append(r.hosts, h)
   382  	}
   383  
   384  	heap.Init(&rh)
   385  
   386  	// For each group, always prefer to find one host from the largest isolation group
   387  	// in the heap. After a group is filled, push all the checked isolation groups back
   388  	// to the heap so they can be used for the next group.
   389  	groups = make([][]host, 0, int(math.Ceil(float64(len(hosts))/float64(rf))))
   390  	for rh.Len() >= rf {
   391  		// When there are more than rf isolation groups available, try to make a group.
   392  		seenIGs := make(map[string]*group, rf)
   393  		g := make([]host, 0, rf)
   394  		for i := 0; i < rf; i++ {
   395  			r := heap.Pop(&rh).(*group)
   396  			// Move the host from the isolation group to the group.
   397  			// The isolation groups in the heap always have at least one host.
   398  			g = append(g, r.hosts[len(r.hosts)-1])
   399  			r.hosts = r.hosts[:len(r.hosts)-1]
   400  			seenIGs[r.isolationGroup] = r
   401  		}
   402  		if len(g) == rf {
   403  			groups = append(groups, g)
   404  		}
   405  		for _, r := range seenIGs {
   406  			if len(r.hosts) > 0 {
   407  				heap.Push(&rh, r)
   408  			}
   409  		}
   410  	}
   411  
   412  	ungrouped = make([]host, 0, rh.Len())
   413  	for _, r := range rh {
   414  		ungrouped = append(ungrouped, r.hosts...)
   415  	}
   416  	return groups, ungrouped
   417  }
   418  
   419  func groupInstancesByHostPort(hostGroups [][]host, skipPortMatching bool) ([][]placement.Instance, error) {
   420  	var instanceGroups = make([][]placement.Instance, 0, len(hostGroups))
   421  	for _, hostGroup := range hostGroups {
   422  		if !skipPortMatching {
   423  			for port, instance := range hostGroup[0].portToInstance {
   424  				instanceGroup := make([]placement.Instance, 0, len(hostGroup))
   425  				instanceGroup = append(instanceGroup, instance)
   426  				for _, otherHost := range hostGroup[1:] {
   427  					otherInstance, ok := otherHost.portToInstance[port]
   428  					if !ok {
   429  						return nil, fmt.Errorf("could not find port %d on host %s", port, otherHost.name)
   430  					}
   431  					instanceGroup = append(instanceGroup, otherInstance)
   432  				}
   433  				instanceGroups = append(instanceGroups, instanceGroup)
   434  			}
   435  		} else {
   436  			numInstancesPerHost, instancesByHost := convertHostGroupToInstanceLists(hostGroup)
   437  			for i := 0; i < numInstancesPerHost; i++ {
   438  				instanceGroup := make([]placement.Instance, 0, len(hostGroup))
   439  				for _, list := range instancesByHost {
   440  					instanceGroup = append(instanceGroup, list[i])
   441  				}
   442  				instanceGroups = append(instanceGroups, instanceGroup)
   443  			}
   444  		}
   445  	}
   446  	return instanceGroups, nil
   447  }
   448  
   449  func convertHostGroupToInstanceLists(hostGroup []host) (int, [][]placement.Instance) {
   450  	numInstancePerHost := 0
   451  	instancesByHost := make([][]placement.Instance, 0, len(hostGroup))
   452  	for i, host := range hostGroup {
   453  		if i == 0 {
   454  			numInstancePerHost = len(host.portToInstance)
   455  		} else if numInstancePerHost > len(host.portToInstance) {
   456  			numInstancePerHost = len(host.portToInstance)
   457  		}
   458  
   459  		instances := make([]placement.Instance, 0, numInstancePerHost)
   460  		for _, instance := range host.portToInstance {
   461  			instances = append(instances, instance)
   462  		}
   463  		instancesByHost = append(instancesByHost, instances)
   464  	}
   465  
   466  	return numInstancePerHost, instancesByHost
   467  }
   468  
   469  // assignShardsetsToGroupedInstances is a helper for mirrored selectors, which assigns shardset
   470  // IDs to the given groups.
   471  func assignShardsetsToGroupedInstances(
   472  	groups [][]placement.Instance,
   473  	p placement.Placement,
   474  ) []placement.Instance {
   475  	var (
   476  		instances      = make([]placement.Instance, 0, p.ReplicaFactor()*len(groups))
   477  		currShardSetID = p.MaxShardSetID() + 1
   478  		ssID           uint32
   479  	)
   480  	for _, group := range groups {
   481  		useNewSSID := shouldUseNewShardSetID(group, p)
   482  
   483  		if useNewSSID {
   484  			ssID = currShardSetID
   485  			currShardSetID++
   486  		}
   487  		for _, instance := range group {
   488  			if useNewSSID {
   489  				instance = instance.SetShardSetID(ssID)
   490  			}
   491  			instances = append(instances, instance)
   492  		}
   493  	}
   494  	return instances
   495  }
   496  
   497  func shouldUseNewShardSetID(
   498  	group []placement.Instance,
   499  	p placement.Placement,
   500  ) bool {
   501  	var seenSSID *uint32
   502  	for _, instance := range group {
   503  		instanceInPlacement, exist := p.Instance(instance.ID())
   504  		if !exist {
   505  			return true
   506  		}
   507  		currentSSID := instanceInPlacement.ShardSetID()
   508  		if seenSSID == nil {
   509  			seenSSID = &currentSSID
   510  			continue
   511  		}
   512  		if *seenSSID != currentSSID {
   513  			return true
   514  		}
   515  	}
   516  	return false
   517  }
   518  
// host collects the instances that run on one hostname.
//
// name, isolationGroup and weight are fixed when the host is created by newHost;
// addInstance rejects instances whose weight or isolation group differ from them.
// portToInstance maps a port number to the instance registered on that port.
// Because it is a map, copies of a host value share the same set of instances.
type host struct {
	name           string
	isolationGroup string
	weight         uint32
	portToInstance map[uint32]placement.Instance
}
   525  
   526  func newHost(name, isolationGroup string, weight uint32) host {
   527  	return host{
   528  		name:           name,
   529  		isolationGroup: isolationGroup,
   530  		weight:         weight,
   531  		portToInstance: make(map[uint32]placement.Instance),
   532  	}
   533  }
   534  
   535  func (h host) addInstance(port uint32, instance placement.Instance) error {
   536  	if h.weight != instance.Weight() {
   537  		return fmt.Errorf("could not add instance %s to host %s, weight mismatch: %d and %d",
   538  			instance.ID(), h.name, instance.Weight(), h.weight)
   539  	}
   540  	if h.isolationGroup != instance.IsolationGroup() {
   541  		return fmt.Errorf("could not add instance %s to host %s, isolation group mismatch: %s and %s",
   542  			instance.ID(), h.name, instance.IsolationGroup(), h.isolationGroup)
   543  	}
   544  	h.portToInstance[port] = instance
   545  	return nil
   546  }
   547  
// group holds the hosts that belong to one isolation group. It is the element
// type of the groupsByNumHost heap used by groupHostsWithIsolationGroupCheck,
// which removes hosts from the end of the hosts slice as they are assigned.
type group struct {
	isolationGroup string
	hosts          []host
}
   552  
   553  type groupsByNumHost []*group
   554  
   555  func (h groupsByNumHost) Len() int {
   556  	return len(h)
   557  }
   558  
   559  func (h groupsByNumHost) Less(i, j int) bool {
   560  	return len(h[i].hosts) > len(h[j].hosts)
   561  }
   562  
   563  func (h groupsByNumHost) Swap(i, j int) {
   564  	h[i], h[j] = h[j], h[i]
   565  }
   566  
   567  func (h *groupsByNumHost) Push(i interface{}) {
   568  	r := i.(*group)
   569  	*h = append(*h, r)
   570  }
   571  
   572  func (h *groupsByNumHost) Pop() interface{} {
   573  	old := *h
   574  	n := len(old)
   575  	g := old[n-1]
   576  	*h = old[0 : n-1]
   577  	return g
   578  }