github.com/m3db/m3@v1.5.0/src/cluster/placement/selector/mirrored.go (about) 1 // Copyright (c) 2017 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package selector 22 23 import ( 24 "container/heap" 25 "errors" 26 "fmt" 27 "math" 28 29 "github.com/m3db/m3/src/cluster/placement" 30 31 "go.uber.org/zap" 32 ) 33 34 var ( 35 errNoValidMirrorInstance = errors.New("no valid instance for mirror placement in the candidate list") 36 ) 37 38 // mirroredPortSelector groups instances by their port--see NewPortMirroredSelector for details. 39 type mirroredPortSelector struct { 40 opts placement.Options 41 logger *zap.Logger 42 } 43 44 // NewPortMirroredSelector returns a placement.InstanceSelector which creates groups of instances 45 // by their port number and assigns a shardset to each group, taking isolation groups into account 46 // while creating groups. This is the default behavior used by NewInstanceSelector if IsMirrored 47 // is true. 48 func NewPortMirroredSelector(opts placement.Options) placement.InstanceSelector { 49 return &mirroredPortSelector{ 50 opts: opts, 51 logger: opts.InstrumentOptions().Logger(), 52 } 53 } 54 55 // SelectInitialInstances tries to make as many groups as possible from 56 // the candidate instances to make the initial placement. 57 func (f *mirroredPortSelector) SelectInitialInstances( 58 candidates []placement.Instance, 59 rf int, 60 ) ([]placement.Instance, error) { 61 candidates, err := getValidCandidates( 62 placement.NewPlacement(), 63 candidates, 64 f.opts, 65 ) 66 if err != nil { 67 return nil, err 68 } 69 70 weightToHostMap, err := groupHostsByWeight(candidates) 71 if err != nil { 72 return nil, err 73 } 74 75 var groups = make([][]placement.Instance, 0, len(candidates)) 76 for _, hosts := range weightToHostMap { 77 groupedHosts, ungrouped := groupHostsWithIsolationGroupCheck(hosts, rf) 78 if len(ungrouped) != 0 { 79 for _, host := range ungrouped { 80 f.logger.Warn("could not group", 81 zap.String("host", host.name), 82 zap.String("isolationGroup", host.isolationGroup), 83 zap.Uint32("weight", host.weight)) 84 } 85 } 86 if len(groupedHosts) == 0 { 87 continue 88 } 89 90 groupedInstances, err := groupInstancesByHostPort(groupedHosts, f.opts.SkipPortMirroring()) 91 if err != nil { 92 return nil, err 93 } 94 95 groups = append(groups, groupedInstances...) 96 } 97 98 if len(groups) == 0 { 99 return nil, errNoValidMirrorInstance 100 } 101 102 return assignShardsetsToGroupedInstances(groups, placement.NewPlacement()), nil 103 } 104 105 // SelectAddingInstances tries to make just one group of hosts from 106 // the candidate instances to be added to the placement. 107 func (f *mirroredPortSelector) SelectAddingInstances( 108 candidates []placement.Instance, 109 p placement.Placement, 110 ) ([]placement.Instance, error) { 111 candidates, err := getValidCandidates(p, candidates, f.opts) 112 if err != nil { 113 return nil, err 114 } 115 116 weightToHostMap, err := groupHostsByWeight(candidates) 117 if err != nil { 118 return nil, err 119 } 120 121 var groups = make([][]placement.Instance, 0, len(candidates)) 122 for _, hosts := range weightToHostMap { 123 groupedHosts, _ := groupHostsWithIsolationGroupCheck(hosts, p.ReplicaFactor()) 124 if len(groupedHosts) == 0 { 125 continue 126 } 127 128 if !f.opts.AddAllCandidates() { 129 // When AddAllCandidates option is disabled, we will only add 130 // one pair of hosts into the placement. 131 groups, err = groupInstancesByHostPort(groupedHosts[:1], f.opts.SkipPortMirroring()) 132 if err != nil { 133 return nil, err 134 } 135 136 break 137 } 138 139 newGroups, err := groupInstancesByHostPort(groupedHosts, f.opts.SkipPortMirroring()) 140 if err != nil { 141 return nil, err 142 } 143 groups = append(groups, newGroups...) 144 } 145 146 if len(groups) == 0 { 147 return nil, errNoValidMirrorInstance 148 } 149 150 return assignShardsetsToGroupedInstances(groups, p), nil 151 } 152 153 // SelectReplaceInstances for mirror supports replacing multiple instances from one host. 154 // Two main use cases: 155 // 1, find a new host from a pool of hosts to replace a host in the placement. 156 // 2, back out of a replacement, both leaving and adding host are still in the placement. 157 func (f *mirroredPortSelector) SelectReplaceInstances( 158 candidates []placement.Instance, 159 leavingInstanceIDs []string, 160 p placement.Placement, 161 ) ([]placement.Instance, error) { 162 candidates, err := getValidCandidates(p, candidates, f.opts) 163 if err != nil { 164 return nil, err 165 } 166 167 leavingInstances, err := getLeavingInstances(p, leavingInstanceIDs) 168 if err != nil { 169 return nil, err 170 } 171 172 // Validate leaving instances. 173 var ( 174 h host 175 ssIDs = make(map[uint32]struct{}, len(leavingInstances)) 176 ) 177 for _, instance := range leavingInstances { 178 if h.name == "" { 179 h = newHost(instance.Hostname(), instance.IsolationGroup(), instance.Weight()) 180 } 181 182 err := h.addInstance(instance.Port(), instance) 183 if err != nil { 184 return nil, err 185 } 186 ssIDs[instance.ShardSetID()] = struct{}{} 187 } 188 189 weightToHostMap, err := groupHostsByWeight(candidates) 190 if err != nil { 191 return nil, err 192 } 193 194 hosts, ok := weightToHostMap[h.weight] 195 if !ok { 196 return nil, fmt.Errorf("could not find instances with weight %d in the candidate list", h.weight) 197 } 198 199 // Find out the isolation groups that are already in the same shard set id with the leaving instances. 200 var conflictIGs = make(map[string]struct{}) 201 for _, instance := range p.Instances() { 202 if _, ok := ssIDs[instance.ShardSetID()]; !ok { 203 continue 204 } 205 if instance.Hostname() == h.name { 206 continue 207 } 208 if instance.IsLeaving() { 209 continue 210 } 211 212 conflictIGs[instance.IsolationGroup()] = struct{}{} 213 } 214 215 var replacementGroups []mirroredReplacementGroup 216 for _, candidateHost := range hosts { 217 if candidateHost.name == h.name { 218 continue 219 } 220 221 if _, ok := conflictIGs[candidateHost.isolationGroup]; ok { 222 continue 223 } 224 225 groups, err := groupInstancesByHostPort([][]host{{h, candidateHost}}, f.opts.SkipPortMirroring()) 226 if err != nil { 227 f.logger.Warn("could not match up candidate host with target host", 228 zap.String("candidate", candidateHost.name), 229 zap.String("target", h.name), 230 zap.Error(err)) 231 continue 232 } 233 234 for _, group := range groups { 235 if len(group) != 2 { 236 return nil, fmt.Errorf( 237 "unexpected length of instance group for replacement: %d", 238 len(group), 239 ) 240 } 241 242 replacementGroup := mirroredReplacementGroup{} 243 244 // search for leaving + replacement in the group (don't assume anything about the order) 245 for _, inst := range group { 246 if inst.Hostname() == h.name { 247 replacementGroup.Leaving = inst 248 } else if inst.Hostname() == candidateHost.name { 249 replacementGroup.Replacement = inst 250 } 251 } 252 if replacementGroup.Replacement == nil { 253 return nil, fmt.Errorf( 254 "programming error: failed to find replacement instance for host %s in group", 255 candidateHost.name, 256 ) 257 } 258 if replacementGroup.Leaving == nil { 259 return nil, fmt.Errorf( 260 "programming error: failed to find leaving instance for host %s in group", 261 h.name, 262 ) 263 } 264 265 replacementGroups = append( 266 replacementGroups, 267 replacementGroup, 268 ) 269 } 270 271 // Successfully grouped candidate with the host in placement. 272 break 273 } 274 275 if len(replacementGroups) == 0 { 276 return nil, errNoValidMirrorInstance 277 } 278 279 return assignShardsetIDsToReplacements(leavingInstanceIDs, replacementGroups) 280 } 281 282 // assignShardsetIDsToReplacements assigns the shardset of each leaving instance to each replacement 283 // instance. The output is ordered in the order of leavingInstanceIDs. 284 func assignShardsetIDsToReplacements( 285 leavingInstanceIDs []string, 286 groups []mirroredReplacementGroup, 287 ) ([]placement.Instance, error) { 288 if len(groups) != len(leavingInstanceIDs) { 289 return nil, fmt.Errorf( 290 "failed to find %d replacement instances to replace %d leaving instances", 291 len(groups), len(leavingInstanceIDs), 292 ) 293 } 294 // The groups returned from the groupInstances() might not be the same order as 295 // the instances in leavingInstanceIDs. We need to reorder them to the same order 296 // as leavingInstanceIDs. 297 var res = make([]placement.Instance, len(groups)) 298 for _, group := range groups { 299 idx := findIndex(leavingInstanceIDs, group.Leaving.ID()) 300 if idx == -1 { 301 return nil, fmt.Errorf( 302 "could not find instance id: '%s' in leaving instances", group.Leaving.ID()) 303 } 304 305 res[idx] = group.Replacement.SetShardSetID(group.Leaving.ShardSetID()) 306 } 307 return res, nil 308 } 309 310 func getLeavingInstances( 311 p placement.Placement, 312 leavingInstanceIDs []string, 313 ) ([]placement.Instance, error) { 314 leavingInstances := make([]placement.Instance, 0, len(leavingInstanceIDs)) 315 for _, id := range leavingInstanceIDs { 316 leavingInstance, exist := p.Instance(id) 317 if !exist { 318 return nil, errInstanceAbsent 319 } 320 leavingInstances = append(leavingInstances, leavingInstance) 321 } 322 return leavingInstances, nil 323 } 324 325 func findIndex(ids []string, id string) int { 326 for i := range ids { 327 if ids[i] == id { 328 return i 329 } 330 } 331 // Unexpected. 332 return -1 333 } 334 335 func groupHostsByWeight(candidates []placement.Instance) (map[uint32][]host, error) { 336 var ( 337 uniqueHosts = make(map[string]host, len(candidates)) 338 weightToHostsMap = make(map[uint32][]host, len(candidates)) 339 ) 340 for _, instance := range candidates { 341 hostname := instance.Hostname() 342 weight := instance.Weight() 343 h, ok := uniqueHosts[hostname] 344 if !ok { 345 h = newHost(hostname, instance.IsolationGroup(), weight) 346 uniqueHosts[hostname] = h 347 weightToHostsMap[weight] = append(weightToHostsMap[weight], h) 348 } 349 err := h.addInstance(instance.Port(), instance) 350 if err != nil { 351 return nil, err 352 } 353 } 354 return weightToHostsMap, nil 355 } 356 357 // groupHostsWithIsolationGroupCheck looks at the isolation groups of the given hosts 358 // and try to make as many groups as possible. The hosts in each group 359 // must come from different isolation groups. 360 func groupHostsWithIsolationGroupCheck(hosts []host, rf int) (groups [][]host, ungrouped []host) { 361 if len(hosts) < rf { 362 // When the number of hosts is less than rf, no groups can be made. 363 return nil, hosts 364 } 365 366 var ( 367 uniqIGs = make(map[string]*group, len(hosts)) 368 rh = groupsByNumHost(make([]*group, 0, len(hosts))) 369 ) 370 for _, h := range hosts { 371 r, ok := uniqIGs[h.isolationGroup] 372 if !ok { 373 r = &group{ 374 isolationGroup: h.isolationGroup, 375 hosts: make([]host, 0, rf), 376 } 377 378 uniqIGs[h.isolationGroup] = r 379 rh = append(rh, r) 380 } 381 r.hosts = append(r.hosts, h) 382 } 383 384 heap.Init(&rh) 385 386 // For each group, always prefer to find one host from the largest isolation group 387 // in the heap. After a group is filled, push all the checked isolation groups back 388 // to the heap so they can be used for the next group. 389 groups = make([][]host, 0, int(math.Ceil(float64(len(hosts))/float64(rf)))) 390 for rh.Len() >= rf { 391 // When there are more than rf isolation groups available, try to make a group. 392 seenIGs := make(map[string]*group, rf) 393 g := make([]host, 0, rf) 394 for i := 0; i < rf; i++ { 395 r := heap.Pop(&rh).(*group) 396 // Move the host from the isolation group to the group. 397 // The isolation groups in the heap always have at least one host. 398 g = append(g, r.hosts[len(r.hosts)-1]) 399 r.hosts = r.hosts[:len(r.hosts)-1] 400 seenIGs[r.isolationGroup] = r 401 } 402 if len(g) == rf { 403 groups = append(groups, g) 404 } 405 for _, r := range seenIGs { 406 if len(r.hosts) > 0 { 407 heap.Push(&rh, r) 408 } 409 } 410 } 411 412 ungrouped = make([]host, 0, rh.Len()) 413 for _, r := range rh { 414 ungrouped = append(ungrouped, r.hosts...) 415 } 416 return groups, ungrouped 417 } 418 419 func groupInstancesByHostPort(hostGroups [][]host, skipPortMatching bool) ([][]placement.Instance, error) { 420 var instanceGroups = make([][]placement.Instance, 0, len(hostGroups)) 421 for _, hostGroup := range hostGroups { 422 if !skipPortMatching { 423 for port, instance := range hostGroup[0].portToInstance { 424 instanceGroup := make([]placement.Instance, 0, len(hostGroup)) 425 instanceGroup = append(instanceGroup, instance) 426 for _, otherHost := range hostGroup[1:] { 427 otherInstance, ok := otherHost.portToInstance[port] 428 if !ok { 429 return nil, fmt.Errorf("could not find port %d on host %s", port, otherHost.name) 430 } 431 instanceGroup = append(instanceGroup, otherInstance) 432 } 433 instanceGroups = append(instanceGroups, instanceGroup) 434 } 435 } else { 436 numInstancesPerHost, instancesByHost := convertHostGroupToInstanceLists(hostGroup) 437 for i := 0; i < numInstancesPerHost; i++ { 438 instanceGroup := make([]placement.Instance, 0, len(hostGroup)) 439 for _, list := range instancesByHost { 440 instanceGroup = append(instanceGroup, list[i]) 441 } 442 instanceGroups = append(instanceGroups, instanceGroup) 443 } 444 } 445 } 446 return instanceGroups, nil 447 } 448 449 func convertHostGroupToInstanceLists(hostGroup []host) (int, [][]placement.Instance) { 450 numInstancePerHost := 0 451 instancesByHost := make([][]placement.Instance, 0, len(hostGroup)) 452 for i, host := range hostGroup { 453 if i == 0 { 454 numInstancePerHost = len(host.portToInstance) 455 } else if numInstancePerHost > len(host.portToInstance) { 456 numInstancePerHost = len(host.portToInstance) 457 } 458 459 instances := make([]placement.Instance, 0, numInstancePerHost) 460 for _, instance := range host.portToInstance { 461 instances = append(instances, instance) 462 } 463 instancesByHost = append(instancesByHost, instances) 464 } 465 466 return numInstancePerHost, instancesByHost 467 } 468 469 // assignShardsetsToGroupedInstances is a helper for mirrored selectors, which assigns shardset 470 // IDs to the given groups. 471 func assignShardsetsToGroupedInstances( 472 groups [][]placement.Instance, 473 p placement.Placement, 474 ) []placement.Instance { 475 var ( 476 instances = make([]placement.Instance, 0, p.ReplicaFactor()*len(groups)) 477 currShardSetID = p.MaxShardSetID() + 1 478 ssID uint32 479 ) 480 for _, group := range groups { 481 useNewSSID := shouldUseNewShardSetID(group, p) 482 483 if useNewSSID { 484 ssID = currShardSetID 485 currShardSetID++ 486 } 487 for _, instance := range group { 488 if useNewSSID { 489 instance = instance.SetShardSetID(ssID) 490 } 491 instances = append(instances, instance) 492 } 493 } 494 return instances 495 } 496 497 func shouldUseNewShardSetID( 498 group []placement.Instance, 499 p placement.Placement, 500 ) bool { 501 var seenSSID *uint32 502 for _, instance := range group { 503 instanceInPlacement, exist := p.Instance(instance.ID()) 504 if !exist { 505 return true 506 } 507 currentSSID := instanceInPlacement.ShardSetID() 508 if seenSSID == nil { 509 seenSSID = ¤tSSID 510 continue 511 } 512 if *seenSSID != currentSSID { 513 return true 514 } 515 } 516 return false 517 } 518 519 type host struct { 520 name string 521 isolationGroup string 522 weight uint32 523 portToInstance map[uint32]placement.Instance 524 } 525 526 func newHost(name, isolationGroup string, weight uint32) host { 527 return host{ 528 name: name, 529 isolationGroup: isolationGroup, 530 weight: weight, 531 portToInstance: make(map[uint32]placement.Instance), 532 } 533 } 534 535 func (h host) addInstance(port uint32, instance placement.Instance) error { 536 if h.weight != instance.Weight() { 537 return fmt.Errorf("could not add instance %s to host %s, weight mismatch: %d and %d", 538 instance.ID(), h.name, instance.Weight(), h.weight) 539 } 540 if h.isolationGroup != instance.IsolationGroup() { 541 return fmt.Errorf("could not add instance %s to host %s, isolation group mismatch: %s and %s", 542 instance.ID(), h.name, instance.IsolationGroup(), h.isolationGroup) 543 } 544 h.portToInstance[port] = instance 545 return nil 546 } 547 548 type group struct { 549 isolationGroup string 550 hosts []host 551 } 552 553 type groupsByNumHost []*group 554 555 func (h groupsByNumHost) Len() int { 556 return len(h) 557 } 558 559 func (h groupsByNumHost) Less(i, j int) bool { 560 return len(h[i].hosts) > len(h[j].hosts) 561 } 562 563 func (h groupsByNumHost) Swap(i, j int) { 564 h[i], h[j] = h[j], h[i] 565 } 566 567 func (h *groupsByNumHost) Push(i interface{}) { 568 r := i.(*group) 569 *h = append(*h, r) 570 } 571 572 func (h *groupsByNumHost) Pop() interface{} { 573 old := *h 574 n := len(old) 575 g := old[n-1] 576 *h = old[0 : n-1] 577 return g 578 }