github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/worker/peergrouper/desired.go (about) 1 // Copyright 2014 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package peergrouper 5 6 import ( 7 "fmt" 8 "sort" 9 "strconv" 10 "strings" 11 "time" 12 13 "github.com/juju/errors" 14 "github.com/juju/replicaset" 15 16 "github.com/juju/juju/core/status" 17 "github.com/juju/juju/network" 18 ) 19 20 // jujuMachineKey is the key for the tag where we save a member's machine id. 21 const jujuMachineKey = "juju-machine-id" 22 23 // peerGroupInfo holds information used in attempting to determine a Mongo 24 // peer group. 25 type peerGroupInfo struct { 26 // Maps below are keyed on machine ID. 27 28 // machines holds the machineTrackers for known controller machines sourced from the peergrouper 29 // worker. Indexed by machine.Id() 30 machines map[string]*machineTracker 31 32 // Replica-set members sourced from the Mongo session that are recognised by 33 // their association with known machines. 34 recognised map[string]replicaset.Member 35 36 // Replica-set member statuses sourced from the Mongo session. 37 statuses map[string]replicaset.MemberStatus 38 39 extra []replicaset.Member 40 maxMemberId int 41 mongoPort int 42 haSpace network.SpaceName 43 } 44 45 // desiredChanges tracks the specific changes we are asking to be made to the peer group. 46 type desiredChanges struct { 47 // isChanged is set False if the existing peer group is already in a valid configuration. 48 isChanged bool 49 50 // stepDownPrimary is set if we want to remove the vote from the Mongo Primary. This is specially flagged, 51 // because you have to ask the primary to step down before you can remove its vote. 52 stepDownPrimary bool 53 54 // members is the map of Id to replicaset.Member for the desired list of machines in the replicaset. 55 members map[string]*replicaset.Member 56 57 // machineVoting tracks which of the members should be set to vote. 
We should preseve an odd number of voters at all 58 // time. Also, when machines are first added to the replicaset, we wait to give them voting rights for when they 59 // have managed to sync the data from the current primary. 60 machineVoting map[string]bool 61 } 62 63 // peerGroupChanges tracks the process of computing the desiredChanges to the peer group. 64 type peerGroupChanges struct { 65 // info is the input state we will be processing 66 info *peerGroupInfo 67 68 // this block all represents active processing state 69 toRemoveVote []string 70 toAddVote []string 71 toKeepVoting []string 72 toKeepNonVoting []string 73 toKeepCreateNonVotingMember []string 74 75 // desired tracks the final changes to the peer group that we want to make 76 desired desiredChanges 77 } 78 79 func newPeerGroupInfo( 80 machines map[string]*machineTracker, 81 statuses []replicaset.MemberStatus, 82 members []replicaset.Member, 83 mongoPort int, 84 haSpace network.SpaceName, 85 ) (*peerGroupInfo, error) { 86 if len(members) == 0 { 87 return nil, fmt.Errorf("current member set is empty") 88 } 89 90 info := peerGroupInfo{ 91 machines: machines, 92 statuses: make(map[string]replicaset.MemberStatus), 93 recognised: make(map[string]replicaset.Member), 94 maxMemberId: -1, 95 mongoPort: mongoPort, 96 haSpace: haSpace, 97 } 98 99 // Iterate over the input members and associate them with a machine if 100 // possible; add any unassociated members to the "extra" slice. 101 // Link the statuses with the machine IDs where associated. 102 // Keep track of the highest member ID that we observe. 103 for _, m := range members { 104 found := false 105 if id, ok := m.Tags[jujuMachineKey]; ok { 106 if machines[id] != nil { 107 info.recognised[id] = m 108 found = true 109 } 110 111 // This invariably makes for N^2, but we anticipate small N. 
112 for _, sts := range statuses { 113 if sts.Id == m.Id { 114 info.statuses[id] = sts 115 } 116 } 117 } 118 if !found { 119 info.extra = append(info.extra, m) 120 } 121 122 if m.Id > info.maxMemberId { 123 info.maxMemberId = m.Id 124 } 125 } 126 127 return &info, nil 128 } 129 130 // getLogMessage generates a nicely formatted log message from the known peer 131 // group information. 132 func (info *peerGroupInfo) getLogMessage() string { 133 lines := []string{ 134 fmt.Sprintf("calculating desired peer group\ndesired voting members: (maxId: %d)", info.maxMemberId), 135 } 136 137 template := "\n %#v: rs_id=%d, rs_addr=%s" 138 ids := make([]string, 0, len(info.recognised)) 139 for id := range info.recognised { 140 ids = append(ids, id) 141 } 142 sortAsInts(ids) 143 for _, id := range ids { 144 rm := info.recognised[id] 145 lines = append(lines, fmt.Sprintf(template, info.machines[id], rm.Id, rm.Address)) 146 } 147 148 if len(info.extra) > 0 { 149 lines = append(lines, "\nother members:") 150 151 template := "\n rs_id=%d, rs_addr=%s, tags=%v, vote=%t" 152 for _, em := range info.extra { 153 vote := em.Votes != nil && *em.Votes > 0 154 lines = append(lines, fmt.Sprintf(template, em.Id, em.Address, em.Tags, vote)) 155 } 156 } 157 158 return strings.Join(lines, "") 159 } 160 161 // initNewReplicaSet creates a new machine ID indexed map of known replica-set 162 // members to use as the basis for a newly calculated replica-set. 163 func (p *peerGroupChanges) initNewReplicaSet() map[string]*replicaset.Member { 164 rs := make(map[string]*replicaset.Member, len(p.info.recognised)) 165 for id := range p.info.recognised { 166 // Local-scoped variable required here, 167 // or the same pointer to the loop variable is used each time. 168 m := p.info.recognised[id] 169 rs[id] = &m 170 } 171 return rs 172 } 173 174 // desiredPeerGroup returns a new Mongo peer-group calculated from the input 175 // peerGroupInfo. 
176 // Returned are the new members indexed by machine ID, and a map indicating 177 // which machines are set as voters in the new new peer-group. 178 // If the new peer-group is does not differ from that indicated by the input 179 // peerGroupInfo, a nil member map is returned along with the correct voters 180 // map. 181 // An error is returned if: 182 // 1) There are members unrecognised by machine association, 183 // and any of these are set as voters. 184 // 2) There is no HA space configured and any machines have multiple 185 // cloud-local addresses. 186 func desiredPeerGroup(info *peerGroupInfo) (desiredChanges, error) { 187 logger.Debugf(info.getLogMessage()) 188 189 peerChanges := peerGroupChanges{ 190 info: info, 191 desired: desiredChanges{ 192 isChanged: false, 193 stepDownPrimary: false, 194 machineVoting: map[string]bool{}, 195 members: map[string]*replicaset.Member{}, 196 }, 197 } 198 return peerChanges.computeDesiredPeerGroup() 199 } 200 201 func (p *peerGroupChanges) computeDesiredPeerGroup() (desiredChanges, error) { 202 203 // We may find extra peer group members if the machines have been removed 204 // or their controller status removed. 205 // This should only happen if they had been set to non-voting before 206 // removal, in which case we want to remove them from the members list. 207 // If we find a member that is still configured to vote, it is an error. 208 // TODO: There are some other possibilities for what to do in that case. 209 // 1) Leave them untouched, but deal with others as usual (ignore). 210 // 2) Leave them untouched and deal with others, but make sure the extras 211 // are not eligible to be primary. 212 // 3) Remove them. 213 // 4) Do nothing. 
214 err := p.checkExtraMembers() 215 if err != nil { 216 return desiredChanges{}, errors.Trace(err) 217 } 218 219 p.desired.members = p.initNewReplicaSet() 220 p.possiblePeerGroupChanges() 221 p.reviewPeerGroupChanges() 222 p.createNonVotingMember() 223 224 // Set up initial record of machine votes. Any changes after 225 // this will trigger a peer group election. 226 p.getMachinesVoting() 227 p.adjustVotes() 228 229 if err := p.updateAddresses(); err != nil { 230 return desiredChanges{}, errors.Trace(err) 231 } 232 233 return p.desired, nil 234 } 235 236 // checkExtraMembers checks to see if any of the input members, identified as 237 // not being associated with machines, is set as a voter in the peer group. 238 // If any have, an error is returned. 239 // The boolean indicates whether any extra members were present at all. 240 func (p *peerGroupChanges) checkExtraMembers() error { 241 // Note: (jam 2018-04-18) With the new "juju remove-machine --force" it is much easier to get into this situation 242 // because an active controller that is in the replicaset would get removed while it still had voting rights. 243 // Given that Juju is in control of the replicaset we don't really just 'accept' that some other machine has a vote. 244 // *maybe* we could allow non-voting members that would be used by 3rd parties to provide a warm database backup. 245 // But I think the right answer is probably to downgrade unknown members from voting. 246 for _, member := range p.info.extra { 247 if isVotingMember(&member) { 248 return fmt.Errorf("voting non-machine member %v found in peer group", member) 249 } 250 } 251 if len(p.info.extra) > 0 { 252 p.desired.isChanged = true 253 } 254 return nil 255 } 256 257 // sortAsInts converts all the vals to an integer to sort them as numbers instead of strings 258 // If any of the values are not valid integers, they will be sorted as stirngs, and added to the end 259 // the slice will be sorted in place. 
// (generally this should only be used for strings we expect to represent ints, but we don't want to error if
// something isn't an int.)
// Values keep their original spelling: "007" sorts as 7 but is stored back as
// "007", so the results remain usable as keys into maps indexed by the
// original strings. Values that parse to the same integer keep their relative
// order.
func sortAsInts(vals []string) {
	// numbered pairs a parsed integer with the string it came from, so that
	// the original text can be written back after sorting.
	type numbered struct {
		num int
		raw string
	}

	asInts := make([]numbered, 0, len(vals))
	var extra []string
	for _, val := range vals {
		asInt, err := strconv.Atoi(val)
		if err != nil {
			extra = append(extra, val)
			continue
		}
		asInts = append(asInts, numbered{num: asInt, raw: val})
	}

	sort.SliceStable(asInts, func(i, j int) bool {
		return asInts[i].num < asInts[j].num
	})
	sort.Strings(extra)

	// Integers first, then the non-integer values.
	i := 0
	for _, n := range asInts {
		vals[i] = n.raw
		i++
	}
	copy(vals[i:], extra)
}

// possiblePeerGroupChanges classifies all the existing machines according to
// how their vote might move, recording the result on the receiver.
// toRemoveVote holds machines whose vote should be removed;
// toAddVote holds machines which are ready to vote;
// toKeepVoting and toKeepNonVoting hold machines with no desired change to
// their voting status;
// toKeepCreateNonVotingMember holds machines that want the vote but are not
// yet represented in the peer group.
292 func (p *peerGroupChanges) possiblePeerGroupChanges() { 293 machineIds := make([]string, 0, len(p.info.machines)) 294 for id := range p.info.machines { 295 machineIds = append(machineIds, id) 296 } 297 sortAsInts(machineIds) 298 logger.Debugf("assessing possible peer group changes:") 299 for _, id := range machineIds { 300 m := p.info.machines[id] 301 member := p.desired.members[id] 302 isVoting := member != nil && isVotingMember(member) 303 wantsVote := m.WantsVote() 304 switch { 305 case wantsVote && isVoting: 306 logger.Debugf("machine %q is already voting", id) 307 p.toKeepVoting = append(p.toKeepVoting, id) 308 case wantsVote && !isVoting: 309 if status, ok := p.info.statuses[id]; ok && isReady(status) { 310 logger.Debugf("machine %q is a potential voter", id) 311 p.toAddVote = append(p.toAddVote, id) 312 } else if member != nil { 313 logger.Debugf("machine %q exists but is not ready (status: %v, healthy: %v)", 314 id, status.State, status.Healthy) 315 p.toKeepNonVoting = append(p.toKeepNonVoting, id) 316 } else { 317 logger.Debugf("machine %q does not exist and is not ready (status: %v, healthy: %v)", 318 id, status.State, status.Healthy) 319 p.toKeepCreateNonVotingMember = append(p.toKeepCreateNonVotingMember, id) 320 } 321 case !wantsVote && isVoting: 322 p.toRemoveVote = append(p.toRemoveVote, id) 323 if isPrimaryMember(p.info, id) { 324 p.desired.stepDownPrimary = true 325 logger.Debugf("primary machine %q is a potential non-voter", id) 326 } else { 327 logger.Debugf("machine %q is a potential non-voter", id) 328 } 329 case !wantsVote && !isVoting: 330 logger.Debugf("machine %q does not want the vote", id) 331 p.toKeepNonVoting = append(p.toKeepNonVoting, id) 332 } 333 } 334 logger.Debugf("assessed") 335 } 336 337 func isReady(status replicaset.MemberStatus) bool { 338 return status.Healthy && (status.State == replicaset.PrimaryState || 339 status.State == replicaset.SecondaryState) 340 } 341 342 // reviewPeerGroupChanges adds some extra logic after 
creating 343 // possiblePeerGroupChanges to safely add or remove machines, keeping the 344 // correct odd number of voters peer structure, and preventing the primary from 345 // demotion. 346 func (p *peerGroupChanges) reviewPeerGroupChanges() { 347 currVoters := 0 348 for _, m := range p.desired.members { 349 if isVotingMember(m) { 350 currVoters += 1 351 } 352 } 353 keptVoters := currVoters - len(p.toRemoveVote) 354 if keptVoters == 0 { 355 // to keep no voters means to step down the primary without a replacement, which is not possible. 356 // So restore the current primary. Once there is another member to work with after reconfiguring, we will then 357 // be able to ask the current primary to step down, and then we can finally remove it. 358 var tempToRemove []string 359 for _, id := range p.toRemoveVote { 360 isPrimary := isPrimaryMember(p.info, id) 361 if !isPrimary { 362 tempToRemove = append(tempToRemove, id) 363 } else { 364 logger.Debugf("asked to remove all voters, preserving primary voter %q", id) 365 p.desired.stepDownPrimary = false 366 } 367 } 368 p.toRemoveVote = tempToRemove 369 } 370 newCount := keptVoters + len(p.toAddVote) 371 if (newCount)%2 == 1 { 372 logger.Debugf("number of voters is odd") 373 // if this is true we will create an odd number of voters 374 return 375 } 376 if len(p.toAddVote) > 0 { 377 last := p.toAddVote[len(p.toAddVote)-1] 378 logger.Debugf("number of voters would be even, not adding %q to maintain odd", last) 379 p.toAddVote = p.toAddVote[:len(p.toAddVote)-1] 380 return 381 } 382 // we must remove an extra peer 383 // make sure we don't pick the primary to be removed. 
384 for i, id := range p.toKeepVoting { 385 if !isPrimaryMember(p.info, id) { 386 p.toRemoveVote = append(p.toRemoveVote, id) 387 logger.Debugf("removing vote from %q to maintain odd number of voters", id) 388 if i == len(p.toKeepVoting)-1 { 389 p.toKeepVoting = p.toKeepVoting[:i] 390 } else { 391 p.toKeepVoting = append(p.toKeepVoting[:i], p.toKeepVoting[i+1:]...) 392 } 393 break 394 } 395 } 396 } 397 398 func isVotingMember(m *replicaset.Member) bool { 399 v := m.Votes 400 return v == nil || *v > 0 401 } 402 403 func isPrimaryMember(info *peerGroupInfo, id string) bool { 404 return info.statuses[id].State == replicaset.PrimaryState 405 } 406 407 func setMemberVoting(member *replicaset.Member, voting bool) { 408 if voting { 409 member.Votes = nil 410 member.Priority = nil 411 } else { 412 votes := 0 413 member.Votes = &votes 414 priority := 0.0 415 member.Priority = &priority 416 } 417 } 418 419 // adjustVotes removes and adds votes to the members via setVoting. 420 func (p *peerGroupChanges) adjustVotes() { 421 setVoting := func(memberIds []string, voting bool) { 422 for _, id := range memberIds { 423 setMemberVoting(p.desired.members[id], voting) 424 p.desired.machineVoting[id] = voting 425 } 426 } 427 428 if len(p.toAddVote) > 0 || 429 len(p.toRemoveVote) > 0 || 430 len(p.toKeepCreateNonVotingMember) > 0 { 431 p.desired.isChanged = true 432 } 433 setVoting(p.toAddVote, true) 434 setVoting(p.toRemoveVote, false) 435 setVoting(p.toKeepCreateNonVotingMember, false) 436 } 437 438 // createMembers from a list of member IDs, instantiate a new replica-set 439 // member and add it to members map with the given ID. 
440 func (p *peerGroupChanges) createNonVotingMember() { 441 for _, id := range p.toKeepCreateNonVotingMember { 442 logger.Debugf("create member with id %q", id) 443 p.info.maxMemberId++ 444 member := &replicaset.Member{ 445 Tags: map[string]string{ 446 jujuMachineKey: id, 447 }, 448 Id: p.info.maxMemberId, 449 } 450 setMemberVoting(member, false) 451 p.desired.members[id] = member 452 } 453 for _, id := range p.toKeepNonVoting { 454 if p.desired.members[id] != nil { 455 continue 456 } 457 logger.Debugf("create member with id %q", id) 458 p.info.maxMemberId++ 459 member := &replicaset.Member{ 460 Tags: map[string]string{ 461 jujuMachineKey: id, 462 }, 463 Id: p.info.maxMemberId, 464 } 465 setMemberVoting(member, false) 466 p.desired.members[id] = member 467 } 468 } 469 470 func (p *peerGroupChanges) getMachinesVoting() { 471 for id, m := range p.desired.members { 472 p.desired.machineVoting[id] = isVotingMember(m) 473 } 474 } 475 476 // updateAddresses updates the member addresses in the new replica-set, using 477 // the HA space if one is configured. 478 func (p *peerGroupChanges) updateAddresses() error { 479 var err error 480 if p.info.haSpace == "" { 481 err = p.updateAddressesFromInternal() 482 } else { 483 err = p.updateAddressesFromSpace() 484 } 485 return errors.Annotate(err, "updating member addresses") 486 } 487 488 const multiAddressMessage = "multiple usable addresses found" + 489 "\nrun \"juju config juju-ha-space=<name>\" to set a space for Mongo peer communication" 490 491 // updateAddressesFromInternal attempts to update each member with a 492 // cloud-local address from the machine. 493 // If there is a single cloud local address available, it is used. 494 // If there are multiple addresses, then a check is made to ensure that: 495 // - the member was previously in the replica-set and; 496 // - the previous address used for replication is still available. 497 // If the check is satisfied, then a warning is logged and no change is made. 
498 // Otherwise an error is returned to indicate that a HA space must be 499 // configured in order to proceed. Such machines have their status set to 500 // indicate that they require intervention. 501 func (p *peerGroupChanges) updateAddressesFromInternal() error { 502 var multipleAddresses []string 503 504 for _, id := range p.sortedMemberIds() { 505 m := p.info.machines[id] 506 hostPorts := m.GetPotentialMongoHostPorts(p.info.mongoPort) 507 addrs := network.SelectInternalHostPorts(hostPorts, false) 508 509 // This should not happen because SelectInternalHostPorts will choose a 510 // public address when there are no cloud-local addresses. 511 // Zero addresses would mean the machine is completely inaccessible. 512 // We ignore this outcome and leave the address alone. 513 if len(addrs) == 0 { 514 continue 515 } 516 517 // Unique address; we can use this for Mongo peer communication. 518 member := p.desired.members[id] 519 if len(addrs) == 1 { 520 addr := addrs[0] 521 logger.Debugf("machine %q selected address %q by scope from %v", id, addr, hostPorts) 522 523 if member.Address != addr { 524 member.Address = addr 525 p.desired.isChanged = true 526 } 527 continue 528 } 529 530 // Multiple potential Mongo addresses. 531 // Checks are required in order to use it as a peer. 532 unchanged := false 533 if _, ok := p.info.recognised[id]; ok { 534 for _, addr := range addrs { 535 if member.Address == addr { 536 logger.Warningf("%s\npreserving member with unchanged address %q", multiAddressMessage, addr) 537 unchanged = true 538 break 539 } 540 } 541 } 542 543 // If this member was not previously in the replica-set, or if its 544 // address has changed, we enforce the policy of requiring a 545 // configured HA space when there are multiple cloud-local addresses. 
546 if !unchanged { 547 multipleAddresses = append(multipleAddresses, id) 548 if err := m.stm.SetStatus(getStatusInfo(multiAddressMessage)); err != nil { 549 return errors.Trace(err) 550 } 551 } 552 } 553 554 if len(multipleAddresses) > 0 { 555 ids := strings.Join(multipleAddresses, ", ") 556 return fmt.Errorf("juju-ha-space is not set and these machines have more than one usable address: %s"+ 557 "\nrun \"juju config juju-ha-space=<name>\" to set a space for Mongo peer communication", ids) 558 } 559 return nil 560 } 561 562 // updateAddressesFromSpace updates the member addresses based on the 563 // configured HA space. 564 // If no addresses are available for any of the machines, then such machines 565 // have their status set and are included in the detail of the returned error. 566 func (p *peerGroupChanges) updateAddressesFromSpace() error { 567 space := p.info.haSpace 568 var noAddresses []string 569 570 for _, id := range p.sortedMemberIds() { 571 m := p.info.machines[id] 572 addr, err := m.SelectMongoAddressFromSpace(p.info.mongoPort, space) 573 if err != nil { 574 if errors.IsNotFound(err) { 575 noAddresses = append(noAddresses, id) 576 msg := fmt.Sprintf("no addresses in configured juju-ha-space %q", space) 577 if err := m.stm.SetStatus(getStatusInfo(msg)); err != nil { 578 return errors.Trace(err) 579 } 580 continue 581 } 582 return errors.Trace(err) 583 } 584 if addr != p.desired.members[id].Address { 585 p.desired.members[id].Address = addr 586 p.desired.isChanged = true 587 } 588 } 589 590 if len(noAddresses) > 0 { 591 ids := strings.Join(noAddresses, ", ") 592 return fmt.Errorf("no usable Mongo addresses found in configured juju-ha-space %q for machines: %s", space, ids) 593 } 594 return nil 595 } 596 597 // sortedMemberIds returns the list of p.desired.members in integer-sorted order 598 func (p *peerGroupChanges) sortedMemberIds() []string { 599 memberIds := make([]string, 0, len(p.desired.members)) 600 for id := range p.desired.members { 601 
memberIds = append(memberIds, id) 602 } 603 sortAsInts(memberIds) 604 return memberIds 605 } 606 607 // getStatusInfo creates and returns a StatusInfo instance for use as a machine 608 // status. The *machine* status is not ideal for conveying this information, 609 // which is a really a characteristic of its role as a controller application. 610 // For this reason we leave the status as "Started" and supplement with an 611 // appropriate message. 612 // This is subject to change if/when controller status is represented in its 613 // own right. 614 func getStatusInfo(msg string) status.StatusInfo { 615 now := time.Now() 616 return status.StatusInfo{ 617 Status: status.Started, 618 Message: msg, 619 Since: &now, 620 } 621 }