github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/worker/peergrouper/worker.go

// Copyright 2014 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package peergrouper

import (
	"fmt"
	"net"
	"reflect"
	"sort"
	"strconv"
	"strings"
	"time"

	"github.com/juju/clock"
	"github.com/juju/errors"
	"github.com/juju/loggo"
	"github.com/juju/replicaset"
	"github.com/kr/pretty"
	"gopkg.in/juju/worker.v1"
	"gopkg.in/juju/worker.v1/catacomb"

	"github.com/juju/juju/controller"
	"github.com/juju/juju/core/status"
	"github.com/juju/juju/network"
	"github.com/juju/juju/pubsub/apiserver"
	"github.com/juju/juju/state"
)

var logger = loggo.GetLogger("juju.worker.peergrouper")

type State interface {
	RemoveControllerMachine(m Machine) error
	ControllerConfig() (controller.Config, error)
	ControllerInfo() (*state.ControllerInfo, error)
	Machine(id string) (Machine, error)
	WatchControllerInfo() state.NotifyWatcher
	WatchControllerStatusChanges() state.StringsWatcher
	WatchControllerConfig() state.NotifyWatcher
}

type Space interface {
	Name() string
}

type Machine interface {
	Id() string
	Life() state.Life
	Status() (status.StatusInfo, error)
	SetStatus(status.StatusInfo) error
	Refresh() error
	Watch() state.NotifyWatcher
	WantsVote() bool
	HasVote() bool
	SetHasVote(hasVote bool) error
	Addresses() []network.Address
}

type MongoSession interface {
	CurrentStatus() (*replicaset.Status, error)
	CurrentMembers() ([]replicaset.Member, error)
	Set([]replicaset.Member) error
	StepDownPrimary() error
	Refresh()
}

type APIHostPortsSetter interface {
	SetAPIHostPorts([][]network.HostPort) error
}

var (
	// If we fail to set the mongo replica set members,
	// we start retrying with the following interval,
	// before exponentially backing off with each further
	// attempt.
	initialRetryInterval = 2 * time.Second

	// maxRetryInterval holds the maximum interval
	// between retry attempts.
	maxRetryInterval = 5 * time.Minute

	// pollInterval holds the interval at which the replica set
	// members will be updated even in the absence of changes
	// to State. This enables us to make changes to members
	// that are triggered by changes to member status.
	pollInterval = 1 * time.Minute
)

// Hub defines the methods of the apiserver centralhub that the peer
// grouper uses.
type Hub interface {
	Subscribe(topic string, handler interface{}) (func(), error)
	Publish(topic string, data interface{}) (<-chan struct{}, error)
}

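// For illustration: the peer grouper subscribes to detail requests and
// publishes server details back over the same hub (see loop and
// publishAPIServerDetails below). A sketch of the round trip:
//
//	unsubscribe, _ := hub.Subscribe(apiserver.DetailsRequestTopic, handler)
//	defer unsubscribe()
//	hub.Publish(apiserver.DetailsTopic, details)
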
// pgWorker is a worker which watches the controller machines in state
// as well as the MongoDB replicaset configuration, adding and
// removing controller machines as they change or are added and
// removed.
type pgWorker struct {
	catacomb catacomb.Catacomb

	config Config

	// machineChanges receives events from the machineTrackers when
	// controller machines change in ways that are relevant to the
	// peergrouper.
	machineChanges chan struct{}

	// machineTrackers holds the workers which track the machines we
	// are currently watching (all the controller machines).
	machineTrackers map[string]*machineTracker

	// detailsRequests is used to feed details requests from the hub into the main loop.
	detailsRequests chan string

	// serverDetails holds the last server information broadcast via pub/sub.
	// It is used to detect changes since the last publish.
	serverDetails apiserver.Details
}

// Config holds the configuration for a peergrouper worker.
type Config struct {
	State              State
	APIHostPortsSetter APIHostPortsSetter
	MongoSession       MongoSession
	Clock              clock.Clock
	SupportsSpaces     bool
	MongoPort          int
	APIPort            int
	ControllerAPIPort  int

	// Hub is the central hub of the apiserver,
	// and is used to publish the details of the
	// API servers.
	Hub Hub
}

// Validate validates the worker configuration.
func (config Config) Validate() error {
	if config.State == nil {
		return errors.NotValidf("nil State")
	}
	if config.APIHostPortsSetter == nil {
		return errors.NotValidf("nil APIHostPortsSetter")
	}
	if config.MongoSession == nil {
		return errors.NotValidf("nil MongoSession")
	}
	if config.Clock == nil {
		return errors.NotValidf("nil Clock")
	}
	if config.Hub == nil {
		return errors.NotValidf("nil Hub")
	}
	if config.MongoPort <= 0 {
		return errors.NotValidf("non-positive MongoPort")
	}
	if config.APIPort <= 0 {
		return errors.NotValidf("non-positive APIPort")
	}
	// TODO Juju 3.0: make ControllerAPIPort required.
	return nil
}

// New returns a new worker that maintains the mongo replica set
// with respect to the given state.
func New(config Config) (worker.Worker, error) {
	if err := config.Validate(); err != nil {
		return nil, errors.Trace(err)
	}

	w := &pgWorker{
		config:          config,
		machineChanges:  make(chan struct{}),
		machineTrackers: make(map[string]*machineTracker),
		detailsRequests: make(chan string),
	}
	err := catacomb.Invoke(catacomb.Plan{
		Site: &w.catacomb,
		Work: w.loop,
	})
	if err != nil {
		return nil, errors.Trace(err)
	}
	return w, nil
}

// Kill is part of the worker.Worker interface.
func (w *pgWorker) Kill() {
	w.catacomb.Kill(nil)
}

// Wait is part of the worker.Worker interface.
func (w *pgWorker) Wait() error {
	return w.catacomb.Wait()
}

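// A minimal construction sketch (all values are illustrative assumptions;
// real callers wire these dependencies from the running controller):
//
//	w, err := New(Config{
//		State:              st,
//		APIHostPortsSetter: setter,
//		MongoSession:       session,
//		Clock:              clock.WallClock,
//		Hub:                hub,
//		MongoPort:          37017,
//		APIPort:            17070,
//	})
//	if err != nil { ... }
//	defer worker.Stop(w)
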
func (w *pgWorker) loop() error {
	controllerChanges, err := w.watchForControllerChanges()
	if err != nil {
		return errors.Trace(err)
	}

	configChanges, err := w.watchForConfigChanges()
	if err != nil {
		return errors.Trace(err)
	}

	unsubscribe, err := w.config.Hub.Subscribe(apiserver.DetailsRequestTopic, w.apiserverDetailsRequested)
	if err != nil {
		return errors.Trace(err)
	}
	defer unsubscribe()

	var updateChan <-chan time.Time
	retryInterval := initialRetryInterval

	for {
		logger.Tracef("waiting...")
		select {
		case <-w.catacomb.Dying():
			return w.catacomb.ErrDying()
		case <-controllerChanges:
			// A controller machine was added or removed.
			logger.Tracef("<-controllerChanges")
			changed, err := w.updateControllerMachines()
			if err != nil {
				return errors.Trace(err)
			}
			if !changed {
				continue
			}
			logger.Tracef("controller added or removed, update replica now")
		case <-w.machineChanges:
			// One of the controller machines changed.
			logger.Tracef("<-w.machineChanges")
		case <-configChanges:
			// Controller config has changed.
			logger.Tracef("<-w.configChanges")

			// If a config change wakes up the loop before the topology has
			// been represented in the worker's machine trackers, ignore it;
			// errors will occur when trying to determine peer group changes.
			// Continuing is OK because subsequent invocations of the loop will
			// pick up the most recent config from state anyway.
			if len(w.machineTrackers) == 0 {
				logger.Tracef("no controller information, ignoring config change")
				continue
			}
		case requester := <-w.detailsRequests:
			// A client requested the details be resent (probably
			// because they just subscribed).
			logger.Tracef("<-w.detailsRequests (from %q)", requester)
			w.config.Hub.Publish(apiserver.DetailsTopic, w.serverDetails)
			continue
		case <-updateChan:
			// Scheduled update.
			logger.Tracef("<-updateChan")
			updateChan = nil
		}

		servers := w.apiServerHostPorts()
		apiHostPorts := make([][]network.HostPort, 0, len(servers))
		for _, serverHostPorts := range servers {
			apiHostPorts = append(apiHostPorts, serverHostPorts)
		}

		var failed bool
		if err := w.config.APIHostPortsSetter.SetAPIHostPorts(apiHostPorts); err != nil {
			logger.Errorf("cannot write API server addresses: %v", err)
			failed = true
		}

		members, err := w.updateReplicaSet()
		if err != nil {
			if _, isReplicaSetError := err.(*replicaSetError); isReplicaSetError {
				logger.Errorf("cannot set replicaset: %v", err)
			} else if _, isStepDownPrimary := err.(*stepDownPrimaryError); !isStepDownPrimary {
				return errors.Trace(err)
			}
			// Replica-set errors and stepping down the primary are both
			// considered fast-retry 'failures': we need to re-read the state
			// after a short timeout and re-evaluate the replica set.
			failed = true
		}
		w.publishAPIServerDetails(servers, members)

		if failed {
			logger.Tracef("failed, waking up after: %v", retryInterval)
			updateChan = w.config.Clock.After(retryInterval)
			retryInterval = scaleRetry(retryInterval)
		} else {
			// Update the replica set members occasionally to keep them up to
			// date with the current replica-set member statuses.
			logger.Tracef("succeeded, waking up after: %v", pollInterval)
			if updateChan == nil {
				updateChan = w.config.Clock.After(pollInterval)
			}
			retryInterval = initialRetryInterval
		}
	}
}

func scaleRetry(value time.Duration) time.Duration {
	value *= 2
	if value > maxRetryInterval {
		value = maxRetryInterval
	}
	return value
}

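// For illustration: with initialRetryInterval = 2s and maxRetryInterval = 5m,
// consecutive failures wait 2s, 4s, 8s, ... doubling until capped at 5m;
// any success resets the interval to 2s and schedules the next routine
// update one pollInterval (1m) later:
//
//	interval := initialRetryInterval // 2s
//	interval = scaleRetry(interval)  // 4s
//	interval = scaleRetry(interval)  // 8s
//	// ...
//	interval = scaleRetry(5 * time.Minute) // still 5m (capped)
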
func (w *pgWorker) watchForControllerChanges() (<-chan struct{}, error) {
	controllerInfoWatcher := w.config.State.WatchControllerInfo()
	if err := w.catacomb.Add(controllerInfoWatcher); err != nil {
		return nil, errors.Trace(err)
	}

	controllerStatusWatcher := w.config.State.WatchControllerStatusChanges()
	if err := w.catacomb.Add(controllerStatusWatcher); err != nil {
		return nil, errors.Trace(err)
	}

	// Fan both watchers into a single event channel for the main loop.
	out := make(chan struct{})
	go func() {
		for {
			select {
			case <-w.catacomb.Dying():
				return
			case <-controllerInfoWatcher.Changes():
				out <- struct{}{}
			case <-controllerStatusWatcher.Changes():
				out <- struct{}{}
			}
		}
	}()
	return out, nil
}

// watchForConfigChanges starts a watcher for changes to controller config.
// It returns a channel which will receive events if the watcher fires.
// This is separate from watchForControllerChanges because of the worker loop
// logic. If controller machines have not changed, then further processing
// does not occur, whereas we want to re-publish API addresses and check
// for replica-set changes if either the management or HA space configs have
// changed.
func (w *pgWorker) watchForConfigChanges() (<-chan struct{}, error) {
	controllerConfigWatcher := w.config.State.WatchControllerConfig()
	if err := w.catacomb.Add(controllerConfigWatcher); err != nil {
		return nil, errors.Trace(err)
	}
	return controllerConfigWatcher.Changes(), nil
}

// updateControllerMachines updates the peergrouper's current list of
// controller machines, as well as starting and stopping trackers for
// them as they are added and removed.
func (w *pgWorker) updateControllerMachines() (bool, error) {
	info, err := w.config.State.ControllerInfo()
	if err != nil {
		return false, fmt.Errorf("cannot get controller info: %v", err)
	}

	logger.Debugf("controller machines in state: %#v", info.MachineIds)
	changed := false

	// Stop machine goroutines that no longer correspond to controller
	// machines.
	for _, m := range w.machineTrackers {
		if !inStrings(m.Id(), info.MachineIds) {
			worker.Stop(m)
			delete(w.machineTrackers, m.Id())
			changed = true
		}
	}

	// Start machines with no watcher.
	for _, id := range info.MachineIds {
		stm, err := w.config.State.Machine(id)
		if err != nil {
			if errors.IsNotFound(err) {
				// If the machine isn't found, it must have been
				// removed and will soon enough be removed
				// from the controller list. This will probably
				// never happen, but we'll code defensively anyway.
				logger.Warningf("machine %q from controller list not found", id)
				continue
			}
			return false, fmt.Errorf("cannot get machine %q: %v", id, err)
		}
		if _, ok := w.machineTrackers[id]; ok {
			continue
		}
		logger.Debugf("found new machine %q", id)

		// Don't add the machine until it has started (i.e. is no longer
		// Pending).
		machineStatus, err := stm.Status()
		if err != nil {
			return false, errors.Annotatef(err, "cannot get status for machine %q", id)
		}
		// A machine in status Error or Stopped might still be properly
		// running the controller. We still want to treat it as an active
		// machine, even if we're trying to tear it down.
		if machineStatus.Status != status.Pending {
			logger.Debugf("machine %q has started, adding it to peergrouper list", id)
			tracker, err := newMachineTracker(stm, w.machineChanges)
			if err != nil {
				return false, errors.Trace(err)
			}
			if err := w.catacomb.Add(tracker); err != nil {
				return false, errors.Trace(err)
			}
			w.machineTrackers[id] = tracker
			changed = true
		} else {
			logger.Debugf("machine %q not ready: %v", id, machineStatus.Status)
		}
	}
	return changed, nil
}

func (w *pgWorker) apiserverDetailsRequested(topic string, request apiserver.DetailsRequest, err error) {
	if err != nil {
		// This shouldn't happen (barring programmer error ;) - treat it as fatal.
		w.catacomb.Kill(errors.Annotate(err, "apiserver details request callback failed"))
		return
	}
	select {
	case w.detailsRequests <- request.Requester:
	case <-w.catacomb.Dying():
	}
}

func inStrings(t string, ss []string) bool {
	for _, s := range ss {
		if s == t {
			return true
		}
	}
	return false
}

// apiServerHostPorts returns the host-ports for each apiserver machine.
func (w *pgWorker) apiServerHostPorts() map[string][]network.HostPort {
	servers := make(map[string][]network.HostPort)
	for _, m := range w.machineTrackers {
		hostPorts := network.AddressesWithPort(m.Addresses(), w.config.APIPort)
		if len(hostPorts) == 0 {
			continue
		}
		servers[m.Id()] = hostPorts
	}
	return servers
}

// publishAPIServerDetails publishes the details corresponding to the latest
// known controller/replica-set topology if it has changed from the last known
// state.
func (w *pgWorker) publishAPIServerDetails(
	servers map[string][]network.HostPort,
	members map[string]*replicaset.Member,
) {
	details := apiserver.Details{
		Servers:   make(map[string]apiserver.APIServer),
		LocalOnly: true,
	}
	internalPort := w.config.ControllerAPIPort
	if internalPort == 0 {
		internalPort = w.config.APIPort
	}
	for id, hostPorts := range servers {
		var internalAddress string
		if members[id] != nil {
			mongoAddress, _, err := net.SplitHostPort(members[id].Address)
			if err == nil {
				internalAddress = net.JoinHostPort(mongoAddress, strconv.Itoa(internalPort))
			}
		}
		server := apiserver.APIServer{
			ID:              id,
			InternalAddress: internalAddress,
		}
		for _, hp := range network.FilterUnusableHostPorts(hostPorts) {
			server.Addresses = append(server.Addresses, hp.String())
		}
		sort.Strings(server.Addresses)
		details.Servers[server.ID] = server
	}

	if !reflect.DeepEqual(w.serverDetails, details) {
		w.config.Hub.Publish(apiserver.DetailsTopic, details)
		w.serverDetails = details
	}
}

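// For illustration (addresses and IDs are assumed values), a published
// apiserver.Details for a three-machine controller might look like:
//
//	Details{
//		Servers: map[string]APIServer{
//			"0": {ID: "0", Addresses: []string{"10.0.0.1:17070"}, InternalAddress: "10.0.0.1:17070"},
//			"1": {ID: "1", Addresses: []string{"10.0.0.2:17070"}, InternalAddress: "10.0.0.2:17070"},
//			"2": {ID: "2", Addresses: []string{"10.0.0.3:17070"}, InternalAddress: "10.0.0.3:17070"},
//		},
//		LocalOnly: true,
//	}
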
// replicaSetError holds an error returned as a result
// of calling replicaset.Set. As this is expected to fail
// in the normal course of things, it needs special treatment.
type replicaSetError struct {
	error
}

// stepDownPrimaryError means we needed to ask the primary to step down, so we
// should come back and re-evaluate the replica set once the new primary is
// voted in.
type stepDownPrimaryError struct {
	error
}

// updateReplicaSet sets the current replica set members, and applies the
// given voting status to machines in the state. A mapping of machine ID
// to replicaset.Member structures is returned.
func (w *pgWorker) updateReplicaSet() (map[string]*replicaset.Member, error) {
	info, err := w.peerGroupInfo()
	if err != nil {
		return nil, errors.Annotate(err, "creating peer group info")
	}
	desired, err := desiredPeerGroup(info)
	if err != nil {
		return nil, errors.Annotate(err, "computing desired peer group")
	}
	if logger.IsDebugEnabled() {
		if desired.isChanged {
			logger.Debugf("desired peer group members: \n%s", prettyReplicaSetMembers(desired.members))
		} else {
			var output []string
			for id, v := range desired.machineVoting {
				output = append(output, fmt.Sprintf(" %s: %v", id, v))
			}
			logger.Debugf("no change in desired peer group, voting: \n%s", strings.Join(output, "\n"))
		}
	}

	if desired.stepDownPrimary {
		logger.Infof("mongo primary machine needs to be removed, first requesting it to step down")
		if err := w.config.MongoSession.StepDownPrimary(); err != nil {
			// StepDownPrimary should have already handled the io.EOF that
			// mongo might give, so any error we get is unknown.
			return nil, errors.Annotate(err, "asking primary to step down")
		}
		// Asking the primary to step down forces us to disconnect from
		// Mongo, but session.Refresh() should get us reconnected so we can
		// keep operating.
		w.config.MongoSession.Refresh()
		// However, we no longer know who the primary is, so we have to error
		// out and have it re-evaluated.
		return nil, &stepDownPrimaryError{
			error: errors.Errorf("primary is stepping down, must reevaluate peer group"),
		}
	}

	// We cannot change the HasVote flag of a machine in state at exactly
	// the same moment as changing its voting status in the replica set.
	//
	// Thus we need to be careful that a machine which is actually a voting
	// member is not seen to not have a vote, because otherwise
	// there is nothing to prevent the machine being removed.
	//
	// To avoid this happening, we make sure when we call SetReplicaSet,
	// that the voting status of machines is the union of both old
	// and new voting machines - that is the set of HasVote machines
	// is a superset of all the actual voting machines.
	//
	// Only after the call has taken place do we reset the voting status
	// of the machines that have lost their vote.
	//
	// If there's a crash, the voting status may not reflect the
	// actual voting status for a while, but when things come
	// back online, it will be sorted out, as desiredPeerGroup
	// will return the actual voting status.
	//
	// Note that we potentially update the HasVote status of the machines even
	// if the members have not changed.
	var added, removed []*machineTracker
	// Iterate in obvious order so we don't get weird log messages.
	votingIds := make([]string, 0, len(desired.machineVoting))
	for id := range desired.machineVoting {
		votingIds = append(votingIds, id)
	}
	sortAsInts(votingIds)
	for _, id := range votingIds {
		hasVote := desired.machineVoting[id]
		m := info.machines[id]
		switch {
		case hasVote && !m.stm.HasVote():
			added = append(added, m)
		case !hasVote && m.stm.HasVote():
			removed = append(removed, m)
		}
	}
	if err := setHasVote(added, true); err != nil {
		return nil, errors.Annotate(err, "adding new voters")
	}
	if desired.isChanged {
		ms := make([]replicaset.Member, 0, len(desired.members))
		for _, m := range desired.members {
			ms = append(ms, *m)
		}
		if err := w.config.MongoSession.Set(ms); err != nil {
			// We've failed to set the replica set, so revert back
			// to the previous settings.
			if err1 := setHasVote(added, false); err1 != nil {
				logger.Errorf("cannot revert machine voting after failure to change replica set: %v", err1)
			}
			return nil, &replicaSetError{err}
		}
		logger.Infof("successfully updated replica set")
	}
	if err := setHasVote(removed, false); err != nil {
		return nil, errors.Annotate(err, "removing non-voters")
	}

	// Reset machine status for members of the changed peer-group.
	// Any previous peer-group determination errors result in status
	// warning messages.
	for id := range desired.members {
		if err := w.machineTrackers[id].stm.SetStatus(getStatusInfo("")); err != nil {
			return nil, errors.Trace(err)
		}
	}
	for _, tracker := range info.machines {
		if tracker.stm.Life() != state.Alive && !tracker.stm.HasVote() {
			logger.Debugf("removing dying controller machine %s", tracker.Id())
			if err := w.config.State.RemoveControllerMachine(tracker.stm); err != nil {
				logger.Errorf("failed to remove dying machine as a controller after removing its vote: %v", err)
			}
		}
	}
	for _, removedTracker := range removed {
		if removedTracker.stm.Life() == state.Alive {
			logger.Debugf("vote removed from %v but machine is %s", removedTracker.Id(), state.Alive)
		}
	}
	return desired.members, nil
}

func prettyReplicaSetMembers(members map[string]*replicaset.Member) string {
	var result []string
	// It's easier to read if we sort by Id.
	keys := make([]string, 0, len(members))
	for key := range members {
		keys = append(keys, key)
	}
	sort.Strings(keys)
	for _, key := range keys {
		m := members[key]
		voting := "not-voting"
		if isVotingMember(m) {
			voting = "voting"
		}
		result = append(result, fmt.Sprintf(" Id: %d, Tags: %v, %s", m.Id, m.Tags, voting))
	}
	return strings.Join(result, "\n")
}

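// For illustration (machine IDs are assumed): when the vote moves from
// machine "0" to machine "3", updateReplicaSet first grows the HasVote set
// to the union {0, 3}, then applies the new member list to mongo, and only
// then shrinks HasVote to {3}:
//
//	setHasVote(added, true)    // HasVote: {0, 3}
//	MongoSession.Set(members)  // mongo voters: {3}
//	setHasVote(removed, false) // HasVote: {3}
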
// peerGroupInfo collates current session information about the
// mongo peer group with information from state machines.
func (w *pgWorker) peerGroupInfo() (*peerGroupInfo, error) {
	sts, err := w.config.MongoSession.CurrentStatus()
	if err != nil {
		return nil, errors.Annotate(err, "cannot get replica set status")
	}

	members, err := w.config.MongoSession.CurrentMembers()
	if err != nil {
		return nil, errors.Annotate(err, "cannot get replica set members")
	}

	haSpace, err := w.getHASpaceFromConfig()
	if err != nil {
		return nil, err
	}

	logger.Tracef("read peer group info: %# v\n%# v", pretty.Formatter(sts), pretty.Formatter(members))
	return newPeerGroupInfo(w.machineTrackers, sts.Members, members, w.config.MongoPort, haSpace)
}

// getHASpaceFromConfig returns a SpaceName from the controller config for
// HA space. If unset, the empty space ("") will be returned.
func (w *pgWorker) getHASpaceFromConfig() (network.SpaceName, error) {
	config, err := w.config.State.ControllerConfig()
	if err != nil {
		return network.SpaceName(""), err
	}
	return network.SpaceName(config.JujuHASpace()), nil
}

// setHasVote sets the HasVote status of all the given machines to hasVote.
func setHasVote(ms []*machineTracker, hasVote bool) error {
	if len(ms) == 0 {
		return nil
	}
	logger.Infof("setting HasVote=%v on machines %v", hasVote, ms)
	for _, m := range ms {
		if err := m.stm.SetHasVote(hasVote); err != nil {
			return fmt.Errorf("cannot set voting status of %q to %v: %v", m.Id(), hasVote, err)
		}
	}
	return nil
}