github.com/juju/juju@v0.0.0-20240327075706-a90865de2538/worker/peergrouper/worker.go

// Copyright 2014 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package peergrouper

import (
	"fmt"
	"net"
	"reflect"
	"sort"
	"strconv"
	"strings"
	"time"

	"github.com/juju/clock"
	"github.com/juju/collections/set"
	"github.com/juju/errors"
	"github.com/juju/loggo"
	"github.com/juju/replicaset/v3"
	"github.com/juju/worker/v3"
	"github.com/juju/worker/v3/catacomb"
	"github.com/kr/pretty"
	"github.com/prometheus/client_golang/prometheus"

	"github.com/juju/juju/controller"
	"github.com/juju/juju/core/network"
	"github.com/juju/juju/core/status"
	"github.com/juju/juju/pubsub/apiserver"
	"github.com/juju/juju/state"
)

var logger = loggo.GetLogger("juju.worker.peergrouper")

type State interface {
	RemoveControllerReference(m ControllerNode) error
	ControllerConfig() (controller.Config, error)
	ControllerIds() ([]string, error)
	ControllerNode(id string) (ControllerNode, error)
	ControllerHost(id string) (ControllerHost, error)
	WatchControllerInfo() state.StringsWatcher
	WatchControllerStatusChanges() state.StringsWatcher
	WatchControllerConfig() state.NotifyWatcher
	Space(name string) (Space, error)
}

type ControllerNode interface {
	Id() string
	Refresh() error
	Watch() state.NotifyWatcher
	WantsVote() bool
	HasVote() bool
	SetHasVote(hasVote bool) error
}

type ControllerHost interface {
	Id() string
	Life() state.Life
	Watch() state.NotifyWatcher
	SetStatus(status.StatusInfo) error
	Refresh() error
	Addresses() network.SpaceAddresses
}

type Space interface {
	NetworkSpace() (network.SpaceInfo, error)
}

type MongoSession interface {
	CurrentStatus() (*replicaset.Status, error)
	CurrentMembers() ([]replicaset.Member, error)
	Set([]replicaset.Member) error
	StepDownPrimary() error
	Refresh()
}

type APIHostPortsSetter interface {
	SetAPIHostPorts([]network.SpaceHostPorts) error
}

var (
	// If we fail to set the mongo replica set members,
	// we start retrying with the following interval,
	// before exponentially backing off with each further
	// attempt.
	initialRetryInterval = 2 * time.Second

	// maxRetryInterval holds the maximum interval
	// between retry attempts.
	maxRetryInterval = 5 * time.Minute

	// pollInterval holds the interval at which the replica set
	// members will be updated even in the absence of changes
	// to State. This enables us to make changes to members
	// that are triggered by changes to member status.
	pollInterval = 1 * time.Minute

	// IdleFunc allows tests to get a callback when the controller
	// has not been given any changes for a specified time.
	IdleFunc func()

	// IdleTime is how long the controller must go without changes
	// before it is considered idle.
	IdleTime = 50 * time.Millisecond
)

// Hub defines the methods of the apiserver centralhub that the peer
// grouper uses.
type Hub interface {
	Subscribe(topic string, handler interface{}) (func(), error)
	Publish(topic string, data interface{}) (func(), error)
}
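
// In this worker the hub is used to subscribe to
// apiserver.DetailsRequestTopic and to publish apiserver.Details on
// apiserver.DetailsTopic; see the main loop and publishAPIServerDetails
// below.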

// pgWorker is a worker which watches the controller nodes in state
// as well as the MongoDB replica-set configuration, adding and
// removing replica-set members as controller nodes are added,
// removed or changed.
type pgWorker struct {
	catacomb catacomb.Catacomb

	config Config

	// controllerChanges receives events from the controllerTrackers when
	// controller nodes change in ways that are relevant to the
	// peergrouper.
	controllerChanges chan struct{}

	// controllerTrackers holds the workers which track the nodes we
	// are currently watching (all the controller nodes).
	controllerTrackers map[string]*controllerTracker

	// detailsRequests is used to feed details requests from the hub into the main loop.
	detailsRequests chan string

	// serverDetails holds the last server information broadcast via pub/sub.
	// It is used to detect changes since the last publish.
	serverDetails apiserver.Details

	metrics *Collector

	idleFunc func()
}

// Config holds the configuration for a peergrouper worker.
type Config struct {
	State              State
	APIHostPortsSetter APIHostPortsSetter
	MongoSession       MongoSession
	Clock              clock.Clock
	MongoPort          int
	APIPort            int
	ControllerAPIPort  int

	// ControllerId is the id of the controller running this worker.
	// It is used to check whether this worker is running on the
	// primary mongo node.
	ControllerId func() string

	// Kubernetes controllers do not support HA yet.
	SupportsHA bool

	// Hub is the central hub of the apiserver,
	// and is used to publish the details of the
	// API servers.
	Hub Hub

	PrometheusRegisterer prometheus.Registerer

	// UpdateNotify is called when the update channel is signalled.
	// Used solely for test synchronization.
	UpdateNotify func()
}

// Validate validates the worker configuration.
func (config Config) Validate() error {
	if config.State == nil {
		return errors.NotValidf("nil State")
	}
	if config.APIHostPortsSetter == nil {
		return errors.NotValidf("nil APIHostPortsSetter")
	}
	if config.MongoSession == nil {
		return errors.NotValidf("nil MongoSession")
	}
	if config.Clock == nil {
		return errors.NotValidf("nil Clock")
	}
	if config.Hub == nil {
		return errors.NotValidf("nil Hub")
	}
	if config.PrometheusRegisterer == nil {
		return errors.NotValidf("nil PrometheusRegisterer")
	}
	if config.MongoPort <= 0 {
		return errors.NotValidf("non-positive MongoPort")
	}
	if config.APIPort <= 0 {
		return errors.NotValidf("non-positive APIPort")
	}
	// TODO Juju 3.0: make ControllerAPIPort required.
	return nil
}

// New returns a new worker that maintains the mongo replica set
// with respect to the given state.
func New(config Config) (worker.Worker, error) {
	if err := config.Validate(); err != nil {
		return nil, errors.Trace(err)
	}

	w := &pgWorker{
		config:             config,
		controllerChanges:  make(chan struct{}),
		controllerTrackers: make(map[string]*controllerTracker),
		detailsRequests:    make(chan string),
		idleFunc:           IdleFunc,
		metrics:            NewMetricsCollector(),
	}
	err := catacomb.Invoke(catacomb.Plan{
		Site: &w.catacomb,
		Work: w.loop,
	})
	if err != nil {
		return nil, errors.Trace(err)
	}
	return w, nil
}
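
// A minimal construction sketch, assuming the concrete State,
// APIHostPortsSetter, MongoSession and Hub implementations are supplied by
// the caller (in Juju itself this wiring is done elsewhere, by the worker's
// manifold; the port numbers and controller id below are illustrative only):
//
//	w, err := New(Config{
//		State:                st,
//		APIHostPortsSetter:   hostPortsSetter,
//		MongoSession:         session,
//		Clock:                clock.WallClock,
//		Hub:                  hub,
//		PrometheusRegisterer: registerer,
//		MongoPort:            37017,
//		APIPort:              17070,
//		ControllerId:         func() string { return "0" },
//		SupportsHA:           true,
//	})
//	if err != nil {
//		return nil, errors.Trace(err)
//	}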

// Kill is part of the worker.Worker interface.
func (w *pgWorker) Kill() {
	w.catacomb.Kill(nil)
}

// Wait is part of the worker.Worker interface.
func (w *pgWorker) Wait() error {
	return w.catacomb.Wait()
}

// Report is shown in the engine report.
func (w *pgWorker) Report() map[string]interface{} {
	if w.metrics == nil {
		return nil
	}
	return w.metrics.report()
}

func (w *pgWorker) loop() error {
	_ = w.config.PrometheusRegisterer.Register(w.metrics)
	defer w.config.PrometheusRegisterer.Unregister(w.metrics)

	controllerChanges, err := w.watchForControllerChanges()
	if err != nil {
		return errors.Trace(err)
	}

	configChanges, err := w.watchForConfigChanges()
	if err != nil {
		return errors.Trace(err)
	}

	unsubscribe, err := w.config.Hub.Subscribe(apiserver.DetailsRequestTopic, w.apiserverDetailsRequested)
	if err != nil {
		return errors.Trace(err)
	}
	defer unsubscribe()

	var updateChan <-chan time.Time
	retryInterval := initialRetryInterval

	idle := &time.Timer{}
	if w.idleFunc != nil {
		logger.Tracef("pgWorker %p set idle timeout to %s", w, IdleTime)
		idle = time.NewTimer(IdleTime)
		defer idle.Stop()
	}

	for {
		logger.Tracef("waiting...")
		select {
		case <-w.catacomb.Dying():
			return w.catacomb.ErrDying()
		case <-idle.C:
			logger.Tracef("pgWorker %p is idle", w)
			w.idleFunc()
			idle.Reset(IdleTime)
			continue
		case <-controllerChanges:
			// A controller was added or removed.
			logger.Tracef("<-controllerChanges")
			changed, err := w.updateControllerNodes()
			if err != nil {
				return errors.Trace(err)
			}
			if !changed {
				continue
			}
			logger.Tracef("controller added or removed, update replica now")
		case <-w.controllerChanges:
			// One of the controller nodes changed.
			logger.Tracef("<-w.controllerChanges")
		case <-configChanges:
			// Controller config has changed.
			logger.Tracef("<-w.configChanges")

			// If a config change wakes up the loop before the topology has
			// been represented in the worker's controller trackers, ignore it;
			// errors will occur when trying to determine peer group changes.
			// Continuing is OK because subsequent invocations of the loop will
			// pick up the most recent config from state anyway.
			if len(w.controllerTrackers) == 0 {
				logger.Tracef("no controller information, ignoring config change")
				continue
			}
		case requester := <-w.detailsRequests:
			// A client requested the details be resent (probably
			// because they just subscribed).
			logger.Tracef("<-w.detailsRequests (from %q)", requester)
			_, _ = w.config.Hub.Publish(apiserver.DetailsTopic, w.serverDetails)
			continue
		case <-updateChan:
			// Scheduled update.
			logger.Tracef("<-updateChan")
			updateChan = nil
			if w.config.UpdateNotify != nil {
				w.config.UpdateNotify()
			}
		}

		servers := w.apiServerHostPorts()
		apiHostPorts := make([]network.SpaceHostPorts, 0, len(servers))
		for _, serverHostPorts := range servers {
			apiHostPorts = append(apiHostPorts, serverHostPorts)
		}

		var failed bool
		if err := w.config.APIHostPortsSetter.SetAPIHostPorts(apiHostPorts); err != nil {
			logger.Errorf("cannot write API server addresses: %v", err)
			failed = true
		}

		members, err := w.updateReplicaSet()
		if err != nil {
			if errors.Is(err, replicaSetError) {
				logger.Errorf("cannot set replicaset: %v", err)
			} else if !errors.Is(err, stepDownPrimaryError) {
				return errors.Trace(err)
			} else {
				logger.Tracef("isStepDownPrimary error: %v", err)
			}
			// Replica-set errors and stepping down the primary are both
			// considered fast-retry 'failures': we need to re-read the state
			// after a short timeout and re-evaluate the replica set.
			failed = true
		}
		w.publishAPIServerDetails(servers, members)
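
		// Note: with the package-level defaults above (initialRetryInterval
		// of 2s, maxRetryInterval of 5m, pollInterval of 1m), a run of
		// consecutive failures retries after roughly 2s, 4s, 8s, ... 4m16s
		// and then every 5m, while a success resets the interval and falls
		// back to the 1m poll. Tests may patch these values, so treat the
		// numbers as defaults only.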

		if failed {
			logger.Tracef("failed, will wake up after: %v", retryInterval)
			updateChan = w.config.Clock.After(retryInterval)
			retryInterval = scaleRetry(retryInterval)
		} else {
			// Update the replica set members occasionally to keep them up to
			// date with the current replica-set member statuses.
			// If the previous update failed, updateChan is set to the retry
			// interval rather than the poll interval. So if we have just
			// processed an update, or have just succeeded after a failure,
			// reset updateChan to fire after the poll interval.
			if updateChan == nil || retryInterval != initialRetryInterval {
				logger.Tracef("succeeded, will wake up after: %v", pollInterval)
				updateChan = w.config.Clock.After(pollInterval)
			} else {
				logger.Tracef("succeeded, wait already pending")
			}
			retryInterval = initialRetryInterval
		}
		if w.idleFunc != nil {
			idle.Reset(IdleTime)
		}
	}
}

func scaleRetry(value time.Duration) time.Duration {
	value *= 2
	if value > maxRetryInterval {
		value = maxRetryInterval
	}
	return value
}

// watchForControllerChanges starts two watchers for changes to controller
// info and status.
// It returns a channel which will receive events if any of the watchers fires.
func (w *pgWorker) watchForControllerChanges() (<-chan struct{}, error) {
	controllerInfoWatcher := w.config.State.WatchControllerInfo()
	if err := w.catacomb.Add(controllerInfoWatcher); err != nil {
		return nil, errors.Trace(err)
	}

	controllerStatusWatcher := w.config.State.WatchControllerStatusChanges()
	if err := w.catacomb.Add(controllerStatusWatcher); err != nil {
		return nil, errors.Trace(err)
	}

	out := make(chan struct{})
	var notifyCh chan struct{}
	go func() {
		for {
			select {
			case <-w.catacomb.Dying():
				return
			case <-controllerInfoWatcher.Changes():
				notifyCh = out
			case <-controllerStatusWatcher.Changes():
				notifyCh = out
			case notifyCh <- struct{}{}:
				notifyCh = nil
			}
		}
	}()
	return out, nil
}
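
// The goroutine above coalesces events from both watchers: when either
// watcher fires, notifyCh is armed (set to out); once the main loop receives
// from out, notifyCh is set back to nil, so a burst of watcher events
// collapses into a single wake-up of the loop.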

// watchForConfigChanges starts a watcher for changes to controller config.
// It returns a channel which will receive events if the watcher fires.
// This is separate from watchForControllerChanges because of the worker loop
// logic. If controller nodes have not changed, then further processing
// does not occur, whereas we want to re-publish API addresses and check
// for replica-set changes if either the management or HA space configs have
// changed.
func (w *pgWorker) watchForConfigChanges() (<-chan struct{}, error) {
	controllerConfigWatcher := w.config.State.WatchControllerConfig()
	if err := w.catacomb.Add(controllerConfigWatcher); err != nil {
		return nil, errors.Trace(err)
	}
	return controllerConfigWatcher.Changes(), nil
}

// updateControllerNodes updates the peergrouper's current list of
// controller nodes, as well as starting and stopping trackers for
// them as they are added and removed.
func (w *pgWorker) updateControllerNodes() (bool, error) {
	controllerIds, err := w.config.State.ControllerIds()
	if err != nil {
		return false, fmt.Errorf("cannot get controller ids: %v", err)
	}

	logger.Debugf("controller nodes in state: %#v", controllerIds)
	changed := false

	// Stop controller goroutines that no longer correspond to controller nodes.
	for _, m := range w.controllerTrackers {
		if !inStrings(m.Id(), controllerIds) {
			_ = worker.Stop(m)
			delete(w.controllerTrackers, m.Id())
			changed = true
		}
	}

	// Start trackers for controllers that do not have one yet.
	for _, id := range controllerIds {
		controllerNode, err := w.config.State.ControllerNode(id)
		if err != nil {
			if errors.IsNotFound(err) {
				// If the controller isn't found, it must have been
				// removed and will soon enough be removed
				// from the controller list. This will probably
				// never happen, but we'll code defensively anyway.
				logger.Warningf("controller %q from controller list not found", id)
				continue
			}
			return false, fmt.Errorf("cannot get controller %q: %v", id, err)
		}
		controllerHost, err := w.config.State.ControllerHost(id)
		if err != nil {
			if errors.IsNotFound(err) {
				// If the controller isn't found, it must have been
				// removed and will soon enough be removed
				// from the controller list. This will probably
				// never happen, but we'll code defensively anyway.
				logger.Warningf("controller %q from controller list not found", id)
				continue
			}
			return false, fmt.Errorf("cannot get controller %q: %v", id, err)
		}
		if _, ok := w.controllerTrackers[id]; ok {
			continue
		}

		logger.Debugf("found new controller %q", id)
		tracker, err := newControllerTracker(controllerNode, controllerHost, w.controllerChanges)
		if err != nil {
			return false, errors.Trace(err)
		}
		if err := w.catacomb.Add(tracker); err != nil {
			return false, errors.Trace(err)
		}
		w.controllerTrackers[id] = tracker
		changed = true
	}

	return changed, nil
}

func (w *pgWorker) apiserverDetailsRequested(topic string, request apiserver.DetailsRequest, err error) {
	if err != nil {
		// This shouldn't happen (barring programmer error ;) - treat it as fatal.
		w.catacomb.Kill(errors.Annotate(err, "apiserver details request callback failed"))
		return
	}
	select {
	case w.detailsRequests <- request.Requester:
	case <-w.catacomb.Dying():
	}
}

func inStrings(t string, ss []string) bool {
	for _, s := range ss {
		if s == t {
			return true
		}
	}
	return false
}

// apiServerHostPorts returns the host-ports for each apiserver controller.
func (w *pgWorker) apiServerHostPorts() map[string]network.SpaceHostPorts {
	servers := make(map[string]network.SpaceHostPorts)
	for _, m := range w.controllerTrackers {
		hostPorts := network.SpaceAddressesWithPort(m.Addresses(), w.config.APIPort)
		if len(hostPorts) == 0 {
			continue
		}
		servers[m.Id()] = hostPorts
	}
	return servers
}
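
// As a rough illustration (the IDs and addresses here are invented), the
// payload published on apiserver.DetailsTopic by publishAPIServerDetails
// looks something like:
//
//	apiserver.Details{
//		Servers: map[string]apiserver.APIServer{
//			"0": {ID: "0", Addresses: []string{"10.0.0.1:17070"}, InternalAddress: "10.0.0.1:17070"},
//			"1": {ID: "1", Addresses: []string{"10.0.0.2:17070"}, InternalAddress: "10.0.0.2:17070"},
//		},
//		LocalOnly: true,
//	}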

// publishAPIServerDetails publishes the details corresponding to the latest
// known controller/replica-set topology if it has changed from the last known
// state.
func (w *pgWorker) publishAPIServerDetails(
	servers map[string]network.SpaceHostPorts,
	members map[string]*replicaset.Member,
) {
	details := apiserver.Details{
		Servers:   make(map[string]apiserver.APIServer),
		LocalOnly: true,
	}
	internalPort := w.config.ControllerAPIPort
	if internalPort == 0 {
		internalPort = w.config.APIPort
	}
	for id, hostPorts := range servers {
		var internalAddress string
		if members[id] != nil {
			mongoAddress, _, err := net.SplitHostPort(members[id].Address)
			if err != nil {
				logger.Errorf("splitting host/port for address %q: %v", members[id].Address, err)
			} else {
				internalAddress = net.JoinHostPort(mongoAddress, strconv.Itoa(internalPort))
			}
		} else {
			logger.Tracef("replica-set member %q not found", id)
		}

		server := apiserver.APIServer{
			ID:              id,
			InternalAddress: internalAddress,
		}
		for _, hp := range hostPorts.HostPorts().FilterUnusable() {
			server.Addresses = append(server.Addresses, network.DialAddress(hp))
		}
		sort.Strings(server.Addresses)
		details.Servers[server.ID] = server
	}

	if !reflect.DeepEqual(w.serverDetails, details) {
		_, _ = w.config.Hub.Publish(apiserver.DetailsTopic, details)
		w.serverDetails = details
	}
}

// replicaSetError means an error occurred as a result
// of calling replicaset.Set. As this is expected to fail
// in the normal course of things, it needs special treatment.
const replicaSetError = errors.ConstError("replicaset error")

// stepDownPrimaryError means we needed to ask the primary to step down, so we
// should come back and re-evaluate the replica set once the new primary is
// voted in.
const stepDownPrimaryError = errors.ConstError("primary is stepping down, must reevaluate peer group")
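
// Both sentinels are treated as retryable by the main loop: replicaSetError
// and stepDownPrimaryError trigger the fast-retry backoff, while any other
// error returned from updateReplicaSet kills the worker.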

// updateReplicaSet sets the current replica set members, and applies the
// given voting status to nodes in the state. A mapping of controller ID
// to replicaset.Member structures is returned.
func (w *pgWorker) updateReplicaSet() (map[string]*replicaset.Member, error) {
	info, err := w.peerGroupInfo()
	if err != nil {
		return nil, errors.Annotate(err, "creating peer group info")
	}
	// Update the metrics collector with the replicaset statuses.
	w.metrics.update(info.statuses)
	desired, err := desiredPeerGroup(info)
	// membersChanged, members, voting, err
	if err != nil {
		return nil, errors.Annotate(err, "computing desired peer group")
	}
	if logger.IsDebugEnabled() {
		if desired.isChanged {
			logger.Debugf("desired peer group members: \n%s", prettyReplicaSetMembers(desired.members))
		} else {
			var output []string
			for id, m := range desired.members {
				output = append(output, fmt.Sprintf(" %s: %v", id, isVotingMember(m)))
			}
			logger.Debugf("no change in desired peer group, voting: \n%s", strings.Join(output, "\n"))
		}
	}

	if desired.stepDownPrimary {
		logger.Infof("mongo primary controller needs to be removed, first requesting it to step down")
		if err := w.config.MongoSession.StepDownPrimary(); err != nil {
			// StepDownPrimary should already have handled the io.EOF that
			// mongo might return, so any error we get here is unknown.
			return nil, errors.Annotate(err, "asking primary to step down")
		}
		// Asking the primary to step down forces us to disconnect from Mongo,
		// but session.Refresh() should get us reconnected so we can keep
		// operating.
		w.config.MongoSession.Refresh()
		// However, we no longer know who the primary is, so we have to error
		// out and have the peer group re-evaluated.
		return nil, stepDownPrimaryError
	}

	// Figure out if we are running on the mongo primary.
	controllerId := w.config.ControllerId()
	isPrimary, err := info.isPrimary(controllerId)
	if err != nil && !errors.IsNotFound(err) {
		return nil, errors.Annotatef(err, "determining primary status of controller %q", controllerId)
	}
	logger.Debugf("controller node %q primary: %v", controllerId, isPrimary)
	if !isPrimary {
		return desired.members, nil
	}

	// Currently k8s controllers do not support HA, so only update
	// the replicaset config if HA is enabled and there is a change.
	// Only controllers corresponding with the mongo primary should
	// update the replicaset, otherwise there will be a race since
	// a diff needs to be calculated so the changes can be applied
	// one at a time.
	if w.config.SupportsHA && desired.isChanged {
		ms := make([]replicaset.Member, 0, len(desired.members))
		ids := make([]string, 0, len(desired.members))
		for id := range desired.members {
			ids = append(ids, id)
		}
		sortAsInts(ids)
		for _, id := range ids {
			m := desired.members[id]
			ms = append(ms, *m)
		}
		if err := w.config.MongoSession.Set(ms); err != nil {
			return nil, errors.WithType(err, replicaSetError)
		}
		logger.Infof("successfully updated replica set")
	}

	// Reset controller status for members of the changed peer-group.
	// Any previous peer-group determination errors result in status
	// warning messages.
	for id := range desired.members {
		if err := w.controllerTrackers[id].host.SetStatus(getStatusInfo("")); err != nil {
			return nil, errors.Trace(err)
		}
	}
	if err := w.updateVoteStatus(); err != nil {
		return nil, errors.Trace(err)
	}
	for _, tracker := range w.controllerTrackers {
		if tracker.host.Life() != state.Alive && !tracker.node.HasVote() {
			logger.Debugf("removing dying controller %s references", tracker.Id())
			if err := w.config.State.RemoveControllerReference(tracker.node); err != nil {
				logger.Errorf("failed to remove dying controller reference after removing its vote: %v", err)
			}
		}
	}
	return desired.members, nil
}

func (w *pgWorker) updateVoteStatus() error {
	currentMembers, err := w.config.MongoSession.CurrentMembers()
	if err != nil {
		return errors.Trace(err)
	}
	orphanedNodes := set.NewStrings()
	for id := range w.controllerTrackers {
		orphanedNodes.Add(id)
	}
	var voting, nonVoting []*controllerTracker
	for _, m := range currentMembers {
		node, ok := w.controllerTrackers[m.Tags[jujuNodeKey]]
		if ok {
			orphanedNodes.Remove(node.Id())
			if !node.HasVote() && isVotingMember(&m) {
				logger.Tracef("controller %v is now voting member", node.Id())
				voting = append(voting, node)
			} else if node.HasVote() && !isVotingMember(&m) {
				logger.Tracef("controller %v is now non voting member", node.Id())
				nonVoting = append(nonVoting, node)
			}
		}
	}
	logger.Debugf("controllers that are no longer in replicaset: %v", orphanedNodes.Values())
	for _, id := range orphanedNodes.Values() {
		node := w.controllerTrackers[id]
		nonVoting = append(nonVoting, node)
	}
	if err := setHasVote(voting, true); err != nil {
		return errors.Annotatef(err, "adding voters")
	}
	if err := setHasVote(nonVoting, false); err != nil {
		return errors.Annotatef(err, "removing non-voters")
	}
	return nil
}

const (
	voting    = "voting"
	nonvoting = "non-voting"
)

func prettyReplicaSetMembers(members map[string]*replicaset.Member) string {
	var result []string
	// It's easier to read if we sort by Id.
	keys := make([]string, 0, len(members))
	for key := range members {
		keys = append(keys, key)
	}
	sort.Strings(keys)
	for _, key := range keys {
		m := members[key]
		voteStatus := nonvoting
		if isVotingMember(m) {
			voteStatus = voting
		}
		result = append(result, fmt.Sprintf(" Id: %d, Tags: %v, %s", m.Id, m.Tags, voteStatus))
	}
	return strings.Join(result, "\n")
}
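
// For example (the member ids and tag key/values here are invented), the
// output of prettyReplicaSetMembers looks roughly like:
//
//	 Id: 1, Tags: map[juju-machine-id:0], voting
//	 Id: 2, Tags: map[juju-machine-id:1], non-voting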

// peerGroupInfo collates current session information about the
// mongo peer group with information from state node instances.
func (w *pgWorker) peerGroupInfo() (*peerGroupInfo, error) {
	sts, err := w.config.MongoSession.CurrentStatus()
	if err != nil {
		return nil, errors.Annotate(err, "cannot get replica set status")
	}

	members, err := w.config.MongoSession.CurrentMembers()
	if err != nil {
		return nil, errors.Annotate(err, "cannot get replica set members")
	}

	haSpace, err := w.getHASpaceFromConfig()
	if err != nil {
		return nil, err
	}

	if logger.IsTraceEnabled() {
		logger.Tracef("read peer group info: %# v\n%# v", pretty.Formatter(sts), pretty.Formatter(members))
	}
	return newPeerGroupInfo(w.controllerTrackers, sts.Members, members, w.config.MongoPort, haSpace)
}

// getHASpaceFromConfig returns a space based on the controller's
// configuration for the HA space.
func (w *pgWorker) getHASpaceFromConfig() (network.SpaceInfo, error) {
	config, err := w.config.State.ControllerConfig()
	if err != nil {
		return network.SpaceInfo{}, errors.Trace(err)
	}

	jujuHASpace := config.JujuHASpace()
	if jujuHASpace == "" {
		return network.SpaceInfo{}, nil
	}
	space, err := w.config.State.Space(jujuHASpace)
	if err != nil {
		return network.SpaceInfo{}, errors.Trace(err)
	}
	return space.NetworkSpace()
}

// setHasVote sets the HasVote status of all the given nodes to hasVote.
func setHasVote(ms []*controllerTracker, hasVote bool) error {
	if len(ms) == 0 {
		return nil
	}
	logger.Infof("setting HasVote=%v on nodes %v", hasVote, ms)
	for _, m := range ms {
		if err := m.node.SetHasVote(hasVote); err != nil {
			return fmt.Errorf("cannot set voting status of %q to %v: %v", m.Id(), hasVote, err)
		}
	}
	return nil
}