github.com/juju/juju@v0.0.0-20240327075706-a90865de2538/worker/peergrouper/worker.go (about)

     1  // Copyright 2014 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package peergrouper
     5  
     6  import (
     7  	"fmt"
     8  	"net"
     9  	"reflect"
    10  	"sort"
    11  	"strconv"
    12  	"strings"
    13  	"time"
    14  
    15  	"github.com/juju/clock"
    16  	"github.com/juju/collections/set"
    17  	"github.com/juju/errors"
    18  	"github.com/juju/loggo"
    19  	"github.com/juju/replicaset/v3"
    20  	"github.com/juju/worker/v3"
    21  	"github.com/juju/worker/v3/catacomb"
    22  	"github.com/kr/pretty"
    23  	"github.com/prometheus/client_golang/prometheus"
    24  
    25  	"github.com/juju/juju/controller"
    26  	"github.com/juju/juju/core/network"
    27  	"github.com/juju/juju/core/status"
    28  	"github.com/juju/juju/pubsub/apiserver"
    29  	"github.com/juju/juju/state"
    30  )
    31  
// logger is the package-scoped logger for the peergrouper worker.
var logger = loggo.GetLogger("juju.worker.peergrouper")
    33  
// State is the subset of persistent-state operations the peergrouper
// needs: reading controller topology and configuration, and watching
// for changes to either.
type State interface {
	RemoveControllerReference(m ControllerNode) error
	ControllerConfig() (controller.Config, error)
	ControllerIds() ([]string, error)
	ControllerNode(id string) (ControllerNode, error)
	ControllerHost(id string) (ControllerHost, error)
	WatchControllerInfo() state.StringsWatcher
	WatchControllerStatusChanges() state.StringsWatcher
	WatchControllerConfig() state.NotifyWatcher
	Space(name string) (Space, error)
}
    45  
// ControllerNode exposes the voting state of a single controller node,
// and allows that state to be refreshed, watched and updated.
type ControllerNode interface {
	Id() string
	Refresh() error
	Watch() state.NotifyWatcher
	WantsVote() bool
	HasVote() bool
	SetHasVote(hasVote bool) error
}
    54  
// ControllerHost exposes the lifecycle, status and network addresses
// of the machine/pod hosting a controller node.
type ControllerHost interface {
	Id() string
	Life() state.Life
	Watch() state.NotifyWatcher
	SetStatus(status.StatusInfo) error
	Refresh() error
	Addresses() network.SpaceAddresses
}
    63  
// Space supplies the network space information needed when resolving
// the configured HA space.
type Space interface {
	NetworkSpace() (network.SpaceInfo, error)
}
    67  
// MongoSession is the set of mongo replica-set operations the worker
// performs: inspecting current status/membership, replacing the member
// set, and asking the primary to step down.
type MongoSession interface {
	CurrentStatus() (*replicaset.Status, error)
	CurrentMembers() ([]replicaset.Member, error)
	Set([]replicaset.Member) error
	StepDownPrimary() error
	Refresh()
}
    75  
// APIHostPortsSetter records the current API server addresses.
type APIHostPortsSetter interface {
	SetAPIHostPorts([]network.SpaceHostPorts) error
}
    79  
var (
	// If we fail to set the mongo replica set members,
	// we start retrying with the following interval,
	// before exponentially backing off with each further
	// attempt.
	initialRetryInterval = 2 * time.Second

	// maxRetryInterval holds the maximum interval
	// between retry attempts.
	maxRetryInterval = 5 * time.Minute

	// pollInterval holds the interval at which the replica set
	// members will be updated even in the absence of changes
	// to State. This enables us to make changes to members
	// that are triggered by changes to member status.
	pollInterval = 1 * time.Minute

	// IdleFunc allows tests to be able to get callbacks when the controller
	// hasn't been given any changes for a specified time. It is nil in
	// production; the worker only arms its idle timer when it is set.
	IdleFunc func()

	// IdleTime relates to how long the controller needs to wait with no changes
	// to be considered idle.
	IdleTime = 50 * time.Millisecond
)
   105  
// Hub defines the methods of the apiserver centralhub that the peer
// grouper uses: subscribing to details requests and publishing the
// current API server details.
type Hub interface {
	Subscribe(topic string, handler interface{}) (func(), error)
	Publish(topic string, data interface{}) (func(), error)
}
   112  
// pgWorker is a worker which watches the controller nodes in state
// as well as the MongoDB replicaset configuration, adding and
// removing controller nodes as they change or are added and
// removed.
type pgWorker struct {
	// catacomb manages the lifetime of this worker and its children
	// (watchers and controller trackers).
	catacomb catacomb.Catacomb

	// config holds the validated worker configuration.
	config Config

	// controllerChanges receives events from the controllerTrackers when
	// controller nodes change in ways that are relevant to the
	// peergrouper.
	controllerChanges chan struct{}

	// controllerTrackers holds the workers which track the nodes we
	// are currently watching (all the controller nodes).
	controllerTrackers map[string]*controllerTracker

	// detailsRequests is used to feed details requests from the hub into the main loop.
	detailsRequests chan string

	// serverDetails holds the last server information broadcast via pub/sub.
	// It is used to detect changes since the last publish.
	serverDetails apiserver.Details

	// metrics collects replica-set status for Prometheus and the
	// engine report.
	metrics *Collector

	// idleFunc is copied from IdleFunc at construction; non-nil only
	// in tests that want idle notifications.
	idleFunc func()
}
   142  
// Config holds the configuration for a peergrouper worker.
type Config struct {
	// State provides access to controller topology and configuration.
	State State
	// APIHostPortsSetter records the current API server addresses.
	APIHostPortsSetter APIHostPortsSetter
	// MongoSession is used to read and update the replica set.
	MongoSession MongoSession
	// Clock is used for retry/poll scheduling.
	Clock clock.Clock
	// MongoPort is the port mongo members listen on.
	MongoPort int
	// APIPort is the externally-facing API port.
	APIPort int
	// ControllerAPIPort, if non-zero, is the port used for
	// controller-to-controller API connections.
	ControllerAPIPort int

	// ControllerId is the id of the controller running this worker.
	// It is used in checking if this working is running on the
	// primary mongo node.
	ControllerId func() string

	// Kubernetes controllers do not support HA yet.
	SupportsHA bool

	// Hub is the central hub of the apiserver,
	// and is used to publish the details of the
	// API servers.
	Hub Hub

	// PrometheusRegisterer registers the worker's metrics collector.
	PrometheusRegisterer prometheus.Registerer

	// UpdateNotify is called when the update channel is signalled.
	// Used solely for test synchronization.
	UpdateNotify func()
}
   172  
   173  // Validate validates the worker configuration.
   174  func (config Config) Validate() error {
   175  	if config.State == nil {
   176  		return errors.NotValidf("nil State")
   177  	}
   178  	if config.APIHostPortsSetter == nil {
   179  		return errors.NotValidf("nil APIHostPortsSetter")
   180  	}
   181  	if config.MongoSession == nil {
   182  		return errors.NotValidf("nil MongoSession")
   183  	}
   184  	if config.Clock == nil {
   185  		return errors.NotValidf("nil Clock")
   186  	}
   187  	if config.Hub == nil {
   188  		return errors.NotValidf("nil Hub")
   189  	}
   190  	if config.PrometheusRegisterer == nil {
   191  		return errors.NotValidf("nil PrometheusRegisterer")
   192  	}
   193  	if config.MongoPort <= 0 {
   194  		return errors.NotValidf("non-positive MongoPort")
   195  	}
   196  	if config.APIPort <= 0 {
   197  		return errors.NotValidf("non-positive APIPort")
   198  	}
   199  	// TODO Juju 3.0: make ControllerAPIPort required.
   200  	return nil
   201  }
   202  
   203  // New returns a new worker that maintains the mongo replica set
   204  // with respect to the given state.
   205  func New(config Config) (worker.Worker, error) {
   206  	if err := config.Validate(); err != nil {
   207  		return nil, errors.Trace(err)
   208  	}
   209  
   210  	w := &pgWorker{
   211  		config:             config,
   212  		controllerChanges:  make(chan struct{}),
   213  		controllerTrackers: make(map[string]*controllerTracker),
   214  		detailsRequests:    make(chan string),
   215  		idleFunc:           IdleFunc,
   216  		metrics:            NewMetricsCollector(),
   217  	}
   218  	err := catacomb.Invoke(catacomb.Plan{
   219  		Site: &w.catacomb,
   220  		Work: w.loop,
   221  	})
   222  	if err != nil {
   223  		return nil, errors.Trace(err)
   224  	}
   225  	return w, nil
   226  }
   227  
// Kill is part of the worker.Worker interface. It asks the worker to
// stop without error.
func (w *pgWorker) Kill() {
	w.catacomb.Kill(nil)
}
   232  
// Wait is part of the worker.Worker interface. It blocks until the
// worker has stopped and returns its final error.
func (w *pgWorker) Wait() error {
	return w.catacomb.Wait()
}
   237  
   238  // Report is shown in the engine report.
   239  func (w *pgWorker) Report() map[string]interface{} {
   240  	if w.metrics == nil {
   241  		return nil
   242  	}
   243  	return w.metrics.report()
   244  }
   245  
// loop is the worker's main event loop. It reacts to controller
// topology changes, controller config changes, details requests and
// scheduled retries/polls; after each relevant event it republishes
// API addresses and reconciles the mongo replica set, backing off
// exponentially while updates fail.
func (w *pgWorker) loop() error {
	// Registration errors are deliberately ignored: metrics are
	// best-effort and must not prevent the worker from running.
	_ = w.config.PrometheusRegisterer.Register(w.metrics)
	defer w.config.PrometheusRegisterer.Unregister(w.metrics)

	controllerChanges, err := w.watchForControllerChanges()
	if err != nil {
		return errors.Trace(err)
	}

	configChanges, err := w.watchForConfigChanges()
	if err != nil {
		return errors.Trace(err)
	}

	unsubscribe, err := w.config.Hub.Subscribe(apiserver.DetailsRequestTopic, w.apiserverDetailsRequested)
	if err != nil {
		return errors.Trace(err)
	}
	defer unsubscribe()

	// updateChan is nil (and so blocks forever in the select) until a
	// retry or poll is scheduled below.
	var updateChan <-chan time.Time
	retryInterval := initialRetryInterval

	// A zero Timer has a nil C channel, so the idle case below can
	// never fire unless idleFunc is set (tests only).
	idle := &time.Timer{}
	if w.idleFunc != nil {
		logger.Tracef("pgWorker %p set idle timeout to %s", w, IdleTime)
		idle = time.NewTimer(IdleTime)
		defer idle.Stop()
	}

	for {
		logger.Tracef("waiting...")
		select {
		case <-w.catacomb.Dying():
			return w.catacomb.ErrDying()
		case <-idle.C:
			logger.Tracef("pgWorker %p is idle", w)
			w.idleFunc()
			idle.Reset(IdleTime)
			continue
		case <-controllerChanges:
			// A controller was added or removed.
			logger.Tracef("<-controllerChanges")
			changed, err := w.updateControllerNodes()
			if err != nil {
				return errors.Trace(err)
			}
			if !changed {
				continue
			}
			logger.Tracef("controller added or removed, update replica now")
		case <-w.controllerChanges:
			// One of the controller nodes changed.
			logger.Tracef("<-w.controllerChanges")
		case <-configChanges:
			// Controller config has changed.
			logger.Tracef("<-w.configChanges")

			// If a config change wakes up the loop before the topology has
			// been represented in the worker's controller trackers, ignore it;
			// errors will occur when trying to determine peer group changes.
			// Continuing is OK because subsequent invocations of the loop will
			// pick up the most recent config from state anyway.
			if len(w.controllerTrackers) == 0 {
				logger.Tracef("no controller information, ignoring config change")
				continue
			}
		case requester := <-w.detailsRequests:
			// A client requested the details be resent (probably
			// because they just subscribed).
			logger.Tracef("<-w.detailsRequests (from %q)", requester)
			_, _ = w.config.Hub.Publish(apiserver.DetailsTopic, w.serverDetails)
			continue
		case <-updateChan:
			// Scheduled update.
			logger.Tracef("<-updateChan")
			updateChan = nil
			if w.config.UpdateNotify != nil {
				w.config.UpdateNotify()
			}
		}

		// Record the latest API addresses before attempting any
		// replica-set changes.
		servers := w.apiServerHostPorts()
		apiHostPorts := make([]network.SpaceHostPorts, 0, len(servers))
		for _, serverHostPorts := range servers {
			apiHostPorts = append(apiHostPorts, serverHostPorts)
		}

		var failed bool
		if err := w.config.APIHostPortsSetter.SetAPIHostPorts(apiHostPorts); err != nil {
			logger.Errorf("cannot write API server addresses: %v", err)
			failed = true
		}

		members, err := w.updateReplicaSet()
		if err != nil {
			if errors.Is(err, replicaSetError) {
				logger.Errorf("cannot set replicaset: %v", err)
			} else if !errors.Is(err, stepDownPrimaryError) {
				return errors.Trace(err)
			} else {
				logger.Tracef("isStepDownPrimary error: %v", err)
			}
			// both replicaset errors and stepping down the primary are both considered fast-retry 'failures'.
			// we need to re-read the state after a short timeout and re-evaluate the replicaset.
			failed = true
		}
		w.publishAPIServerDetails(servers, members)

		if failed {
			logger.Tracef("failed, will wake up after: %v", retryInterval)
			updateChan = w.config.Clock.After(retryInterval)
			retryInterval = scaleRetry(retryInterval)
		} else {
			// Update the replica set members occasionally to keep them up to
			// date with the current replica-set member statuses.
			// If we had previously failed to update the replicaset,
			// the updateChan isn't set to the pollInterval. So if we had just
			// processed an update, or have just succeeded after a failure reset
			// the updateChan to the pollInterval.
			if updateChan == nil || retryInterval != initialRetryInterval {
				logger.Tracef("succeeded, will wake up after: %v", pollInterval)
				updateChan = w.config.Clock.After(pollInterval)
			} else {
				logger.Tracef("succeeded, wait already pending")
			}
			retryInterval = initialRetryInterval
		}
		if w.idleFunc != nil {
			idle.Reset(IdleTime)
		}
	}
}
   379  
   380  func scaleRetry(value time.Duration) time.Duration {
   381  	value *= 2
   382  	if value > maxRetryInterval {
   383  		value = maxRetryInterval
   384  	}
   385  	return value
   386  }
   387  
// watchForControllerChanges starts two watchers for changes to controller
// info and status.
// It returns a channel which will receive events if any of the watchers fires.
func (w *pgWorker) watchForControllerChanges() (<-chan struct{}, error) {
	controllerInfoWatcher := w.config.State.WatchControllerInfo()
	if err := w.catacomb.Add(controllerInfoWatcher); err != nil {
		return nil, errors.Trace(err)
	}

	controllerStatusWatcher := w.config.State.WatchControllerStatusChanges()
	if err := w.catacomb.Add(controllerStatusWatcher); err != nil {
		return nil, errors.Trace(err)
	}

	// The goroutine below coalesces events from both watchers into a
	// single notification channel. notifyCh is nil (disabled send case)
	// until a watcher fires; it is then set to out so the next select
	// iteration can deliver exactly one notification, after which it is
	// disabled again. Multiple watcher events before the consumer reads
	// collapse into one notification.
	out := make(chan struct{})
	var notifyCh chan struct{}
	go func() {
		for {
			select {
			case <-w.catacomb.Dying():
				return
			case <-controllerInfoWatcher.Changes():
				notifyCh = out
			case <-controllerStatusWatcher.Changes():
				notifyCh = out
			case notifyCh <- struct{}{}:
				notifyCh = nil
			}
		}
	}()
	return out, nil
}
   420  
   421  // watchForConfigChanges starts a watcher for changes to controller config.
   422  // It returns a channel which will receive events if the watcher fires.
   423  // This is separate from watchForControllerChanges because of the worker loop
   424  // logic. If controller nodes have not changed, then further processing
   425  // does not occur, whereas we want to re-publish API addresses and check
   426  // for replica-set changes if either the management or HA space configs have
   427  // changed.
   428  func (w *pgWorker) watchForConfigChanges() (<-chan struct{}, error) {
   429  	controllerConfigWatcher := w.config.State.WatchControllerConfig()
   430  	if err := w.catacomb.Add(controllerConfigWatcher); err != nil {
   431  		return nil, errors.Trace(err)
   432  	}
   433  	return controllerConfigWatcher.Changes(), nil
   434  }
   435  
// updateControllerNodes updates the peergrouper's current list of
// controller nodes, as well as starting and stopping trackers for
// them as they are added and removed. It returns true if the set of
// tracked controllers changed.
func (w *pgWorker) updateControllerNodes() (bool, error) {
	controllerIds, err := w.config.State.ControllerIds()
	if err != nil {
		return false, fmt.Errorf("cannot get controller ids: %v", err)
	}

	logger.Debugf("controller nodes in state: %#v", controllerIds)
	changed := false

	// Stop controller goroutines that no longer correspond to controller nodes.
	for _, m := range w.controllerTrackers {
		if !inStrings(m.Id(), controllerIds) {
			// Stop error deliberately ignored; the tracker is being
			// discarded either way.
			_ = worker.Stop(m)
			delete(w.controllerTrackers, m.Id())
			changed = true
		}
	}

	// Start nodes with no watcher
	for _, id := range controllerIds {
		// Note: node and host are fetched before checking whether a
		// tracker already exists, so NotFound/other errors surface for
		// every listed id, tracked or not.
		controllerNode, err := w.config.State.ControllerNode(id)
		if err != nil {
			if errors.IsNotFound(err) {
				// If the controller isn't found, it must have been
				// removed and will soon enough be removed
				// from the controller list. This will probably
				// never happen, but we'll code defensively anyway.
				logger.Warningf("controller %q from controller list not found", id)
				continue
			}
			return false, fmt.Errorf("cannot get controller %q: %v", id, err)
		}
		controllerHost, err := w.config.State.ControllerHost(id)
		if err != nil {
			if errors.IsNotFound(err) {
				// If the controller isn't found, it must have been
				// removed and will soon enough be removed
				// from the controller list. This will probably
				// never happen, but we'll code defensively anyway.
				logger.Warningf("controller %q from controller list not found", id)
				continue
			}
			return false, fmt.Errorf("cannot get controller %q: %v", id, err)
		}
		if _, ok := w.controllerTrackers[id]; ok {
			continue
		}

		logger.Debugf("found new controller %q", id)
		tracker, err := newControllerTracker(controllerNode, controllerHost, w.controllerChanges)
		if err != nil {
			return false, errors.Trace(err)
		}
		// The catacomb owns the tracker, stopping it when the worker dies.
		if err := w.catacomb.Add(tracker); err != nil {
			return false, errors.Trace(err)
		}
		w.controllerTrackers[id] = tracker
		changed = true
	}

	return changed, nil
}
   501  
// apiserverDetailsRequested is the hub callback for
// apiserver.DetailsRequestTopic. It forwards the requester's name into
// the main loop, which responds by re-publishing the current details.
func (w *pgWorker) apiserverDetailsRequested(topic string, request apiserver.DetailsRequest, err error) {
	if err != nil {
		// This shouldn't happen (barring programmer error ;) - treat it as fatal.
		w.catacomb.Kill(errors.Annotate(err, "apiserver details request callback failed"))
		return
	}
	// Don't block forever if the worker is shutting down.
	select {
	case w.detailsRequests <- request.Requester:
	case <-w.catacomb.Dying():
	}
}
   513  
   514  func inStrings(t string, ss []string) bool {
   515  	for _, s := range ss {
   516  		if s == t {
   517  			return true
   518  		}
   519  	}
   520  	return false
   521  }
   522  
   523  // apiServerHostPorts returns the host-ports for each apiserver controller.
   524  func (w *pgWorker) apiServerHostPorts() map[string]network.SpaceHostPorts {
   525  	servers := make(map[string]network.SpaceHostPorts)
   526  	for _, m := range w.controllerTrackers {
   527  		hostPorts := network.SpaceAddressesWithPort(m.Addresses(), w.config.APIPort)
   528  		if len(hostPorts) == 0 {
   529  			continue
   530  		}
   531  		servers[m.Id()] = hostPorts
   532  	}
   533  	return servers
   534  }
   535  
// publishAPIServerDetails publishes the details corresponding to the latest
// known controller/replica-set topology if it has changed from the last known
// state. servers maps controller id to API host-ports; members maps
// controller id to its replica-set member (may lack entries).
func (w *pgWorker) publishAPIServerDetails(
	servers map[string]network.SpaceHostPorts,
	members map[string]*replicaset.Member,
) {
	details := apiserver.Details{
		Servers:   make(map[string]apiserver.APIServer),
		LocalOnly: true,
	}
	// Prefer the dedicated controller API port; fall back to the
	// public API port when it is unset.
	internalPort := w.config.ControllerAPIPort
	if internalPort == 0 {
		internalPort = w.config.APIPort
	}
	for id, hostPorts := range servers {
		// Derive the internal address from the mongo member's address,
		// swapping the mongo port for the internal API port.
		var internalAddress string
		if members[id] != nil {
			mongoAddress, _, err := net.SplitHostPort(members[id].Address)
			if err != nil {
				logger.Errorf("splitting host/port for address %q: %v", members[id].Address, err)
			} else {
				internalAddress = net.JoinHostPort(mongoAddress, strconv.Itoa(internalPort))
			}
		} else {
			logger.Tracef("replica-set member %q not found", id)
		}

		server := apiserver.APIServer{
			ID:              id,
			InternalAddress: internalAddress,
		}
		for _, hp := range hostPorts.HostPorts().FilterUnusable() {
			server.Addresses = append(server.Addresses, network.DialAddress(hp))
		}
		// Sorted addresses make the DeepEqual comparison below stable.
		sort.Strings(server.Addresses)
		details.Servers[server.ID] = server
	}

	// Only publish (and remember) when something actually changed.
	if !reflect.DeepEqual(w.serverDetails, details) {
		_, _ = w.config.Hub.Publish(apiserver.DetailsTopic, details)
		w.serverDetails = details
	}
}
   580  
// replicaSetError means an error occurred as a result
// of calling replicaset.Set. As this is expected to fail
// in the normal course of things, it needs special treatment:
// the main loop retries rather than bouncing the worker.
const replicaSetError = errors.ConstError("replicaset error")

// stepDownPrimaryError means we needed to ask the primary to step down, so we
// should come back and re-evaluate the replicaset once the new primary is
// voted in. Like replicaSetError, it triggers a fast retry in the main loop.
const stepDownPrimaryError = errors.ConstError("primary is stepping down, must reevaluate peer group")
   589  
// updateReplicaSet sets the current replica set members, and applies the
// given voting status to nodes in the state. A mapping of controller ID
// to replicaset.Member structures is returned.
// It may return replicaSetError or stepDownPrimaryError, both of which
// the caller treats as retryable.
func (w *pgWorker) updateReplicaSet() (map[string]*replicaset.Member, error) {
	info, err := w.peerGroupInfo()
	if err != nil {
		return nil, errors.Annotate(err, "creating peer group info")
	}
	// Update the metrics collector with the replicaset statuses.
	w.metrics.update(info.statuses)
	// desired carries the computed member set, voting assignments,
	// a changed flag and whether the primary must step down.
	desired, err := desiredPeerGroup(info)
	if err != nil {
		return nil, errors.Annotate(err, "computing desired peer group")
	}
	if logger.IsDebugEnabled() {
		if desired.isChanged {
			logger.Debugf("desired peer group members: \n%s", prettyReplicaSetMembers(desired.members))
		} else {
			var output []string
			for id, m := range desired.members {
				output = append(output, fmt.Sprintf("  %s: %v", id, isVotingMember(m)))
			}
			logger.Debugf("no change in desired peer group, voting: \n%s", strings.Join(output, "\n"))
		}
	}

	if desired.stepDownPrimary {
		logger.Infof("mongo primary controller needs to be removed, first requesting it to step down")
		if err := w.config.MongoSession.StepDownPrimary(); err != nil {
			// StepDownPrimary should have already handled the io.EOF that mongo might give, so any error we
			// get is unknown
			return nil, errors.Annotate(err, "asking primary to step down")
		}
		// Asking the Primary to step down forces us to disconnect from Mongo, but session.Refresh() should get us
		// reconnected so we can keep operating
		w.config.MongoSession.Refresh()
		// However, we no longer know who the primary is, so we have to error out and have it reevaluated
		return nil, stepDownPrimaryError
	}

	// Figure out if we are running on the mongo primary.
	controllerId := w.config.ControllerId()
	isPrimary, err := info.isPrimary(controllerId)
	if err != nil && !errors.IsNotFound(err) {
		return nil, errors.Annotatef(err, "determining primary status of controller %q", controllerId)
	}
	logger.Debugf("controller node %q primary: %v", controllerId, isPrimary)
	if !isPrimary {
		return desired.members, nil
	}

	// Currently k8s controllers do not support HA, so only update
	// the replicaset config if HA is enabled and there is a change.
	// Only controllers corresponding with the mongo primary should
	// update the replicaset, otherwise there will be a race since
	// a diff needs to be calculated so the changes can be applied
	// one at a time.
	if w.config.SupportsHA && desired.isChanged {
		// Apply members in stable (numeric id) order.
		ms := make([]replicaset.Member, 0, len(desired.members))
		ids := make([]string, 0, len(desired.members))
		for id := range desired.members {
			ids = append(ids, id)
		}
		sortAsInts(ids)
		for _, id := range ids {
			m := desired.members[id]
			ms = append(ms, *m)
		}
		if err := w.config.MongoSession.Set(ms); err != nil {
			// Tag the error so the caller retries instead of dying.
			return nil, errors.WithType(err, replicaSetError)
		}
		logger.Infof("successfully updated replica set")
	}

	// Reset controller status for members of the changed peer-group.
	// Any previous peer-group determination errors result in status
	// warning messages.
	for id := range desired.members {
		if err := w.controllerTrackers[id].host.SetStatus(getStatusInfo("")); err != nil {
			return nil, errors.Trace(err)
		}
	}
	if err := w.updateVoteStatus(); err != nil {
		return nil, errors.Trace(err)
	}
	// Clean up references for dying controllers that have lost their vote.
	for _, tracker := range w.controllerTrackers {
		if tracker.host.Life() != state.Alive && !tracker.node.HasVote() {
			logger.Debugf("removing dying controller %s references", tracker.Id())
			if err := w.config.State.RemoveControllerReference(tracker.node); err != nil {
				logger.Errorf("failed to remove dying controller as a controller after removing its vote: %v", err)
			}
		}
	}
	return desired.members, nil
}
   686  
   687  func (w *pgWorker) updateVoteStatus() error {
   688  	currentMembers, err := w.config.MongoSession.CurrentMembers()
   689  	if err != nil {
   690  		return errors.Trace(err)
   691  	}
   692  	orphanedNodes := set.NewStrings()
   693  	for id := range w.controllerTrackers {
   694  		orphanedNodes.Add(id)
   695  	}
   696  	var voting, nonVoting []*controllerTracker
   697  	for _, m := range currentMembers {
   698  		node, ok := w.controllerTrackers[m.Tags[jujuNodeKey]]
   699  		orphanedNodes.Remove(node.Id())
   700  		if ok {
   701  			if !node.HasVote() && isVotingMember(&m) {
   702  				logger.Tracef("controller %v is now voting member", node.Id())
   703  				voting = append(voting, node)
   704  			} else if node.HasVote() && !isVotingMember(&m) {
   705  				logger.Tracef("controller %v is now non voting member", node.Id())
   706  				nonVoting = append(nonVoting, node)
   707  			}
   708  		}
   709  	}
   710  	logger.Debugf("controllers that are no longer in replicaset: %v", orphanedNodes.Values())
   711  	for _, id := range orphanedNodes.Values() {
   712  		node := w.controllerTrackers[id]
   713  		nonVoting = append(nonVoting, node)
   714  	}
   715  	if err := setHasVote(voting, true); err != nil {
   716  		return errors.Annotatef(err, "adding voters")
   717  	}
   718  	if err := setHasVote(nonVoting, false); err != nil {
   719  		return errors.Annotatef(err, "removing non-voters")
   720  	}
   721  	return nil
   722  }
   723  
// Human-readable vote-status labels used in debug output.
const (
	voting    = "voting"
	nonvoting = "non-voting"
)
   728  
   729  func prettyReplicaSetMembers(members map[string]*replicaset.Member) string {
   730  	var result []string
   731  	// It's easier to read if we sort by Id.
   732  	keys := make([]string, 0, len(members))
   733  	for key := range members {
   734  		keys = append(keys, key)
   735  	}
   736  	sort.Strings(keys)
   737  	for _, key := range keys {
   738  		m := members[key]
   739  		voteStatus := nonvoting
   740  		if isVotingMember(m) {
   741  			voteStatus = voting
   742  		}
   743  		result = append(result, fmt.Sprintf("    Id: %d, Tags: %v, %s", m.Id, m.Tags, voteStatus))
   744  	}
   745  	return strings.Join(result, "\n")
   746  }
   747  
// peerGroupInfo collates current session information about the
// mongo peer group with information from state node instances,
// the mongo port and the configured HA space (if any).
func (w *pgWorker) peerGroupInfo() (*peerGroupInfo, error) {
	sts, err := w.config.MongoSession.CurrentStatus()
	if err != nil {
		return nil, errors.Annotate(err, "cannot get replica set status")
	}

	members, err := w.config.MongoSession.CurrentMembers()
	if err != nil {
		return nil, errors.Annotate(err, "cannot get replica set members")
	}

	haSpace, err := w.getHASpaceFromConfig()
	if err != nil {
		return nil, err
	}

	// Guard the expensive pretty-formatting behind the trace check.
	if logger.IsTraceEnabled() {
		logger.Tracef("read peer group info: %# v\n%# v", pretty.Formatter(sts), pretty.Formatter(members))
	}
	return newPeerGroupInfo(w.controllerTrackers, sts.Members, members, w.config.MongoPort, haSpace)
}
   771  
   772  // getHASpaceFromConfig returns a space based on the controller's
   773  // configuration for the HA space.
   774  func (w *pgWorker) getHASpaceFromConfig() (network.SpaceInfo, error) {
   775  	config, err := w.config.State.ControllerConfig()
   776  	if err != nil {
   777  		return network.SpaceInfo{}, errors.Trace(err)
   778  	}
   779  
   780  	jujuHASpace := config.JujuHASpace()
   781  	if jujuHASpace == "" {
   782  		return network.SpaceInfo{}, nil
   783  	}
   784  	space, err := w.config.State.Space(jujuHASpace)
   785  	if err != nil {
   786  		return network.SpaceInfo{}, errors.Trace(err)
   787  	}
   788  	return space.NetworkSpace()
   789  }
   790  
   791  // setHasVote sets the HasVote status of all the given nodes to hasVote.
   792  func setHasVote(ms []*controllerTracker, hasVote bool) error {
   793  	if len(ms) == 0 {
   794  		return nil
   795  	}
   796  	logger.Infof("setting HasVote=%v on nodes %v", hasVote, ms)
   797  	for _, m := range ms {
   798  		if err := m.node.SetHasVote(hasVote); err != nil {
   799  			return fmt.Errorf("cannot set voting status of %q to %v: %v", m.Id(), hasVote, err)
   800  		}
   801  	}
   802  	return nil
   803  }