github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/worker/peergrouper/worker.go (about)

     1  // Copyright 2014 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package peergrouper
     5  
     6  import (
     7  	"fmt"
     8  	"net"
     9  	"reflect"
    10  	"sort"
    11  	"strconv"
    12  	"strings"
    13  	"time"
    14  
    15  	"github.com/juju/clock"
    16  	"github.com/juju/errors"
    17  	"github.com/juju/loggo"
    18  	"github.com/juju/replicaset"
    19  	"github.com/kr/pretty"
    20  	"gopkg.in/juju/worker.v1"
    21  	"gopkg.in/juju/worker.v1/catacomb"
    22  
    23  	"github.com/juju/juju/controller"
    24  	"github.com/juju/juju/core/status"
    25  	"github.com/juju/juju/network"
    26  	"github.com/juju/juju/pubsub/apiserver"
    27  	"github.com/juju/juju/state"
    28  )
    29  
    30  var logger = loggo.GetLogger("juju.worker.peergrouper")
    31  
    32  type State interface {
    33  	RemoveControllerMachine(m Machine) error
    34  	ControllerConfig() (controller.Config, error)
    35  	ControllerInfo() (*state.ControllerInfo, error)
    36  	Machine(id string) (Machine, error)
    37  	WatchControllerInfo() state.NotifyWatcher
    38  	WatchControllerStatusChanges() state.StringsWatcher
    39  	WatchControllerConfig() state.NotifyWatcher
    40  }
    41  
    42  type Space interface {
    43  	Name() string
    44  }
    45  
    46  type Machine interface {
    47  	Id() string
    48  	Life() state.Life
    49  	Status() (status.StatusInfo, error)
    50  	SetStatus(status.StatusInfo) error
    51  	Refresh() error
    52  	Watch() state.NotifyWatcher
    53  	WantsVote() bool
    54  	HasVote() bool
    55  	SetHasVote(hasVote bool) error
    56  	Addresses() []network.Address
    57  }
    58  
    59  type MongoSession interface {
    60  	CurrentStatus() (*replicaset.Status, error)
    61  	CurrentMembers() ([]replicaset.Member, error)
    62  	Set([]replicaset.Member) error
    63  	StepDownPrimary() error
    64  	Refresh()
    65  }
    66  
    67  type APIHostPortsSetter interface {
    68  	SetAPIHostPorts([][]network.HostPort) error
    69  }
    70  
    71  var (
    72  	// If we fail to set the mongo replica set members,
    73  	// we start retrying with the following interval,
    74  	// before exponentially backing off with each further
    75  	// attempt.
    76  	initialRetryInterval = 2 * time.Second
    77  
    78  	// maxRetryInterval holds the maximum interval
    79  	// between retry attempts.
    80  	maxRetryInterval = 5 * time.Minute
    81  
    82  	// pollInterval holds the interval at which the replica set
    83  	// members will be updated even in the absence of changes
    84  	// to State. This enables us to make changes to members
    85  	// that are triggered by changes to member status.
    86  	pollInterval = 1 * time.Minute
    87  )
    88  
    89  // Hub defines the methods of the apiserver centralhub that the peer
    90  // grouper uses.
    91  type Hub interface {
    92  	Subscribe(topic string, handler interface{}) (func(), error)
    93  	Publish(topic string, data interface{}) (<-chan struct{}, error)
    94  }
    95  
    96  // pgWorker is a worker which watches the controller machines in state
    97  // as well as the MongoDB replicaset configuration, adding and
    98  // removing controller machines as they change or are added and
    99  // removed.
   100  type pgWorker struct {
   101  	catacomb catacomb.Catacomb
   102  
   103  	config Config
   104  
   105  	// machineChanges receives events from the machineTrackers when
   106  	// controller machines change in ways that are relevant to the
   107  	// peergrouper.
   108  	machineChanges chan struct{}
   109  
   110  	// machineTrackers holds the workers which track the machines we
   111  	// are currently watching (all the controller machines).
   112  	machineTrackers map[string]*machineTracker
   113  
   114  	// detailsRequests is used to feed details requests from the hub into the main loop.
   115  	detailsRequests chan string
   116  
   117  	// serverDetails holds the last server information broadcast via pub/sub.
   118  	// It is used to detect changes since the last publish.
   119  	serverDetails apiserver.Details
   120  }
   121  
   122  // Config holds the configuration for a peergrouper worker.
   123  type Config struct {
   124  	State              State
   125  	APIHostPortsSetter APIHostPortsSetter
   126  	MongoSession       MongoSession
   127  	Clock              clock.Clock
   128  	SupportsSpaces     bool
   129  	MongoPort          int
   130  	APIPort            int
   131  	ControllerAPIPort  int
   132  
   133  	// Hub is the central hub of the apiserver,
   134  	// and is used to publish the details of the
   135  	// API servers.
   136  	Hub Hub
   137  }
   138  
   139  // Validate validates the worker configuration.
   140  func (config Config) Validate() error {
   141  	if config.State == nil {
   142  		return errors.NotValidf("nil State")
   143  	}
   144  	if config.APIHostPortsSetter == nil {
   145  		return errors.NotValidf("nil APIHostPortsSetter")
   146  	}
   147  	if config.MongoSession == nil {
   148  		return errors.NotValidf("nil MongoSession")
   149  	}
   150  	if config.Clock == nil {
   151  		return errors.NotValidf("nil Clock")
   152  	}
   153  	if config.Hub == nil {
   154  		return errors.NotValidf("nil Hub")
   155  	}
   156  	if config.MongoPort <= 0 {
   157  		return errors.NotValidf("non-positive MongoPort")
   158  	}
   159  	if config.APIPort <= 0 {
   160  		return errors.NotValidf("non-positive APIPort")
   161  	}
   162  	// TODO Juju 3.0: make ControllerAPIPort required.
   163  	return nil
   164  }
   165  
   166  // New returns a new worker that maintains the mongo replica set
   167  // with respect to the given state.
   168  func New(config Config) (worker.Worker, error) {
   169  	if err := config.Validate(); err != nil {
   170  		return nil, errors.Trace(err)
   171  	}
   172  
   173  	w := &pgWorker{
   174  		config:          config,
   175  		machineChanges:  make(chan struct{}),
   176  		machineTrackers: make(map[string]*machineTracker),
   177  		detailsRequests: make(chan string),
   178  	}
   179  	err := catacomb.Invoke(catacomb.Plan{
   180  		Site: &w.catacomb,
   181  		Work: w.loop,
   182  	})
   183  	if err != nil {
   184  		return nil, errors.Trace(err)
   185  	}
   186  	return w, nil
   187  }
   188  
   189  // Kill is part of the worker.Worker interface.
   190  func (w *pgWorker) Kill() {
   191  	w.catacomb.Kill(nil)
   192  }
   193  
   194  // Wait is part of the worker.Worker interface.
   195  func (w *pgWorker) Wait() error {
   196  	return w.catacomb.Wait()
   197  }
   198  
   199  func (w *pgWorker) loop() error {
   200  	controllerChanges, err := w.watchForControllerChanges()
   201  	if err != nil {
   202  		return errors.Trace(err)
   203  	}
   204  
   205  	configChanges, err := w.watchForConfigChanges()
   206  	if err != nil {
   207  		return errors.Trace(err)
   208  	}
   209  
   210  	unsubscribe, err := w.config.Hub.Subscribe(apiserver.DetailsRequestTopic, w.apiserverDetailsRequested)
   211  	if err != nil {
   212  		return errors.Trace(err)
   213  	}
   214  	defer unsubscribe()
   215  
   216  	var updateChan <-chan time.Time
   217  	retryInterval := initialRetryInterval
   218  
   219  	for {
   220  		logger.Tracef("waiting...")
   221  		select {
   222  		case <-w.catacomb.Dying():
   223  			return w.catacomb.ErrDying()
   224  		case <-controllerChanges:
   225  			// A controller machine was added or removed.
   226  			logger.Tracef("<-controllerChanges")
   227  			changed, err := w.updateControllerMachines()
   228  			if err != nil {
   229  				return errors.Trace(err)
   230  			}
   231  			if !changed {
   232  				continue
   233  			}
   234  			logger.Tracef("controller added or removed, update replica now")
   235  		case <-w.machineChanges:
   236  			// One of the controller machines changed.
   237  			logger.Tracef("<-w.machineChanges")
   238  		case <-configChanges:
   239  			// Controller config has changed.
   240  			logger.Tracef("<-w.configChanges")
   241  
   242  			// If a config change wakes up the loop before the topology has
   243  			// been represented in the worker's machine trackers, ignore it;
   244  			// errors will occur when trying to determine peer group changes.
   245  			// Continuing is OK because subsequent invocations of the loop will
   246  			// pick up the most recent config from state anyway.
   247  			if len(w.machineTrackers) == 0 {
   248  				logger.Tracef("no controller information, ignoring config change")
   249  				continue
   250  			}
   251  		case requester := <-w.detailsRequests:
   252  			// A client requested the details be resent (probably
   253  			// because they just subscribed).
   254  			logger.Tracef("<-w.detailsRequests (from %q)", requester)
   255  			w.config.Hub.Publish(apiserver.DetailsTopic, w.serverDetails)
   256  			continue
   257  		case <-updateChan:
   258  			// Scheduled update.
   259  			logger.Tracef("<-updateChan")
   260  			updateChan = nil
   261  		}
   262  
   263  		servers := w.apiServerHostPorts()
   264  		apiHostPorts := make([][]network.HostPort, 0, len(servers))
   265  		for _, serverHostPorts := range servers {
   266  			apiHostPorts = append(apiHostPorts, serverHostPorts)
   267  		}
   268  
   269  		var failed bool
   270  		if err := w.config.APIHostPortsSetter.SetAPIHostPorts(apiHostPorts); err != nil {
   271  			logger.Errorf("cannot write API server addresses: %v", err)
   272  			failed = true
   273  		}
   274  
   275  		members, err := w.updateReplicaSet()
   276  		if err != nil {
   277  			if _, isReplicaSetError := err.(*replicaSetError); isReplicaSetError {
   278  				logger.Errorf("cannot set replicaset: %v", err)
   279  			} else if _, isStepDownPrimary := err.(*stepDownPrimaryError); !isStepDownPrimary {
   280  				return errors.Trace(err)
   281  			}
   282  			// both replicaset errors and stepping down the primary are both considered fast-retry 'failures'.
   283  			// we need to re-read the state after a short timeout and re-evaluate the replicaset.
   284  			failed = true
   285  		}
   286  		w.publishAPIServerDetails(servers, members)
   287  
   288  		if failed {
   289  			logger.Tracef("failed, waking up after: %v", retryInterval)
   290  			updateChan = w.config.Clock.After(retryInterval)
   291  			retryInterval = scaleRetry(retryInterval)
   292  		} else {
   293  			// Update the replica set members occasionally to keep them up to
   294  			// date with the current replica-set member statuses.
   295  			logger.Tracef("succeeded, waking up after: %v", pollInterval)
   296  			if updateChan == nil {
   297  				updateChan = w.config.Clock.After(pollInterval)
   298  			}
   299  			retryInterval = initialRetryInterval
   300  		}
   301  	}
   302  }
   303  
   304  func scaleRetry(value time.Duration) time.Duration {
   305  	value *= 2
   306  	if value > maxRetryInterval {
   307  		value = maxRetryInterval
   308  	}
   309  	return value
   310  }
   311  
   312  // watchForControllerChanges starts two watchers for changes to controller
   313  // info and status.
   314  // It returns a channel which will receive events if any of the watchers fires.
   315  func (w *pgWorker) watchForControllerChanges() (<-chan struct{}, error) {
   316  	controllerInfoWatcher := w.config.State.WatchControllerInfo()
   317  	if err := w.catacomb.Add(controllerInfoWatcher); err != nil {
   318  		return nil, errors.Trace(err)
   319  	}
   320  
   321  	controllerStatusWatcher := w.config.State.WatchControllerStatusChanges()
   322  	if err := w.catacomb.Add(controllerStatusWatcher); err != nil {
   323  		return nil, errors.Trace(err)
   324  	}
   325  
   326  	out := make(chan struct{})
   327  	go func() {
   328  		for {
   329  			select {
   330  			case <-w.catacomb.Dying():
   331  				return
   332  			case <-controllerInfoWatcher.Changes():
   333  				out <- struct{}{}
   334  			case <-controllerStatusWatcher.Changes():
   335  				out <- struct{}{}
   336  			}
   337  		}
   338  	}()
   339  	return out, nil
   340  }
   341  
   342  // watchForConfigChanges starts a watcher for changes to controller config.
   343  // It returns a channel which will receive events if the watcher fires.
   344  // This is separate from watchForControllerChanges because of the worker loop
   345  // logic. If controller machines have not changed, then further processing
   346  // does not occur, whereas we want to re-publish API addresses and check
   347  // for replica-set changes if either the management or HA space configs have
   348  // changed.
   349  func (w *pgWorker) watchForConfigChanges() (<-chan struct{}, error) {
   350  	controllerConfigWatcher := w.config.State.WatchControllerConfig()
   351  	if err := w.catacomb.Add(controllerConfigWatcher); err != nil {
   352  		return nil, errors.Trace(err)
   353  	}
   354  	return controllerConfigWatcher.Changes(), nil
   355  }
   356  
   357  // updateControllerMachines updates the peergrouper's current list of
   358  // controller machines, as well as starting and stopping trackers for
   359  // them as they are added and removed.
   360  func (w *pgWorker) updateControllerMachines() (bool, error) {
   361  	info, err := w.config.State.ControllerInfo()
   362  	if err != nil {
   363  		return false, fmt.Errorf("cannot get controller info: %v", err)
   364  	}
   365  
   366  	logger.Debugf("controller machines in state: %#v", info.MachineIds)
   367  	changed := false
   368  
   369  	// Stop machine goroutines that no longer correspond to controller
   370  	// machines.
   371  	for _, m := range w.machineTrackers {
   372  		if !inStrings(m.Id(), info.MachineIds) {
   373  			worker.Stop(m)
   374  			delete(w.machineTrackers, m.Id())
   375  			changed = true
   376  		}
   377  	}
   378  
   379  	// Start machines with no watcher
   380  	for _, id := range info.MachineIds {
   381  		stm, err := w.config.State.Machine(id)
   382  		if err != nil {
   383  			if errors.IsNotFound(err) {
   384  				// If the machine isn't found, it must have been
   385  				// removed and will soon enough be removed
   386  				// from the controller list. This will probably
   387  				// never happen, but we'll code defensively anyway.
   388  				logger.Warningf("machine %q from controller list not found", id)
   389  				continue
   390  			}
   391  			return false, fmt.Errorf("cannot get machine %q: %v", id, err)
   392  		}
   393  		if _, ok := w.machineTrackers[id]; ok {
   394  			continue
   395  		}
   396  		logger.Debugf("found new machine %q", id)
   397  
   398  		// Don't add the machine unless it is "Started"
   399  		machineStatus, err := stm.Status()
   400  		if err != nil {
   401  			return false, errors.Annotatef(err, "cannot get status for machine %q", id)
   402  		}
   403  		// A machine in status Error or Stopped might still be properly running the controller. We still want to treat
   404  		// it as an active machine, even if we're trying to tear it down.
   405  		if machineStatus.Status != status.Pending {
   406  			logger.Debugf("machine %q has started, adding it to peergrouper list", id)
   407  			tracker, err := newMachineTracker(stm, w.machineChanges)
   408  			if err != nil {
   409  				return false, errors.Trace(err)
   410  			}
   411  			if err := w.catacomb.Add(tracker); err != nil {
   412  				return false, errors.Trace(err)
   413  			}
   414  			w.machineTrackers[id] = tracker
   415  			changed = true
   416  		} else {
   417  			logger.Debugf("machine %q not ready: %v", id, machineStatus.Status)
   418  		}
   419  
   420  	}
   421  	return changed, nil
   422  }
   423  
   424  func (w *pgWorker) apiserverDetailsRequested(topic string, request apiserver.DetailsRequest, err error) {
   425  	if err != nil {
   426  		// This shouldn't happen (barring programmer error ;) - treat it as fatal.
   427  		w.catacomb.Kill(errors.Annotate(err, "apiserver details request callback failed"))
   428  		return
   429  	}
   430  	select {
   431  	case w.detailsRequests <- request.Requester:
   432  	case <-w.catacomb.Dying():
   433  	}
   434  }
   435  
   436  func inStrings(t string, ss []string) bool {
   437  	for _, s := range ss {
   438  		if s == t {
   439  			return true
   440  		}
   441  	}
   442  	return false
   443  }
   444  
   445  // apiServerHostPorts returns the host-ports for each apiserver machine.
   446  func (w *pgWorker) apiServerHostPorts() map[string][]network.HostPort {
   447  	servers := make(map[string][]network.HostPort)
   448  	for _, m := range w.machineTrackers {
   449  		hostPorts := network.AddressesWithPort(m.Addresses(), w.config.APIPort)
   450  		if len(hostPorts) == 0 {
   451  			continue
   452  		}
   453  		servers[m.Id()] = hostPorts
   454  	}
   455  	return servers
   456  }
   457  
   458  // publishAPIServerDetails publishes the details corresponding to the latest
   459  // known controller/replica-set topology if it has changed from the last known
   460  // state.
   461  func (w *pgWorker) publishAPIServerDetails(
   462  	servers map[string][]network.HostPort,
   463  	members map[string]*replicaset.Member,
   464  ) {
   465  	details := apiserver.Details{
   466  		Servers:   make(map[string]apiserver.APIServer),
   467  		LocalOnly: true,
   468  	}
   469  	internalPort := w.config.ControllerAPIPort
   470  	if internalPort == 0 {
   471  		internalPort = w.config.APIPort
   472  	}
   473  	for id, hostPorts := range servers {
   474  		var internalAddress string
   475  		if members[id] != nil {
   476  			mongoAddress, _, err := net.SplitHostPort(members[id].Address)
   477  			if err == nil {
   478  				internalAddress = net.JoinHostPort(mongoAddress, strconv.Itoa(internalPort))
   479  			}
   480  		}
   481  		server := apiserver.APIServer{
   482  			ID:              id,
   483  			InternalAddress: internalAddress,
   484  		}
   485  		for _, hp := range network.FilterUnusableHostPorts(hostPorts) {
   486  			server.Addresses = append(server.Addresses, hp.String())
   487  		}
   488  		sort.Strings(server.Addresses)
   489  		details.Servers[server.ID] = server
   490  	}
   491  
   492  	if !reflect.DeepEqual(w.serverDetails, details) {
   493  		w.config.Hub.Publish(apiserver.DetailsTopic, details)
   494  		w.serverDetails = details
   495  	}
   496  }
   497  
   498  // replicaSetError holds an error returned as a result
   499  // of calling replicaset.Set. As this is expected to fail
   500  // in the normal course of things, it needs special treatment.
   501  type replicaSetError struct {
   502  	error
   503  }
   504  
   505  // stepDownPrimaryError means we needed to ask the primary to step down, so we should come back and re-evaluate the
   506  // replicaset once the new primary is voted in
   507  type stepDownPrimaryError struct {
   508  	error
   509  }
   510  
   511  // updateReplicaSet sets the current replica set members, and applies the
   512  // given voting status to machines in the state. A mapping of machine ID
   513  // to replicaset.Member structures is returned.
   514  func (w *pgWorker) updateReplicaSet() (map[string]*replicaset.Member, error) {
   515  	info, err := w.peerGroupInfo()
   516  	if err != nil {
   517  		return nil, errors.Annotate(err, "creating peer group info")
   518  	}
   519  	desired, err := desiredPeerGroup(info)
   520  	// membersChanged, members, voting, err
   521  	if err != nil {
   522  		return nil, errors.Annotate(err, "computing desired peer group")
   523  	}
   524  	if logger.IsDebugEnabled() {
   525  		if desired.isChanged {
   526  			logger.Debugf("desired peer group members: \n%s", prettyReplicaSetMembers(desired.members))
   527  		} else {
   528  			var output []string
   529  			for id, v := range desired.machineVoting {
   530  				output = append(output, fmt.Sprintf("  %s: %v", id, v))
   531  			}
   532  			logger.Debugf("no change in desired peer group, voting: \n%s", strings.Join(output, "\n"))
   533  		}
   534  	}
   535  
   536  	if desired.stepDownPrimary {
   537  		logger.Infof("mongo primary machine needs to be removed, first requesting it to step down")
   538  		if err := w.config.MongoSession.StepDownPrimary(); err != nil {
   539  			// StepDownPrimary should have already handled the io.EOF that mongo might give, so any error we
   540  			// get is unknown
   541  			return nil, errors.Annotate(err, "asking primary to step down")
   542  		}
   543  		// Asking the Primary to step down forces us to disconnect from Mongo, but session.Refresh() should get us
   544  		// reconnected so we can keep operating
   545  		w.config.MongoSession.Refresh()
   546  		// However, we no longer know who the primary is, so we have to error out and have it reevaluated
   547  		return nil, &stepDownPrimaryError{
   548  			error: errors.Errorf("primary is stepping down, must reevaluate peer group"),
   549  		}
   550  	}
   551  
   552  	// We cannot change the HasVote flag of a machine in state at exactly
   553  	// the same moment as changing its voting status in the replica set.
   554  	//
   555  	// Thus we need to be careful that a machine which is actually a voting
   556  	// member is not seen to not have a vote, because otherwise
   557  	// there is nothing to prevent the machine being removed.
   558  	//
   559  	// To avoid this happening, we make sure when we call SetReplicaSet,
   560  	// that the voting status of machines is the union of both old
   561  	// and new voting machines - that is the set of HasVote machines
   562  	// is a superset of all the actual voting machines.
   563  	//
   564  	// Only after the call has taken place do we reset the voting status
   565  	// of the machines that have lost their vote.
   566  	//
   567  	// If there's a crash, the voting status may not reflect the
   568  	// actual voting status for a while, but when things come
   569  	// back on line, it will be sorted out, as desiredReplicaSet
   570  	// will return the actual voting status.
   571  	//
   572  	// Note that we potentially update the HasVote status of the machines even
   573  	// if the members have not changed.
   574  	var added, removed []*machineTracker
   575  	// Iterate in obvious order so we don't get weird log messages
   576  	votingIds := make([]string, 0, len(desired.machineVoting))
   577  	for id := range desired.machineVoting {
   578  		votingIds = append(votingIds, id)
   579  	}
   580  	sortAsInts(votingIds)
   581  	for _, id := range votingIds {
   582  		hasVote := desired.machineVoting[id]
   583  		m := info.machines[id]
   584  		switch {
   585  		case hasVote && !m.stm.HasVote():
   586  			added = append(added, m)
   587  		case !hasVote && m.stm.HasVote():
   588  			removed = append(removed, m)
   589  		}
   590  	}
   591  	if err := setHasVote(added, true); err != nil {
   592  		return nil, errors.Annotate(err, "adding new voters")
   593  	}
   594  	if desired.isChanged {
   595  		ms := make([]replicaset.Member, 0, len(desired.members))
   596  		for _, m := range desired.members {
   597  			ms = append(ms, *m)
   598  		}
   599  		if err := w.config.MongoSession.Set(ms); err != nil {
   600  			// We've failed to set the replica set, so revert back
   601  			// to the previous settings.
   602  			if err1 := setHasVote(added, false); err1 != nil {
   603  				logger.Errorf("cannot revert machine voting after failure to change replica set: %v", err1)
   604  			}
   605  			return nil, &replicaSetError{err}
   606  		}
   607  		logger.Infof("successfully updated replica set")
   608  	}
   609  	if err := setHasVote(removed, false); err != nil {
   610  		return nil, errors.Annotate(err, "removing non-voters")
   611  	}
   612  
   613  	// Reset machine status for members of the changed peer-group.
   614  	// Any previous peer-group determination errors result in status
   615  	// warning messages.
   616  	for id := range desired.members {
   617  		if err := w.machineTrackers[id].stm.SetStatus(getStatusInfo("")); err != nil {
   618  			return nil, errors.Trace(err)
   619  		}
   620  	}
   621  	for _, tracker := range info.machines {
   622  		if tracker.stm.Life() != state.Alive && !tracker.stm.HasVote() {
   623  			logger.Debugf("removing dying controller machine %s", tracker.Id())
   624  			if err := w.config.State.RemoveControllerMachine(tracker.stm); err != nil {
   625  				logger.Errorf("failed to remove dying machine as a controller after removing its vote: %v", err)
   626  			}
   627  		}
   628  	}
   629  	for _, removedTracker := range removed {
   630  		if removedTracker.stm.Life() == state.Alive {
   631  			logger.Debugf("vote removed from %v but machine is %s", removedTracker.Id(), state.Alive)
   632  		}
   633  	}
   634  	return desired.members, nil
   635  }
   636  
   637  func prettyReplicaSetMembers(members map[string]*replicaset.Member) string {
   638  	var result []string
   639  	// Its easier to read if we sort by Id.
   640  	keys := make([]string, 0, len(members))
   641  	for key := range members {
   642  		keys = append(keys, key)
   643  	}
   644  	sort.Strings(keys)
   645  	for _, key := range keys {
   646  		m := members[key]
   647  		voting := "not-voting"
   648  		if isVotingMember(m) {
   649  			voting = "voting"
   650  		}
   651  		result = append(result, fmt.Sprintf("    Id: %d, Tags: %v, %s", m.Id, m.Tags, voting))
   652  	}
   653  	return strings.Join(result, "\n")
   654  }
   655  
   656  // peerGroupInfo collates current session information about the
   657  // mongo peer group with information from state machines.
   658  func (w *pgWorker) peerGroupInfo() (*peerGroupInfo, error) {
   659  	sts, err := w.config.MongoSession.CurrentStatus()
   660  	if err != nil {
   661  		return nil, errors.Annotate(err, "cannot get replica set status")
   662  	}
   663  
   664  	members, err := w.config.MongoSession.CurrentMembers()
   665  	if err != nil {
   666  		return nil, errors.Annotate(err, "cannot get replica set members")
   667  	}
   668  
   669  	haSpace, err := w.getHASpaceFromConfig()
   670  	if err != nil {
   671  		return nil, err
   672  	}
   673  
   674  	logger.Tracef("read peer group info: %# v\n%# v", pretty.Formatter(sts), pretty.Formatter(members))
   675  	return newPeerGroupInfo(w.machineTrackers, sts.Members, members, w.config.MongoPort, haSpace)
   676  }
   677  
   678  // getHASpaceFromConfig returns a SpaceName from the controller config for
   679  // HA space. If unset, the empty space ("") will be returned.
   680  func (w *pgWorker) getHASpaceFromConfig() (network.SpaceName, error) {
   681  	config, err := w.config.State.ControllerConfig()
   682  	if err != nil {
   683  		return network.SpaceName(""), err
   684  	}
   685  	return network.SpaceName(config.JujuHASpace()), nil
   686  }
   687  
   688  // setHasVote sets the HasVote status of all the given machines to hasVote.
   689  func setHasVote(ms []*machineTracker, hasVote bool) error {
   690  	if len(ms) == 0 {
   691  		return nil
   692  	}
   693  	logger.Infof("setting HasVote=%v on machines %v", hasVote, ms)
   694  	for _, m := range ms {
   695  		if err := m.stm.SetHasVote(hasVote); err != nil {
   696  			return fmt.Errorf("cannot set voting status of %q to %v: %v", m.Id(), hasVote, err)
   697  		}
   698  	}
   699  	return nil
   700  }