github.com/cloud-green/juju@v0.0.0-20151002100041-a00291338d3d/worker/peergrouper/worker.go

// Copyright 2014 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package peergrouper

import (
	"fmt"
	"sync"
	"time"

	"github.com/juju/errors"
	"github.com/juju/replicaset"
	"launchpad.net/tomb"

	"github.com/juju/juju/instance"
	"github.com/juju/juju/mongo"
	"github.com/juju/juju/network"
	"github.com/juju/juju/state"
	"github.com/juju/juju/worker"
)

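// stateInterface describes the State functionality used by the
// peer-group worker. It is an interface so that tests can substitute
// a fake implementation; the production implementation is stateShim.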
type stateInterface interface {
	Machine(id string) (stateMachine, error)
	WatchStateServerInfo() state.NotifyWatcher
	StateServerInfo() (*state.StateServerInfo, error)
	MongoSession() mongoSession
}

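// stateMachine describes the state server machine attributes that the
// worker watches. It is an interface so that tests can substitute a
// fake machine implementation.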
type stateMachine interface {
	Id() string
	InstanceId() (instance.Id, error)
	Refresh() error
	Watch() state.NotifyWatcher
	WantsVote() bool
	HasVote() bool
	SetHasVote(hasVote bool) error
	APIHostPorts() []network.HostPort
	MongoHostPorts() []network.HostPort
}

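// mongoSession describes the replica set operations the worker needs
// from a mongo session: reading the current status and membership,
// and setting new members.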
type mongoSession interface {
	CurrentStatus() (*replicaset.Status, error)
	CurrentMembers() ([]replicaset.Member, error)
	Set([]replicaset.Member) error
}

type publisherInterface interface {
	// publishAPIServers publishes information about the given state
	// servers to whomsoever it may concern. When it is called there
	// is no guarantee that any of the information has actually changed.
	publishAPIServers(apiServers [][]network.HostPort, instanceIds []instance.Id) error
}
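
// For tests, a trivial implementation is enough to satisfy
// publisherInterface. A minimal sketch (the nopPublisher name is
// illustrative, not part of this package):
//
//	type nopPublisher struct{}
//
//	func (nopPublisher) publishAPIServers([][]network.HostPort, []instance.Id) error {
//		return nil
//	}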

// notifyFunc holds a function that is sent to the main worker loop
// to fetch new information when something changes. It reports whether
// the information has actually changed (and by implication whether
// the replica set may need to be changed).
type notifyFunc func() (changed bool, err error)

var (
	// initialRetryInterval holds the interval at which we start
	// retrying after a failure to set the mongo replica set members.
	// Each further attempt backs off exponentially (2s, 4s, 8s, ...)
	// until maxRetryInterval is reached.
	initialRetryInterval = 2 * time.Second

	// maxRetryInterval holds the maximum interval
	// between retry attempts.
	maxRetryInterval = 5 * time.Minute

	// pollInterval holds the interval at which the replica set
	// members will be updated even in the absence of changes
	// to State. This enables us to make changes to members
	// that are triggered by changes to member status.
	pollInterval = 1 * time.Minute
)

// pgWorker holds all the mutable state that we are watching.
// The only goroutine that is allowed to modify this
// is worker.loop - other watchers modify the
// current state by calling worker.notify instead of
// modifying it directly.
type pgWorker struct {
	tomb tomb.Tomb

	// wg represents all the currently running goroutines.
	// The worker main loop waits for all of these to exit
	// before finishing.
	wg sync.WaitGroup

	// st represents the State. It is an interface so we can swap
	// out the implementation during testing.
	st stateInterface

	// notifyCh receives functions sent by the watchers when
	// something changes that might affect the peer group membership.
	// Each function is run inside the main worker goroutine to
	// mutate the state, and reports whether the state has actually
	// changed.
	notifyCh chan notifyFunc

	// machines holds the set of machines we are currently
	// watching (all the state server machines). Each one has an
	// associated goroutine that watches attributes of that machine.
	machines map[string]*machine

	// publisher holds the implementation of the API
	// address publisher.
	publisher publisherInterface
}

// New returns a new worker that maintains the mongo replica set
// with respect to the given state.
func New(st *state.State) (worker.Worker, error) {
	cfg, err := st.EnvironConfig()
	if err != nil {
		return nil, err
	}
	return newWorker(&stateShim{
		State:     st,
		mongoPort: cfg.StatePort(),
		apiPort:   cfg.APIPort(),
	}, newPublisher(st, cfg.PreferIPv6())), nil
}
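
// From a caller's point of view, typical use is a sketch along these
// lines (assuming a *state.State is already available, as it is in
// the machine agent):
//
//	w, err := peergrouper.New(st)
//	if err != nil {
//		return err
//	}
//	defer worker.Stop(w)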

func newWorker(st stateInterface, pub publisherInterface) worker.Worker {
	w := &pgWorker{
		st:        st,
		notifyCh:  make(chan notifyFunc),
		machines:  make(map[string]*machine),
		publisher: pub,
	}
	go func() {
		defer w.tomb.Done()
		if err := w.loop(); err != nil {
			logger.Errorf("peergrouper loop terminated: %v", err)
			w.tomb.Kill(err)
		}
		// Wait for the various goroutines to be killed.
		// N.B. we don't defer this call because
		// if we do and a bug causes a panic, Wait will deadlock
		// waiting for the unkilled goroutines to exit.
		w.wg.Wait()
	}()
	return w
}

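// Kill is part of the worker.Worker interface.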
func (w *pgWorker) Kill() {
	w.tomb.Kill(nil)
}

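// Wait is part of the worker.Worker interface.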
func (w *pgWorker) Wait() error {
	return w.tomb.Wait()
}

func (w *pgWorker) loop() error {
	infow := w.watchStateServerInfo()
	defer infow.stop()

	retry := time.NewTimer(0)
	retry.Stop()
	retryInterval := initialRetryInterval
	for {
		select {
		case f := <-w.notifyCh:
			// Update our current view of the state of affairs.
			changed, err := f()
			if err != nil {
				return err
			}
			if !changed {
				break
			}
			// Try to update the replica set immediately.
			retry.Reset(0)
		case <-retry.C:
			ok := true
			servers, instanceIds, err := w.apiPublishInfo()
			if err != nil {
				return fmt.Errorf("cannot get API server info: %v", err)
			}
			if err := w.publisher.publishAPIServers(servers, instanceIds); err != nil {
				logger.Errorf("cannot publish API server addresses: %v", err)
				ok = false
			}
			if err := w.updateReplicaset(); err != nil {
				if _, isReplicaSetError := err.(*replicaSetError); !isReplicaSetError {
					return err
				}
				logger.Errorf("cannot set replicaset: %v", err)
				ok = false
			}
			if ok {
				// Update the replica set members occasionally
				// to keep them up to date with the current
				// replica set member statuses.
				retry.Reset(pollInterval)
				retryInterval = initialRetryInterval
			} else {
				retry.Reset(retryInterval)
				retryInterval *= 2
				if retryInterval > maxRetryInterval {
					retryInterval = maxRetryInterval
				}
			}
		case <-w.tomb.Dying():
			return tomb.ErrDying
		}
	}
}

func (w *pgWorker) apiPublishInfo() ([][]network.HostPort, []instance.Id, error) {
	servers := make([][]network.HostPort, 0, len(w.machines))
	instanceIds := make([]instance.Id, 0, len(w.machines))
	for _, m := range w.machines {
		if len(m.apiHostPorts) == 0 {
			continue
		}
		instanceId, err := m.stm.InstanceId()
		if err != nil {
			return nil, nil, err
		}
		instanceIds = append(instanceIds, instanceId)
		servers = append(servers, m.apiHostPorts)
	}
	return servers, instanceIds, nil
}

// notify sends the given notification function to
// the worker main loop to be executed. It reports whether
// the function was delivered; it returns false if the
// worker is being shut down.
func (w *pgWorker) notify(f notifyFunc) bool {
	select {
	case w.notifyCh <- f:
		return true
	case <-w.tomb.Dying():
		return false
	}
}

// peerGroupInfo collates current session information about the
// mongo peer group with information from state machines.
func (w *pgWorker) peerGroupInfo() (*peerGroupInfo, error) {
	session := w.st.MongoSession()
	info := &peerGroupInfo{}
	var err error
	status, err := session.CurrentStatus()
	if err != nil {
		return nil, fmt.Errorf("cannot get replica set status: %v", err)
	}
	info.statuses = status.Members
	info.members, err = session.CurrentMembers()
	if err != nil {
		return nil, fmt.Errorf("cannot get replica set members: %v", err)
	}
	info.machines = w.machines
	return info, nil
}

// replicaSetError holds an error returned as a result
// of calling replicaset.Set. As this is expected to fail
// in the normal course of things, it needs special treatment.
type replicaSetError struct {
	error
}

// updateReplicaset sets the current replica set members, and applies the
// given voting status to machines in the state.
func (w *pgWorker) updateReplicaset() error {
	info, err := w.peerGroupInfo()
	if err != nil {
		return err
	}
	members, voting, err := desiredPeerGroup(info)
	if err != nil {
		return fmt.Errorf("cannot compute desired peer group: %v", err)
	}
	if members != nil {
		logger.Debugf("desired peer group members: %#v", members)
	} else {
		logger.Debugf("no change in desired peer group (voting %#v)", voting)
	}

	// We cannot change the HasVote flag of a machine in state at exactly
	// the same moment as changing its voting status in the replica set.
	//
	// Thus we need to be careful that a machine which is actually a voting
	// member is not seen to not have a vote, because otherwise
	// there is nothing to prevent the machine being removed.
	//
	// To avoid this happening, we make sure when we call SetReplicaSet,
	// that the voting status of machines is the union of both old
	// and new voting machines - that is the set of HasVote machines
	// is a superset of all the actual voting machines.
	//
	// Only after the call has taken place do we reset the voting status
	// of the machines that have lost their vote.
	//
	// If there's a crash, the voting status may not reflect the
	// actual voting status for a while, but when things come
	// back online, it will be sorted out, as desiredPeerGroup
	// will return the actual voting status.
	//
	// Note that we potentially update the HasVote status of the machines even
	// if the members have not changed.
	var added, removed []*machine
	for m, hasVote := range voting {
		switch {
		case hasVote && !m.stm.HasVote():
			added = append(added, m)
		case !hasVote && m.stm.HasVote():
			removed = append(removed, m)
		}
	}
	if err := setHasVote(added, true); err != nil {
		return err
	}
	if members != nil {
		if err := w.st.MongoSession().Set(members); err != nil {
			// We've failed to set the replica set, so revert back
			// to the previous settings.
			if err1 := setHasVote(added, false); err1 != nil {
				logger.Errorf("cannot revert machine voting after failure to change replica set: %v", err1)
			}
			return &replicaSetError{err}
		}
		logger.Infof("successfully changed replica set to %#v", members)
	}
	if err := setHasVote(removed, false); err != nil {
		return err
	}
	return nil
}

// start runs the given loop function until it returns.
// If the loop function returns an error, the receiving
// pgWorker is killed with that error.
func (w *pgWorker) start(loop func() error) {
	w.wg.Add(1)
	go func() {
		defer w.wg.Done()
		if err := loop(); err != nil {
			w.tomb.Kill(err)
		}
	}()
}

// setHasVote sets the HasVote status of all the given
// machines to hasVote.
func setHasVote(ms []*machine, hasVote bool) error {
	if len(ms) == 0 {
		return nil
	}
	logger.Infof("setting HasVote=%v on machines %v", hasVote, ms)
	for _, m := range ms {
		if err := m.stm.SetHasVote(hasVote); err != nil {
			return fmt.Errorf("cannot set voting status of %q to %v: %v", m.id, hasVote, err)
		}
	}
	return nil
}

// serverInfoWatcher watches the state server info and
// notifies the worker when it changes.
type serverInfoWatcher struct {
	worker  *pgWorker
	watcher state.NotifyWatcher
}

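// watchStateServerInfo starts a goroutine watching the state server
// info and returns the watcher so that the caller can stop it.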
func (w *pgWorker) watchStateServerInfo() *serverInfoWatcher {
	infow := &serverInfoWatcher{
		worker:  w,
		watcher: w.st.WatchStateServerInfo(),
	}
	w.start(infow.loop)
	return infow
}

func (infow *serverInfoWatcher) loop() error {
	for {
		select {
		case _, ok := <-infow.watcher.Changes():
			if !ok {
				return infow.watcher.Err()
			}
			infow.worker.notify(infow.updateMachines)
		case <-infow.worker.tomb.Dying():
			return tomb.ErrDying
		}
	}
}

func (infow *serverInfoWatcher) stop() {
	infow.watcher.Stop()
}

// updateMachines is a notifyFunc that updates the current
// machines when the state server info has changed.
func (infow *serverInfoWatcher) updateMachines() (bool, error) {
	info, err := infow.worker.st.StateServerInfo()
	if err != nil {
		return false, fmt.Errorf("cannot get state server info: %v", err)
	}
	changed := false
	// Stop machine goroutines that no longer correspond to state server
	// machines.
	for _, m := range infow.worker.machines {
		if !inStrings(m.id, info.MachineIds) {
			m.stop()
			delete(infow.worker.machines, m.id)
			changed = true
		}
	}
	// Start watchers for machines we aren't watching yet.
	for _, id := range info.MachineIds {
		if _, ok := infow.worker.machines[id]; ok {
			continue
		}
		logger.Debugf("found new machine %q", id)
		stm, err := infow.worker.st.Machine(id)
		if err != nil {
			if errors.IsNotFound(err) {
				// If the machine isn't found, it must have been
				// removed and will soon enough be removed
				// from the state server list. This will probably
				// never happen, but we'll code defensively anyway.
				logger.Warningf("machine %q from state server list not found", id)
				continue
			}
			return false, fmt.Errorf("cannot get machine %q: %v", id, err)
		}
		infow.worker.machines[id] = infow.worker.newMachine(stm)
		changed = true
	}
	return changed, nil
}

// machine represents a machine in State.
type machine struct {
	id             string
	wantsVote      bool
	apiHostPorts   []network.HostPort
	mongoHostPorts []network.HostPort

	worker         *pgWorker
	stm            stateMachine
	machineWatcher state.NotifyWatcher
}

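// mongoHostPort returns the address and port to use for the machine's
// replica set member, as chosen by mongo.SelectPeerHostPort.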
func (m *machine) mongoHostPort() string {
	return mongo.SelectPeerHostPort(m.mongoHostPorts)
}

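// String implements fmt.Stringer.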
func (m *machine) String() string {
	return m.id
}

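// GoString implements fmt.GoStringer, making %#v output for machines
// readable.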
func (m *machine) GoString() string {
	return fmt.Sprintf("&peergrouper.machine{id: %q, wantsVote: %v, hostPort: %q}", m.id, m.wantsVote, m.mongoHostPort())
}

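// newMachine creates a machine for the given state machine, taking a
// snapshot of its current attributes, and starts a goroutine that
// watches it for changes.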
func (w *pgWorker) newMachine(stm stateMachine) *machine {
	m := &machine{
		worker:         w,
		id:             stm.Id(),
		stm:            stm,
		apiHostPorts:   stm.APIHostPorts(),
		mongoHostPorts: stm.MongoHostPorts(),
		wantsVote:      stm.WantsVote(),
		machineWatcher: stm.Watch(),
	}
	w.start(m.loop)
	return m
}

func (m *machine) loop() error {
	for {
		select {
		case _, ok := <-m.machineWatcher.Changes():
			if !ok {
				return m.machineWatcher.Err()
			}
			m.worker.notify(m.refresh)
		case <-m.worker.tomb.Dying():
			return nil
		}
	}
}

func (m *machine) stop() {
	m.machineWatcher.Stop()
}

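// refresh is a notifyFunc that reloads the machine document from
// state and reports whether any of the watched attributes have
// changed.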
func (m *machine) refresh() (bool, error) {
	if err := m.stm.Refresh(); err != nil {
		if errors.IsNotFound(err) {
			// We want to be robust when the machine
			// state is out of date with respect to the
			// state server info, so if the machine
			// has been removed, just assume that
			// no change has happened - the machine
			// loop will be stopped very soon anyway.
			return false, nil
		}
		return false, err
	}
	changed := false
	if wantsVote := m.stm.WantsVote(); wantsVote != m.wantsVote {
		m.wantsVote = wantsVote
		changed = true
	}
	if hps := m.stm.MongoHostPorts(); !hostPortsEqual(hps, m.mongoHostPorts) {
		m.mongoHostPorts = hps
		changed = true
	}
	if hps := m.stm.APIHostPorts(); !hostPortsEqual(hps, m.apiHostPorts) {
		m.apiHostPorts = hps
		changed = true
	}
	return changed, nil
}

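// hostPortsEqual reports whether hps1 and hps2 hold the same
// host/port pairs in the same order.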
func hostPortsEqual(hps1, hps2 []network.HostPort) bool {
	if len(hps1) != len(hps2) {
		return false
	}
	for i := range hps1 {
		if hps1[i] != hps2[i] {
			return false
		}
	}
	return true
}

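// inStrings reports whether t is present in ss.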
func inStrings(t string, ss []string) bool {
	for _, s := range ss {
		if s == t {
			return true
		}
	}
	return false
}