github.com/cloudbase/juju-core@v0.0.0-20140504232958-a7271ac7912f/worker/peergrouper/worker.go

// Copyright 2014 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package peergrouper

import (
	"fmt"
	"sync"
	"time"

	"launchpad.net/tomb"

	"launchpad.net/juju-core/errors"
	"launchpad.net/juju-core/replicaset"
	"launchpad.net/juju-core/state"
	"launchpad.net/juju-core/worker"
)

type stateInterface interface {
	Machine(id string) (stateMachine, error)
	WatchStateServerInfo() state.NotifyWatcher
	StateServerInfo() (*state.StateServerInfo, error)
	MongoSession() mongoSession
}

type stateMachine interface {
	Id() string
	Refresh() error
	Watch() state.NotifyWatcher
	WantsVote() bool
	HasVote() bool
	SetHasVote(hasVote bool) error
	StateHostPort() string
}

type mongoSession interface {
	CurrentStatus() (*replicaset.Status, error)
	CurrentMembers() ([]replicaset.Member, error)
	Set([]replicaset.Member) error
}
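
// The interfaces above exist so that tests can substitute fakes for the
// live State and mongo session. As a purely illustrative, hypothetical
// sketch (the package's real test fakes may differ), a minimal in-memory
// mongoSession could look like:
//
//	type fakeSession struct {
//		members []replicaset.Member
//		status  *replicaset.Status
//	}
//
//	func (s *fakeSession) CurrentStatus() (*replicaset.Status, error)   { return s.status, nil }
//	func (s *fakeSession) CurrentMembers() ([]replicaset.Member, error) { return s.members, nil }
//	func (s *fakeSession) Set(ms []replicaset.Member) error             { s.members = ms; return nil }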

// notifyFunc holds a function that is sent
// to the main worker loop to fetch new information
// when something changes. It reports whether
// the information has actually changed (and by implication
// whether the replica set may need to be changed).
type notifyFunc func() (bool, error)
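
// The two notifyFuncs used in this file are (*serverInfoWatcher).updateMachines
// and (*machine).refresh, defined below. As a purely illustrative sketch,
// a minimal notifyFunc delivered from a watcher goroutine has this shape
// (readValue and cachedValue are hypothetical, not part of this package):
//
//	func() (bool, error) {
//		// Re-read the watched value inside the main loop; report a
//		// change only if it differs from our cached view, so the loop
//		// knows the replica set may need updating.
//		v, err := readValue()
//		if err != nil {
//			return false, err
//		}
//		changed := v != cachedValue
//		cachedValue = v
//		return changed, nil
//	}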

var (
	// If we fail to set the mongo replica set members,
	// we retry at the following interval until we succeed.
	retryInterval = 2 * time.Second

	// pollInterval holds the interval at which the replica set
	// members will be updated even in the absence of changes
	// to State. This enables us to make changes to members
	// that are triggered by changes to member status.
	//
	// 10 seconds is the default time interval used by
	// mongo to keep its replicas up to date.
	pollInterval = 10 * time.Second
)

// pgWorker holds all the mutable state that we are watching.
// The only goroutine that is allowed to modify this
// is worker.loop - other watchers modify the
// current state by calling worker.notify instead of
// modifying it directly.
type pgWorker struct {
	tomb tomb.Tomb

	// wg represents all the currently running goroutines.
	// The worker main loop waits for all of these to exit
	// before finishing.
	wg sync.WaitGroup

	// st represents the State. It is an interface for testing
	// purposes only.
	st stateInterface

	// When something changes that might affect
	// the peer group membership, a watcher sends a function
	// on notifyCh that is run inside the main worker
	// goroutine to mutate the state. The function reports
	// whether the state has actually changed.
	notifyCh chan notifyFunc

	// machines holds the set of machines we are currently
	// watching (all the state server machines). Each one has
	// an associated goroutine that watches attributes of that
	// machine.
	machines map[string]*machine
}

// New returns a new worker that maintains the mongo replica set
// with respect to the given state.
func New(st *state.State) (worker.Worker, error) {
	cfg, err := st.EnvironConfig()
	if err != nil {
		return nil, err
	}
	return newWorker(&stateShim{
		State:     st,
		mongoPort: cfg.StatePort(),
	}), nil
}
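
// A caller would typically install this worker under a runner. As a
// hedged sketch only (assuming the usual juju-core worker.Runner
// pattern; the worker name "peergrouper" here is illustrative):
//
//	runner.StartWorker("peergrouper", func() (worker.Worker, error) {
//		return New(st)
//	})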

func newWorker(st stateInterface) worker.Worker {
	w := &pgWorker{
		st:       st,
		notifyCh: make(chan notifyFunc),
		machines: make(map[string]*machine),
	}
	go func() {
		defer w.tomb.Done()
		if err := w.loop(); err != nil {
			logger.Errorf("peergrouper loop terminated: %v", err)
			w.tomb.Kill(err)
		}
		// Wait for the various goroutines to be killed.
		// N.B. we don't defer this call because
		// if we do and a bug causes a panic, Wait will deadlock
		// waiting for the unkilled goroutines to exit.
		w.wg.Wait()
	}()
	return w
}

func (w *pgWorker) Kill() {
	w.tomb.Kill(nil)
}

func (w *pgWorker) Wait() error {
	return w.tomb.Wait()
}

func (w *pgWorker) loop() error {
	infow := w.watchStateServerInfo()
	defer infow.stop()

	// The retry timer fires whenever the replica set should be
	// updated: immediately after a change, after retryInterval
	// following a failure, and after pollInterval following success.
	// Create it stopped, and drain it if it has already fired - a
	// timer created with NewTimer(0) may expire before Stop is
	// called (see the time.Timer documentation).
	retry := time.NewTimer(0)
	if !retry.Stop() {
		<-retry.C
	}
	for {
		select {
		case f := <-w.notifyCh:
			// Update our current view of the state of affairs.
			changed, err := f()
			if err != nil {
				return err
			}
			if !changed {
				break
			}
			// Try to update the replica set immediately.
			retry.Reset(0)
		case <-retry.C:
			if err := w.updateReplicaset(); err != nil {
				if _, isReplicaSetError := err.(*replicaSetError); !isReplicaSetError {
					return err
				}
				logger.Errorf("cannot set replicaset: %v", err)
				retry.Reset(retryInterval)
				break
			}

			// Update the replica set members occasionally
			// to keep them up to date with the current
			// replica set member statuses.
			retry.Reset(pollInterval)
		case <-w.tomb.Dying():
			return tomb.ErrDying
		}
	}
}

// notify sends the given notification function to
// the worker main loop to be executed. It reports whether
// the function was delivered; it returns false if the
// worker is shutting down.
func (w *pgWorker) notify(f notifyFunc) bool {
	select {
	case w.notifyCh <- f:
		return true
	case <-w.tomb.Dying():
		return false
	}
}

// peerGroupInfo collates current session information about the
// mongo peer group with information from state machines.
func (w *pgWorker) peerGroupInfo() (*peerGroupInfo, error) {
	session := w.st.MongoSession()
	info := &peerGroupInfo{}
	status, err := session.CurrentStatus()
	if err != nil {
		return nil, fmt.Errorf("cannot get replica set status: %v", err)
	}
	info.statuses = status.Members
	info.members, err = session.CurrentMembers()
	if err != nil {
		return nil, fmt.Errorf("cannot get replica set members: %v", err)
	}
	info.machines = w.machines
	return info, nil
}

// replicaSetError holds an error returned as a result
// of calling replicaset.Set. As this is expected to fail
// in the normal course of things, it needs special treatment:
// the main loop retries after retryInterval instead of dying.
type replicaSetError struct {
	error
}

// updateReplicaset sets the current replica set members, and applies the
// computed voting status to machines in the state.
func (w *pgWorker) updateReplicaset() error {
	info, err := w.peerGroupInfo()
	if err != nil {
		return err
	}
	members, voting, err := desiredPeerGroup(info)
	if err != nil {
		return fmt.Errorf("cannot compute desired peer group: %v", err)
	}
	if members == nil {
		logger.Debugf("no change in desired peer group")
		return nil
	}
	logger.Debugf("desired peer group members: %#v", members)
	// We cannot change the HasVote flag of a machine in state at exactly
	// the same moment as changing its voting status in the replica set.
	//
	// Thus we need to be careful that a machine which is actually a voting
	// member is not seen to not have a vote, because otherwise
	// there is nothing to prevent the machine being removed.
	//
	// To avoid this happening, we make sure when we call Set on the
	// mongo session that the voting status of machines is the union
	// of both old and new voting machines - that is, the set of HasVote
	// machines is a superset of all the actual voting machines.
	//
	// Only after the call has taken place do we reset the voting status
	// of the machines that have lost their vote.
	//
	// If there's a crash, the voting status may not reflect the
	// actual voting status for a while, but when things come
	// back on line, it will be sorted out, as desiredPeerGroup
	// will return the actual voting status.
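	//
	// As a hypothetical worked example: suppose machines 0, 1 and 2
	// currently vote and 3 is replacing 2. We first set HasVote on 3,
	// then call Set with the new members, and only then clear HasVote
	// on 2, so at every step the HasVote machines (0, 1, 2, 3) remain
	// a superset of the machines that actually vote in the replica set.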

	var added, removed []*machine
	for m, hasVote := range voting {
		switch {
		case hasVote && !m.stm.HasVote():
			added = append(added, m)
		case !hasVote && m.stm.HasVote():
			removed = append(removed, m)
		}
	}
	if err := setHasVote(added, true); err != nil {
		return err
	}
	if err := w.st.MongoSession().Set(members); err != nil {
		// We've failed to set the replica set, so revert back
		// to the previous settings.
		if err1 := setHasVote(added, false); err1 != nil {
			logger.Errorf("cannot revert machine voting after failure to change replica set: %v", err1)
		}
		return &replicaSetError{err}
	}
	logger.Infof("successfully changed replica set to %#v", members)
	if err := setHasVote(removed, false); err != nil {
		return err
	}
	return nil
}

// start runs the given loop function until it returns.
// When it returns, the receiving pgWorker is killed with
// the returned error.
func (w *pgWorker) start(loop func() error) {
	w.wg.Add(1)
	go func() {
		defer w.wg.Done()
		if err := loop(); err != nil {
			w.tomb.Kill(err)
		}
	}()
}

// setHasVote sets the HasVote status of all the given
// machines to hasVote.
func setHasVote(ms []*machine, hasVote bool) error {
	for _, m := range ms {
		if err := m.stm.SetHasVote(hasVote); err != nil {
			return fmt.Errorf("cannot set voting status of %q to %v: %v", m.id, hasVote, err)
		}
	}
	return nil
}

// serverInfoWatcher watches the state server info and
// notifies the worker when it changes.
type serverInfoWatcher struct {
	worker  *pgWorker
	watcher state.NotifyWatcher
}

func (w *pgWorker) watchStateServerInfo() *serverInfoWatcher {
	infow := &serverInfoWatcher{
		worker:  w,
		watcher: w.st.WatchStateServerInfo(),
	}
	w.start(infow.loop)
	return infow
}

func (infow *serverInfoWatcher) loop() error {
	for {
		select {
		case _, ok := <-infow.watcher.Changes():
			if !ok {
				return infow.watcher.Err()
			}
			infow.worker.notify(infow.updateMachines)
		case <-infow.worker.tomb.Dying():
			return tomb.ErrDying
		}
	}
}

func (infow *serverInfoWatcher) stop() {
	infow.watcher.Stop()
}

// updateMachines is a notifyFunc that updates the current
// machines when the state server info has changed.
func (infow *serverInfoWatcher) updateMachines() (bool, error) {
	info, err := infow.worker.st.StateServerInfo()
	if err != nil {
		return false, fmt.Errorf("cannot get state server info: %v", err)
	}
	changed := false
	// Stop machine goroutines that no longer correspond to state server
	// machines.
	for _, m := range infow.worker.machines {
		if !inStrings(m.id, info.MachineIds) {
			m.stop()
			delete(infow.worker.machines, m.id)
			changed = true
		}
	}
	// Start watching any state server machines that we are not
	// watching already.
	for _, id := range info.MachineIds {
		if _, ok := infow.worker.machines[id]; ok {
			continue
		}
		logger.Debugf("found new machine %q", id)
		stm, err := infow.worker.st.Machine(id)
		if err != nil {
			if errors.IsNotFoundError(err) {
				// If the machine isn't found, it must have been
				// removed and will soon enough be removed
				// from the state server list. This will probably
				// never happen, but we'll code defensively anyway.
				logger.Warningf("machine %q from state server list not found", id)
				continue
			}
			return false, fmt.Errorf("cannot get machine %q: %v", id, err)
		}
		infow.worker.machines[id] = infow.worker.newMachine(stm)
		changed = true
	}
	return changed, nil
}

// machine represents a machine in State.
type machine struct {
	id        string
	wantsVote bool
	hostPort  string

	worker         *pgWorker
	stm            stateMachine
	machineWatcher state.NotifyWatcher
}

func (m *machine) String() string {
	return m.id
}

func (m *machine) GoString() string {
	return fmt.Sprintf("&peergrouper.machine{id: %q, wantsVote: %v, hostPort: %q}", m.id, m.wantsVote, m.hostPort)
}

func (w *pgWorker) newMachine(stm stateMachine) *machine {
	m := &machine{
		worker:         w,
		id:             stm.Id(),
		stm:            stm,
		hostPort:       stm.StateHostPort(),
		wantsVote:      stm.WantsVote(),
		machineWatcher: stm.Watch(),
	}
	w.start(m.loop)
	return m
}

func (m *machine) loop() error {
	for {
		select {
		case _, ok := <-m.machineWatcher.Changes():
			if !ok {
				return m.machineWatcher.Err()
			}
			m.worker.notify(m.refresh)
		case <-m.worker.tomb.Dying():
			return nil
		}
	}
}

func (m *machine) stop() {
	m.machineWatcher.Stop()
}

// refresh is a notifyFunc that updates the machine's cached
// wantsVote and hostPort values from state, reporting whether
// either has changed.
func (m *machine) refresh() (bool, error) {
	if err := m.stm.Refresh(); err != nil {
		if errors.IsNotFoundError(err) {
			// We want to be robust when the machine
			// state is out of date with respect to the
			// state server info, so if the machine
			// has been removed, just assume that
			// no change has happened - the machine
			// loop will be stopped very soon anyway.
			return false, nil
		}
		return false, err
	}
	changed := false
	if wantsVote := m.stm.WantsVote(); wantsVote != m.wantsVote {
		m.wantsVote = wantsVote
		changed = true
	}
	if hostPort := m.stm.StateHostPort(); hostPort != m.hostPort {
		m.hostPort = hostPort
		changed = true
	}
	return changed, nil
}

// inStrings reports whether t is present in ss.
func inStrings(t string, ss []string) bool {
	for _, s := range ss {
		if s == t {
			return true
		}
	}
	return false
}