github.com/Pankov404/juju@v0.0.0-20150703034450-be266991dceb/worker/uniter/modes.go (about)

     1  // Copyright 2012-2015 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package uniter
     5  
     6  import (
     7  	"fmt"
     8  	"time"
     9  
    10  	"github.com/juju/errors"
    11  	"gopkg.in/juju/charm.v5"
    12  	"gopkg.in/juju/charm.v5/hooks"
    13  	"launchpad.net/tomb"
    14  
    15  	"github.com/juju/juju/apiserver/params"
    16  	"github.com/juju/juju/state/watcher"
    17  	"github.com/juju/juju/worker"
    18  	"github.com/juju/juju/worker/uniter/hook"
    19  	"github.com/juju/juju/worker/uniter/operation"
    20  )
    21  
    22  // setAgentStatus sets the unit's status if it has changed since last time this method was called.
    23  func setAgentStatus(u *Uniter, status params.Status, info string, data map[string]interface{}) error {
    24  	u.setStatusMutex.Lock()
    25  	defer u.setStatusMutex.Unlock()
    26  	if u.lastReportedStatus == status && u.lastReportedMessage == info {
    27  		return nil
    28  	}
    29  	u.lastReportedStatus = status
    30  	u.lastReportedMessage = info
    31  	logger.Debugf("[AGENT-STATUS] %s: %s", status, info)
    32  	return u.unit.SetAgentStatus(status, info, data)
    33  }
    34  
    35  // updateAgentStatus updates the agent status to reflect what the uniter is doing,
    36  // or to report on an error.
    37  func updateAgentStatus(u *Uniter, userMessage string, err error) {
    38  	// If there was an error performing the operation, set the state
    39  	// of the agent to Failed.
    40  	if err != nil {
    41  		msg := fmt.Sprintf("%s: %v", userMessage, err)
    42  		err2 := setAgentStatus(u, params.StatusFailed, msg, nil)
    43  		if err2 != nil {
    44  			logger.Errorf("updating agent status: %v", err2)
    45  		}
    46  		return
    47  	}
    48  	// Anything else, the uniter is doing something, running a hook or action etc.
    49  	err2 := setAgentStatus(u, params.StatusExecuting, userMessage, nil)
    50  	if err2 != nil {
    51  		logger.Errorf("updating agent status: %v", err2)
    52  	}
    53  }
    54  
// Mode defines the signature of the functions that implement the possible
// states of a running Uniter. A Mode is called with the Uniter and returns
// the Mode that should follow it, or an error.
type Mode func(u *Uniter) (Mode, error)
    58  
// ModeContinue determines what action to take based on persistent uniter state.
//
// In order, it:
//   - resumes an interrupted charm install or upgrade;
//   - initializes the metrics collector;
//   - compares the recorded leadership state with the leadership tracker and,
//     if they differ and it is appropriate, runs an accept/resign operation;
//   - otherwise dispatches on the recorded operation kind: re-running or
//     committing a recorded hook, or moving on to ModeHookError,
//     ModeTerminating or ModeAbide.
//
// Any error returned is annotated with the mode name by modeContext.
func ModeContinue(u *Uniter) (next Mode, err error) {
	defer modeContext("ModeContinue", &err)()
	opState := u.operationState()

	// Resume interrupted deployment operations.
	if opState.Kind == operation.Install {
		logger.Infof("resuming charm install")
		return ModeInstalling(opState.CharmURL)
	} else if opState.Kind == operation.Upgrade {
		logger.Infof("resuming charm upgrade")
		return ModeUpgrading(opState.CharmURL), nil
	}

	// If we got this far, we should have an installed charm,
	// so initialize the metrics collector according to what's
	// currently deployed.
	if err := u.initializeMetricsCollector(); err != nil {
		return nil, errors.Trace(err)
	}

	// Check for any leadership change, and enact it if possible.
	logger.Infof("checking leadership status")
	// If we've already accepted leadership, we don't need to do it again.
	canAcceptLeader := !opState.Leader
	// Non-blocking check: the default branch runs when the unit is not
	// (yet) reported as dying.
	select {
	// If the unit's shutting down, we shouldn't accept it.
	case <-u.f.UnitDying():
		canAcceptLeader = false
	default:
		// If we're in an unexpected mode (eg pending hook) we shouldn't try either.
		if opState.Kind != operation.Continue {
			canAcceptLeader = false
		}
	}

	// NOTE: the Wait() looks scary, but a ClaimLeadership ticket should always
	// complete quickly; worst-case is API latency time, but it's designed that
	// it should be vanishingly rare to hit that code path.
	isLeader := u.leadershipTracker.ClaimLeader().Wait()
	var creator creator
	switch {
	case isLeader && canAcceptLeader:
		creator = newAcceptLeadershipOp()
	case opState.Leader && !isLeader:
		creator = newResignLeadershipOp()
	}
	// A leadership change takes priority; ModeContinue is re-entered
	// afterwards to deal with whatever else is recorded.
	if creator != nil {
		return continueAfter(u, creator)
	}
	logger.Infof("leadership status is up-to-date")

	switch opState.Kind {
	case operation.RunAction:
		// TODO(fwereade): we *should* handle interrupted actions, and make sure
		// they're marked as failed, but that's not for now.
		if opState.Hook != nil {
			logger.Infof("found incomplete action %q; ignoring", opState.ActionId)
			logger.Infof("recommitting prior %q hook", opState.Hook.Kind)
			creator = newSkipHookOp(*opState.Hook)
		} else {
			// NOTE(review): creator stays nil on this path (and for any
			// RunHook step other than Pending/Queued/Done below), so the
			// final continueAfter is handed a nil creator -- confirm that
			// runOperation tolerates this.
			logger.Infof("%q hook is nil", operation.RunAction)
		}
	case operation.RunHook:
		switch opState.Step {
		case operation.Pending:
			logger.Infof("awaiting error resolution for %q hook", opState.Hook.Kind)
			return ModeHookError, nil
		case operation.Queued:
			logger.Infof("found queued %q hook", opState.Hook.Kind)
			// Ensure storage-attached hooks are run before install
			// or upgrade hooks.
			switch opState.Hook.Kind {
			case hooks.UpgradeCharm:
				// Force a refresh of all storage attachments,
				// so we find out about new ones introduced
				// by the charm upgrade.
				if err := u.storage.Refresh(); err != nil {
					return nil, errors.Trace(err)
				}
				// Upgrades wait for storage exactly as installs do.
				fallthrough
			case hooks.Install:
				if err := waitStorage(u); err != nil {
					return nil, errors.Trace(err)
				}
			}
			creator = newRunHookOp(*opState.Hook)
		case operation.Done:
			logger.Infof("committing %q hook", opState.Hook.Kind)
			creator = newSkipHookOp(*opState.Hook)
		}
	case operation.Continue:
		if opState.Stopped {
			logger.Infof("opState.Stopped == true; transition to ModeTerminating")
			return ModeTerminating, nil
		}
		logger.Infof("no operations in progress; waiting for changes")
		return ModeAbide, nil
	default:
		return nil, errors.Errorf("unknown operation kind %v", opState.Kind)
	}
	return continueAfter(u, creator)
}
   162  
   163  // ModeInstalling is responsible for the initial charm deployment. If an install
   164  // operation were to set an appropriate status, it shouldn't be necessary; but see
   165  // ModeUpgrading for discussion relevant to both.
   166  func ModeInstalling(curl *charm.URL) (next Mode, err error) {
   167  	name := fmt.Sprintf("ModeInstalling %s", curl)
   168  	return func(u *Uniter) (next Mode, err error) {
   169  		defer modeContext(name, &err)()
   170  		return continueAfter(u, newInstallOp(curl))
   171  	}, nil
   172  }
   173  
   174  // ModeUpgrading is responsible for upgrading the charm. It shouldn't really
   175  // need to be a mode at all -- it's just running a single operation -- but
   176  // it's not safe to call it inside arbitrary other modes, because failing to
   177  // pass through ModeContinue on the way out could cause a queued hook to be
   178  // accidentally skipped.
   179  func ModeUpgrading(curl *charm.URL) Mode {
   180  	name := fmt.Sprintf("ModeUpgrading %s", curl)
   181  	return func(u *Uniter) (next Mode, err error) {
   182  		defer modeContext(name, &err)()
   183  		return continueAfter(u, newUpgradeOp(curl))
   184  	}
   185  }
   186  
   187  // ModeTerminating marks the unit dead and returns ErrTerminateAgent.
   188  func ModeTerminating(u *Uniter) (next Mode, err error) {
   189  	defer modeContext("ModeTerminating", &err)()
   190  	w, err := u.unit.Watch()
   191  	if err != nil {
   192  		return nil, errors.Trace(err)
   193  	}
   194  
   195  	defer watcher.Stop(w, &u.tomb)
   196  
   197  	for {
   198  		select {
   199  		case <-u.tomb.Dying():
   200  			return nil, tomb.ErrDying
   201  		case actionId := <-u.f.ActionEvents():
   202  			creator := newActionOp(actionId)
   203  			if err := u.runOperation(creator); err != nil {
   204  				return nil, errors.Trace(err)
   205  			}
   206  		case _, ok := <-w.Changes():
   207  			if !ok {
   208  				return nil, watcher.EnsureErr(w)
   209  			}
   210  			if err := u.unit.Refresh(); err != nil {
   211  				return nil, errors.Trace(err)
   212  			}
   213  			if hasSubs, err := u.unit.HasSubordinates(); err != nil {
   214  				return nil, errors.Trace(err)
   215  			} else if hasSubs {
   216  				continue
   217  			}
   218  			// The unit is known to be Dying; so if it didn't have subordinates
   219  			// just above, it can't acquire new ones before this call.
   220  			if err := u.unit.EnsureDead(); err != nil {
   221  				return nil, errors.Trace(err)
   222  			}
   223  			return nil, worker.ErrTerminateAgent
   224  		}
   225  	}
   226  }
   227  
   228  // ModeAbide is the Uniter's usual steady state. It watches for and responds to:
   229  // * service configuration changes
   230  // * charm upgrade requests
   231  // * relation changes
   232  // * unit death
   233  // * acquisition or loss of service leadership
   234  func ModeAbide(u *Uniter) (next Mode, err error) {
   235  	defer modeContext("ModeAbide", &err)()
   236  	opState := u.operationState()
   237  	if opState.Kind != operation.Continue {
   238  		return nil, errors.Errorf("insane uniter state: %#v", opState)
   239  	}
   240  	if err := u.deployer.Fix(); err != nil {
   241  		return nil, errors.Trace(err)
   242  	}
   243  
   244  	if !opState.Leader && !u.ranLeaderSettingsChanged {
   245  		creator := newSimpleRunHookOp(hook.LeaderSettingsChanged)
   246  		if err := u.runOperation(creator); err != nil {
   247  			return nil, errors.Trace(err)
   248  		}
   249  	}
   250  
   251  	if !u.ranConfigChanged {
   252  		return continueAfter(u, newSimpleRunHookOp(hooks.ConfigChanged))
   253  	}
   254  	if !opState.Started {
   255  		return continueAfter(u, newSimpleRunHookOp(hooks.Start))
   256  	}
   257  	u.f.WantUpgradeEvent(false)
   258  	u.relations.StartHooks()
   259  	defer func() {
   260  		if e := u.relations.StopHooks(); e != nil {
   261  			if err == nil {
   262  				err = e
   263  			} else {
   264  				logger.Errorf("error while stopping hooks: %v", e)
   265  			}
   266  		}
   267  	}()
   268  
   269  	select {
   270  	case <-u.f.UnitDying():
   271  		return modeAbideDyingLoop(u)
   272  	default:
   273  	}
   274  	return modeAbideAliveLoop(u)
   275  }
   276  
// idleWaitTime is the time after which, if there are no uniter events,
// the agent state becomes idle. It is the timeout used by
// modeAbideAliveLoop on each pass through its event select.
var idleWaitTime = 2 * time.Second
   280  
// modeAbideAliveLoop handles all state changes for ModeAbide when the unit
// is in an Alive state.
//
// Each pass through the loop waits for a single event and, for most events,
// runs the corresponding operation before looping again. It leaves the loop
// when the tomb is dying (tomb.ErrDying), when the unit starts dying
// (delegating to modeAbideDyingLoop), on a charm upgrade request
// (ModeUpgrading), on gaining leadership (back to ModeContinue), or when an
// operation or status update fails.
func modeAbideAliveLoop(u *Uniter) (Mode, error) {
	// A nil channel never becomes ready in a select, so whichever of these
	// is nil is effectively disabled.
	var leaderElected, leaderDeposed <-chan struct{}
	for {
		// We expect one or none of these vars to be non-nil; and if none
		// are, we set the one that should trigger when our leadership state
		// differs from what we have recorded locally.
		if leaderElected == nil && leaderDeposed == nil {
			if u.operationState().Leader {
				logger.Infof("waiting to lose leadership")
				leaderDeposed = u.leadershipTracker.WaitMinion().Ready()
			} else {
				logger.Infof("waiting to gain leadership")
				leaderElected = u.leadershipTracker.WaitLeader().Ready()
			}
		}

		// collect-metrics hook
		// NOTE(review): collectMetricsAt presumably yields a channel that
		// fires when the next collection is due -- confirm against its
		// definition.
		lastCollectMetrics := time.Unix(u.operationState().CollectMetricsTime, 0)
		collectMetricsSignal := u.collectMetricsAt(
			time.Now(), lastCollectMetrics, metricsPollInterval,
		)

		// update-status hook
		// NOTE(review): updateStatusAt presumably behaves like
		// collectMetricsAt, for the update-status hook -- confirm.
		lastUpdateStatus := time.Unix(u.operationState().UpdateStatusTime, 0)
		updateStatusSignal := u.updateStatusAt(
			time.Now(), lastUpdateStatus, statusPollInterval,
		)

		var creator creator
		select {
		case <-time.After(idleWaitTime):
			// Nothing happened for idleWaitTime: report the agent as idle
			// and go round again without running an operation.
			if err := setAgentStatus(u, params.StatusIdle, "", nil); err != nil {
				return nil, errors.Trace(err)
			}
			continue
		case <-u.tomb.Dying():
			return nil, tomb.ErrDying
		case <-u.f.UnitDying():
			return modeAbideDyingLoop(u)
		case curl := <-u.f.UpgradeEvents():
			return ModeUpgrading(curl), nil
		case ids := <-u.f.RelationsEvents():
			creator = newUpdateRelationsOp(ids)
		case actionId := <-u.f.ActionEvents():
			creator = newActionOp(actionId)
		case tags := <-u.f.StorageEvents():
			creator = newUpdateStorageOp(tags)
		case <-u.f.ConfigEvents():
			creator = newSimpleRunHookOp(hooks.ConfigChanged)
		case <-u.f.MeterStatusEvents():
			creator = newSimpleRunHookOp(hooks.MeterStatusChanged)
		case <-collectMetricsSignal:
			creator = newSimpleRunHookOp(hooks.CollectMetrics)
		case <-updateStatusSignal:
			creator = newSimpleRunHookOp(hooks.UpdateStatus)
		case hookInfo := <-u.relations.Hooks():
			creator = newRunHookOp(hookInfo)
		case hookInfo := <-u.storage.Hooks():
			creator = newRunHookOp(hookInfo)
		case <-leaderElected:
			// This operation queues a hook, better to let ModeContinue pick up
			// after it than to duplicate queued-hook handling here.
			return continueAfter(u, newAcceptLeadershipOp())
		case <-leaderDeposed:
			// Clear the channel so that the top of the loop sets up the
			// appropriate leadership wait again on the next pass.
			leaderDeposed = nil
			creator = newResignLeadershipOp()
		case <-u.f.LeaderSettingsEvents():
			creator = newSimpleRunHookOp(hook.LeaderSettingsChanged)
		}
		if err := u.runOperation(creator); err != nil {
			return nil, errors.Trace(err)
		}
	}
}
   357  
   358  // modeAbideDyingLoop handles the proper termination of all relations in
   359  // response to a Dying unit.
   360  func modeAbideDyingLoop(u *Uniter) (next Mode, err error) {
   361  	if err := u.unit.Refresh(); err != nil {
   362  		return nil, errors.Trace(err)
   363  	}
   364  	if err = u.unit.DestroyAllSubordinates(); err != nil {
   365  		return nil, errors.Trace(err)
   366  	}
   367  	if err := u.relations.SetDying(); err != nil {
   368  		return nil, errors.Trace(err)
   369  	}
   370  	if u.operationState().Leader {
   371  		if err := u.runOperation(newResignLeadershipOp()); err != nil {
   372  			return nil, errors.Trace(err)
   373  		}
   374  		// TODO(fwereade): we ought to inform the tracker that we're shutting down
   375  		// (and no longer wish to continue renewing our lease) so that the tracker
   376  		// can then report minionhood at all times, and thus prevent the is-leader
   377  		// and leader-set hook tools from acting in a correct but misleading way
   378  		// (ie continuing to act as though leader after leader-deposed has run).
   379  	}
   380  	if err := u.storage.SetDying(); err != nil {
   381  		return nil, errors.Trace(err)
   382  	}
   383  	for {
   384  		if len(u.relations.GetInfo()) == 0 && u.storage.Empty() {
   385  			return continueAfter(u, newSimpleRunHookOp(hooks.Stop))
   386  		}
   387  		var creator creator
   388  		select {
   389  		case <-u.tomb.Dying():
   390  			return nil, tomb.ErrDying
   391  		case actionId := <-u.f.ActionEvents():
   392  			creator = newActionOp(actionId)
   393  		case <-u.f.ConfigEvents():
   394  			creator = newSimpleRunHookOp(hooks.ConfigChanged)
   395  		case <-u.f.LeaderSettingsEvents():
   396  			creator = newSimpleRunHookOp(hook.LeaderSettingsChanged)
   397  		case hookInfo := <-u.relations.Hooks():
   398  			creator = newRunHookOp(hookInfo)
   399  		case hookInfo := <-u.storage.Hooks():
   400  			creator = newRunHookOp(hookInfo)
   401  		}
   402  		if err := u.runOperation(creator); err != nil {
   403  			return nil, errors.Trace(err)
   404  		}
   405  	}
   406  }
   407  
   408  // waitStorage waits until all storage attachments are provisioned
   409  // and their hooks processed.
   410  func waitStorage(u *Uniter) error {
   411  	if u.storage.Pending() == 0 {
   412  		return nil
   413  	}
   414  	logger.Infof("waiting for storage attachments")
   415  	for u.storage.Pending() > 0 {
   416  		var creator creator
   417  		select {
   418  		case <-u.tomb.Dying():
   419  			return tomb.ErrDying
   420  		case <-u.f.UnitDying():
   421  			// Unit is shutting down; no need to handle any
   422  			// more storage-attached hooks. We will process
   423  			// required storage-detaching hooks in ModeAbideDying.
   424  			return nil
   425  		case tags := <-u.f.StorageEvents():
   426  			creator = newUpdateStorageOp(tags)
   427  		case hookInfo := <-u.storage.Hooks():
   428  			creator = newRunHookOp(hookInfo)
   429  		}
   430  		if err := u.runOperation(creator); err != nil {
   431  			return errors.Trace(err)
   432  		}
   433  	}
   434  	logger.Infof("storage attachments ready")
   435  	return nil
   436  }
   437  
// ModeHookError is responsible for watching and responding to:
// * user resolution of hook errors
// * forced charm upgrade requests
// * loss of service leadership
//
// It must only be entered when the recorded operation is a RunHook in the
// Pending step; anything else is reported as an error. While in this mode
// the agent status is kept at Error, with details of the failed hook
// attached. Action events are still run.
func ModeHookError(u *Uniter) (next Mode, err error) {
	defer modeContext("ModeHookError", &err)()
	opState := u.operationState()
	if opState.Kind != operation.RunHook || opState.Step != operation.Pending {
		return nil, errors.Errorf("insane uniter state: %#v", u.operationState())
	}

	// Create error information for status.
	hookInfo := *opState.Hook
	hookName := string(hookInfo.Kind)
	statusData := map[string]interface{}{}
	if hookInfo.Kind.IsRelation() {
		statusData["relation-id"] = hookInfo.RelationId
		if hookInfo.RemoteUnit != "" {
			statusData["remote-unit"] = hookInfo.RemoteUnit
		}
		relationName, err := u.relations.Name(hookInfo.RelationId)
		if err != nil {
			return nil, errors.Trace(err)
		}
		// Relation hooks are reported as "<relation name>-<hook kind>".
		hookName = fmt.Sprintf("%s-%s", relationName, hookInfo.Kind)
	}
	statusData["hook"] = hookName
	statusMessage := fmt.Sprintf("hook failed: %q", hookName)

	// Run the select loop.
	u.f.WantResolvedEvent()
	u.f.WantUpgradeEvent(true)
	// leaderDeposed stays nil (and so never fires in the select below)
	// unless leadership is currently recorded as held.
	var leaderDeposed <-chan struct{}
	if opState.Leader {
		leaderDeposed = u.leadershipTracker.WaitMinion().Ready()
	}
	for {
		// The spec says we should set the workload status to Error, but that's crazy talk.
		// It's the agent itself that should be in Error state. So we'll ensure the model is
		// correct and translate before the user sees the data.
		// ie a charm hook error results in agent error status, but is presented as a workload error.
		//
		// This is repeated on every pass so that the Error status is
		// restored after an operation run below has changed it.
		if err = setAgentStatus(u, params.StatusError, statusMessage, statusData); err != nil {
			return nil, errors.Trace(err)
		}
		select {
		case <-u.tomb.Dying():
			return nil, tomb.ErrDying
		case curl := <-u.f.UpgradeEvents():
			return ModeUpgrading(curl), nil
		case rm := <-u.f.ResolvedEvents():
			var creator creator
			switch rm {
			case params.ResolvedRetryHooks:
				creator = newRetryHookOp(hookInfo)
			case params.ResolvedNoHooks:
				creator = newSkipHookOp(hookInfo)
			default:
				return nil, errors.Errorf("unknown resolved mode %q", rm)
			}
			err := u.runOperation(creator)
			if errors.Cause(err) == operation.ErrHookFailed {
				// The hook failed again: stay in this mode and wait
				// for another resolution.
				continue
			} else if err != nil {
				return nil, errors.Trace(err)
			}
			return ModeContinue, nil
		case actionId := <-u.f.ActionEvents():
			if err := u.runOperation(newActionOp(actionId)); err != nil {
				return nil, errors.Trace(err)
			}
		case <-leaderDeposed:
			// This should trigger at most once -- we can't reaccept leadership while
			// in an error state.
			leaderDeposed = nil
			if err := u.runOperation(newResignLeadershipOp()); err != nil {
				return nil, errors.Trace(err)
			}
		}
	}
}
   518  
   519  // ModeConflicted is responsible for watching and responding to:
   520  // * user resolution of charm upgrade conflicts
   521  // * forced charm upgrade requests
   522  func ModeConflicted(curl *charm.URL) Mode {
   523  	return func(u *Uniter) (next Mode, err error) {
   524  		defer modeContext("ModeConflicted", &err)()
   525  		// TODO(mue) Add helpful data here too in later CL.
   526  		// The spec says we should set the workload status to Error, but that's crazy talk.
   527  		// It's the agent itself that should be in Error state. So we'll ensure the model is
   528  		// correct and translate before the user sees the data.
   529  		// ie a charm upgrade error results in agent error status, but is presented as a workload error.
   530  		if err := setAgentStatus(u, params.StatusError, "upgrade failed", nil); err != nil {
   531  			return nil, errors.Trace(err)
   532  		}
   533  		u.f.WantResolvedEvent()
   534  		u.f.WantUpgradeEvent(true)
   535  		var creator creator
   536  		select {
   537  		case <-u.tomb.Dying():
   538  			return nil, tomb.ErrDying
   539  		case curl = <-u.f.UpgradeEvents():
   540  			creator = newRevertUpgradeOp(curl)
   541  		case <-u.f.ResolvedEvents():
   542  			creator = newResolvedUpgradeOp(curl)
   543  		}
   544  		return continueAfter(u, creator)
   545  	}
   546  }
   547  
   548  // modeContext returns a function that implements logging and common error
   549  // manipulation for Mode funcs.
   550  func modeContext(name string, err *error) func() {
   551  	logger.Infof("%s starting", name)
   552  	return func() {
   553  		logger.Infof("%s exiting", name)
   554  		*err = errors.Annotatef(*err, name)
   555  	}
   556  }
   557  
   558  // continueAfter is commonly used at the end of a Mode func to execute the
   559  // operation returned by creator and return ModeContinue (or any error).
   560  func continueAfter(u *Uniter, creator creator) (Mode, error) {
   561  	if err := u.runOperation(creator); err != nil {
   562  		return nil, errors.Trace(err)
   563  	}
   564  	return ModeContinue, nil
   565  }