github.com/juju/juju@v0.0.0-20240327075706-a90865de2538/worker/uniter/resolver.go (about)

     1  // Copyright 2015 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package uniter
     5  
     6  import (
     7  	"fmt"
     8  
     9  	jujucharm "github.com/juju/charm/v12"
    10  	"github.com/juju/charm/v12/hooks"
    11  	"github.com/juju/errors"
    12  
    13  	"github.com/juju/juju/core/life"
    14  	"github.com/juju/juju/core/model"
    15  	"github.com/juju/juju/rpc/params"
    16  	"github.com/juju/juju/worker"
    17  	"github.com/juju/juju/worker/uniter/hook"
    18  	"github.com/juju/juju/worker/uniter/operation"
    19  	"github.com/juju/juju/worker/uniter/remotestate"
    20  	"github.com/juju/juju/worker/uniter/resolver"
    21  	"github.com/juju/juju/wrench"
    22  )
    23  
    24  // ResolverConfig defines configuration for the uniter resolver.
    25  type ResolverConfig struct {
    26  	ModelType           model.ModelType
    27  	ClearResolved       func() error
    28  	ReportHookError     func(hook.Info) error
    29  	ShouldRetryHooks    bool
    30  	StartRetryHookTimer func()
    31  	StopRetryHookTimer  func()
    32  	VerifyCharmProfile  resolver.Resolver
    33  	UpgradeSeries       resolver.Resolver
    34  	Reboot              resolver.Resolver
    35  	Leadership          resolver.Resolver
    36  	Actions             resolver.Resolver
    37  	CreatedRelations    resolver.Resolver
    38  	Relations           resolver.Resolver
    39  	Storage             resolver.Resolver
    40  	Commands            resolver.Resolver
    41  	Secrets             resolver.Resolver
    42  	OptionalResolvers   []resolver.Resolver
    43  	Logger              Logger
    44  }
    45  
    46  type uniterResolver struct {
    47  	config                ResolverConfig
    48  	retryHookTimerStarted bool
    49  }
    50  
    51  // NewUniterResolver returns a new resolver.Resolver for the uniter.
    52  func NewUniterResolver(cfg ResolverConfig) resolver.Resolver {
    53  	return &uniterResolver{
    54  		config:                cfg,
    55  		retryHookTimerStarted: false,
    56  	}
    57  }
    58  
    59  func (s *uniterResolver) NextOp(
    60  	localState resolver.LocalState,
    61  	remoteState remotestate.Snapshot,
    62  	opFactory operation.Factory,
    63  ) (_ operation.Operation, err error) {
    64  	badge := "<unspecified>"
    65  	defer func() {
    66  		if err != nil && errors.Cause(err) != resolver.ErrNoOperation && err != resolver.ErrRestart {
    67  			s.config.Logger.Debugf("next %q operation could not be resolved: %v", badge, err)
    68  		}
    69  	}()
    70  
    71  	if remoteState.Life == life.Dead || localState.Removed {
    72  		return nil, resolver.ErrUnitDead
    73  	}
    74  	logger := s.config.Logger
    75  
    76  	// Operations for series-upgrade need to be resolved early,
    77  	// in particular because no other operations should be run when the unit
    78  	// has completed preparation and is waiting for upgrade completion.
    79  	badge = "upgrade series"
    80  	op, err := s.config.UpgradeSeries.NextOp(localState, remoteState, opFactory)
    81  	if errors.Cause(err) != resolver.ErrNoOperation {
    82  		if errors.Cause(err) == resolver.ErrDoNotProceed {
    83  			return nil, resolver.ErrNoOperation
    84  		}
    85  		return op, err
    86  	}
    87  
    88  	// Check if we need to notify the charms because a reboot was detected.
    89  	badge = "reboot"
    90  	op, err = s.config.Reboot.NextOp(localState, remoteState, opFactory)
    91  	if errors.Cause(err) != resolver.ErrNoOperation {
    92  		return op, err
    93  	}
    94  
    95  	if localState.Kind == operation.Upgrade {
    96  		badge = "upgrade"
    97  		if localState.Conflicted {
    98  			return s.nextOpConflicted(localState, remoteState, opFactory)
    99  		}
   100  		// continue upgrading the charm
   101  		logger.Infof("resuming charm upgrade")
   102  		return s.newUpgradeOperation(localState, remoteState, opFactory)
   103  	}
   104  
   105  	if localState.Restart {
   106  		// We've just run the upgrade op, which will change the
   107  		// unit's charm URL. We need to restart the resolver
   108  		// loop so that we start watching the correct events.
   109  		return nil, resolver.ErrRestart
   110  	}
   111  
   112  	if s.retryHookTimerStarted && (localState.Kind != operation.RunHook || localState.Step != operation.Pending) {
   113  		// The hook-retry timer is running, but there is no pending
   114  		// hook operation. We're not in an error state, so stop the
   115  		// timer now to reset the backoff state.
   116  		s.config.StopRetryHookTimer()
   117  		s.retryHookTimerStarted = false
   118  	}
   119  
   120  	badge = "relations"
   121  	op, err = s.config.CreatedRelations.NextOp(localState, remoteState, opFactory)
   122  	if errors.Cause(err) != resolver.ErrNoOperation {
   123  		return op, err
   124  	}
   125  
   126  	badge = "leadership"
   127  	op, err = s.config.Leadership.NextOp(localState, remoteState, opFactory)
   128  	if errors.Cause(err) != resolver.ErrNoOperation {
   129  		return op, err
   130  	}
   131  
   132  	badge = "optional"
   133  	for _, r := range s.config.OptionalResolvers {
   134  		op, err = r.NextOp(localState, remoteState, opFactory)
   135  		if errors.Cause(err) != resolver.ErrNoOperation {
   136  			return op, err
   137  		}
   138  	}
   139  
   140  	badge = "secrets"
   141  	op, err = s.config.Secrets.NextOp(localState, remoteState, opFactory)
   142  	if errors.Cause(err) != resolver.ErrNoOperation {
   143  		return op, err
   144  	}
   145  
   146  	badge = "actions"
   147  	op, err = s.config.Actions.NextOp(localState, remoteState, opFactory)
   148  	if errors.Cause(err) != resolver.ErrNoOperation {
   149  		return op, err
   150  	}
   151  
   152  	badge = "commands"
   153  	op, err = s.config.Commands.NextOp(localState, remoteState, opFactory)
   154  	if errors.Cause(err) != resolver.ErrNoOperation {
   155  		return op, err
   156  	}
   157  
   158  	badge = "storage"
   159  	op, err = s.config.Storage.NextOp(localState, remoteState, opFactory)
   160  	if errors.Cause(err) != resolver.ErrNoOperation {
   161  		return op, err
   162  	}
   163  
   164  	// If we are to shut down, we don't want to start running any more queued/pending hooks.
   165  	if remoteState.Shutdown {
   166  		badge = "shutdown"
   167  		logger.Debugf("unit agent is shutting down, will not run pending/queued hooks")
   168  		return s.nextOp(localState, remoteState, opFactory)
   169  	}
   170  
   171  	switch localState.Kind {
   172  	case operation.RunHook:
   173  		step := localState.Step
   174  		if localState.HookStep != nil {
   175  			step = *localState.HookStep
   176  		}
   177  		switch step {
   178  		case operation.Pending:
   179  			badge = "resolve hook"
   180  			logger.Infof("awaiting error resolution for %q hook", localState.Hook.Kind)
   181  			return s.nextOpHookError(localState, remoteState, opFactory)
   182  
   183  		case operation.Queued:
   184  			badge = "queued hook"
   185  			logger.Infof("found queued %q hook", localState.Hook.Kind)
   186  			if localState.Hook.Kind == hooks.Install {
   187  				// Special case: handle install in nextOp,
   188  				// so we do nothing when the unit is dying.
   189  				return s.nextOp(localState, remoteState, opFactory)
   190  			}
   191  			return opFactory.NewRunHook(*localState.Hook)
   192  
   193  		case operation.Done:
   194  			// Only check for the wrench if trace logging is enabled. Otherwise,
   195  			// we'd have to parse the charm url every time just to check to see
   196  			// if a wrench existed.
   197  			badge = "commit hook"
   198  			if localState.CharmURL != "" && logger.IsTraceEnabled() {
   199  				// If it's set, the charm url will parse.
   200  				curl := jujucharm.MustParseURL(localState.CharmURL)
   201  				if curl != nil && wrench.IsActive("hooks", fmt.Sprintf("%s-%s-error", curl.Name, localState.Hook.Kind)) {
   202  					s.config.Logger.Errorf("commit hook %q failed due to a wrench in the works", localState.Hook.Kind)
   203  					return nil, errors.Errorf("commit hook %q failed due to a wrench in the works", localState.Hook.Kind)
   204  				}
   205  			}
   206  
   207  			logger.Infof("committing %q hook", localState.Hook.Kind)
   208  			return opFactory.NewSkipHook(*localState.Hook)
   209  
   210  		default:
   211  			return nil, errors.Errorf("unknown hook operation step %v", step)
   212  		}
   213  
   214  	case operation.Continue:
   215  		badge = "idle"
   216  		logger.Debugf("no operations in progress; waiting for changes")
   217  		return s.nextOp(localState, remoteState, opFactory)
   218  
   219  	default:
   220  		return nil, errors.Errorf("unknown operation kind %v", localState.Kind)
   221  	}
   222  }
   223  
   224  // nextOpConflicted is called after an upgrade operation has failed, and hasn't
   225  // yet been resolved or reverted. When in this mode, the resolver will only
   226  // consider those two possibilities for progressing.
   227  func (s *uniterResolver) nextOpConflicted(
   228  	localState resolver.LocalState,
   229  	remoteState remotestate.Snapshot,
   230  	opFactory operation.Factory,
   231  ) (operation.Operation, error) {
   232  	// Only IAAS models deal with conflicted upgrades.
   233  	// TODO(caas) - what to do here.
   234  
   235  	// Verify the charm profile before proceeding.  No hooks to run, if the
   236  	// correct one is not yet applied.
   237  	_, err := s.config.VerifyCharmProfile.NextOp(localState, remoteState, opFactory)
   238  	if e := errors.Cause(err); e == resolver.ErrDoNotProceed {
   239  		return nil, resolver.ErrNoOperation
   240  	} else if e != resolver.ErrNoOperation {
   241  		return nil, err
   242  	}
   243  
   244  	if remoteState.ResolvedMode != params.ResolvedNone {
   245  		if err := s.config.ClearResolved(); err != nil {
   246  			return nil, errors.Trace(err)
   247  		}
   248  		return opFactory.NewResolvedUpgrade(localState.CharmURL)
   249  	}
   250  	if remoteState.ForceCharmUpgrade && s.charmModified(localState, remoteState) {
   251  		return opFactory.NewRevertUpgrade(remoteState.CharmURL)
   252  	}
   253  	return nil, resolver.ErrWaiting
   254  }
   255  
   256  func (s *uniterResolver) newUpgradeOperation(
   257  	localState resolver.LocalState,
   258  	remoteState remotestate.Snapshot,
   259  	opFactory operation.Factory,
   260  ) (operation.Operation, error) {
   261  	// Verify the charm profile before proceeding.  No hooks to run, if the
   262  	// correct one is not yet applied.
   263  	_, err := s.config.VerifyCharmProfile.NextOp(localState, remoteState, opFactory)
   264  	if e := errors.Cause(err); e == resolver.ErrDoNotProceed {
   265  		return nil, resolver.ErrNoOperation
   266  	} else if e != resolver.ErrNoOperation {
   267  		return nil, err
   268  	}
   269  	return opFactory.NewUpgrade(remoteState.CharmURL)
   270  }
   271  
   272  func (s *uniterResolver) nextOpHookError(
   273  	localState resolver.LocalState,
   274  	remoteState remotestate.Snapshot,
   275  	opFactory operation.Factory,
   276  ) (operation.Operation, error) {
   277  
   278  	// Report the hook error.
   279  	if err := s.config.ReportHookError(*localState.Hook); err != nil {
   280  		return nil, errors.Trace(err)
   281  	}
   282  
   283  	if remoteState.ForceCharmUpgrade && s.charmModified(localState, remoteState) {
   284  		return s.newUpgradeOperation(localState, remoteState, opFactory)
   285  	}
   286  
   287  	switch remoteState.ResolvedMode {
   288  	case params.ResolvedNone:
   289  		if remoteState.RetryHookVersion > localState.RetryHookVersion {
   290  			// We've been asked to retry: clear the hook timer
   291  			// started state so we'll restart it if this fails.
   292  			//
   293  			// If the hook fails again, we'll re-enter this method
   294  			// with the retry hook versions equal and restart the
   295  			// timer. If the hook succeeds, we'll enter nextOp
   296  			// and stop the timer.
   297  			s.retryHookTimerStarted = false
   298  			return opFactory.NewRunHook(*localState.Hook)
   299  		}
   300  		if !s.retryHookTimerStarted && s.config.ShouldRetryHooks {
   301  			// We haven't yet started a retry timer, so start one
   302  			// now. If we retry and fail, retryHookTimerStarted is
   303  			// cleared so that we'll still start it again.
   304  			s.config.StartRetryHookTimer()
   305  			s.retryHookTimerStarted = true
   306  		}
   307  		return nil, resolver.ErrNoOperation
   308  	case params.ResolvedRetryHooks:
   309  		s.config.StopRetryHookTimer()
   310  		s.retryHookTimerStarted = false
   311  		if err := s.config.ClearResolved(); err != nil {
   312  			return nil, errors.Trace(err)
   313  		}
   314  		return opFactory.NewRunHook(*localState.Hook)
   315  	case params.ResolvedNoHooks:
   316  		s.config.StopRetryHookTimer()
   317  		s.retryHookTimerStarted = false
   318  		if err := s.config.ClearResolved(); err != nil {
   319  			return nil, errors.Trace(err)
   320  		}
   321  		return opFactory.NewSkipHook(*localState.Hook)
   322  	default:
   323  		return nil, errors.Errorf(
   324  			"unknown resolved mode %q", remoteState.ResolvedMode,
   325  		)
   326  	}
   327  }
   328  
   329  func (s *uniterResolver) charmModified(local resolver.LocalState, remote remotestate.Snapshot) bool {
   330  	// CAAS models may not yet have read the charm url from state.
   331  	if remote.CharmURL == "" {
   332  		return false
   333  	}
   334  	if local.CharmURL != remote.CharmURL {
   335  		s.config.Logger.Debugf("upgrade from %v to %v", local.CharmURL, remote.CharmURL)
   336  		return true
   337  	}
   338  
   339  	if local.CharmModifiedVersion != remote.CharmModifiedVersion {
   340  		s.config.Logger.Debugf("upgrade from CharmModifiedVersion %v to %v", local.CharmModifiedVersion, remote.CharmModifiedVersion)
   341  		return true
   342  	}
   343  	return false
   344  }
   345  
   346  func (s *uniterResolver) nextOp(
   347  	localState resolver.LocalState,
   348  	remoteState remotestate.Snapshot,
   349  	opFactory operation.Factory,
   350  ) (operation.Operation, error) {
   351  	switch remoteState.Life {
   352  	case life.Alive:
   353  		if remoteState.Shutdown {
   354  			if localState.Started && !localState.Stopped {
   355  				return opFactory.NewRunHook(hook.Info{Kind: hooks.Stop})
   356  			} else if !localState.Started || localState.Stopped {
   357  				return nil, worker.ErrTerminateAgent
   358  			}
   359  		}
   360  	case life.Dying:
   361  		// Normally we handle relations last, but if we're dying we
   362  		// must ensure that all relations are broken first.
   363  		op, err := s.config.Relations.NextOp(localState, remoteState, opFactory)
   364  		if errors.Cause(err) != resolver.ErrNoOperation {
   365  			return op, err
   366  		}
   367  
   368  		// We're not in a hook error and the unit is Dying,
   369  		// so we should proceed to tear down.
   370  		//
   371  		// TODO(axw) move logic for cascading destruction of
   372  		//           subordinates, relation units and storage
   373  		//           attachments into state, via cleanups.
   374  		if localState.Started && !localState.Stopped {
   375  			return opFactory.NewRunHook(hook.Info{Kind: hooks.Stop})
   376  		} else if localState.Installed && !localState.Removed {
   377  			return opFactory.NewRunHook(hook.Info{Kind: hooks.Remove})
   378  		}
   379  		fallthrough
   380  	case life.Dead:
   381  		// The unit is dying/dead and stopped, so tell the uniter
   382  		// to terminate.
   383  		return nil, resolver.ErrUnitDead
   384  	}
   385  
   386  	// Now that storage hooks have run at least once, before anything else,
   387  	// we need to run the install hook.
   388  	// TODO(cmars): remove !localState.Started. It's here as a temporary
   389  	// measure because unit agent upgrades aren't being performed yet.
   390  	if !localState.Installed && !localState.Started {
   391  		return opFactory.NewRunHook(hook.Info{Kind: hooks.Install})
   392  	}
   393  
   394  	if s.charmModified(localState, remoteState) {
   395  		return s.newUpgradeOperation(localState, remoteState, opFactory)
   396  	}
   397  
   398  	configHashChanged := localState.ConfigHash != remoteState.ConfigHash
   399  	trustHashChanged := localState.TrustHash != remoteState.TrustHash
   400  	addressesHashChanged := localState.AddressesHash != remoteState.AddressesHash
   401  	if configHashChanged || trustHashChanged || addressesHashChanged {
   402  		return opFactory.NewRunHook(hook.Info{Kind: hooks.ConfigChanged})
   403  	}
   404  
   405  	op, err := s.config.Relations.NextOp(localState, remoteState, opFactory)
   406  	if errors.Cause(err) != resolver.ErrNoOperation {
   407  		return op, err
   408  	}
   409  
   410  	// UpdateStatus hook runs if nothing else needs to.
   411  	if localState.UpdateStatusVersion != remoteState.UpdateStatusVersion {
   412  		return opFactory.NewRunHook(hook.Info{Kind: hooks.UpdateStatus})
   413  	}
   414  
   415  	return nil, resolver.ErrNoOperation
   416  }