github.com/juju/juju@v0.0.0-20240327075706-a90865de2538/worker/uniter/uniter.go (about)

     1  // Copyright 2012-2015 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package uniter
     5  
     6  import (
     7  	"fmt"
     8  	"os"
     9  	"sync"
    10  
    11  	jujucharm "github.com/juju/charm/v12"
    12  	"github.com/juju/clock"
    13  	"github.com/juju/errors"
    14  	"github.com/juju/names/v5"
    15  	"github.com/juju/utils/v3"
    16  	"github.com/juju/utils/v3/exec"
    17  	"github.com/juju/worker/v3"
    18  	"github.com/juju/worker/v3/catacomb"
    19  	"gopkg.in/tomb.v2"
    20  
    21  	"github.com/juju/juju/agent/tools"
    22  	"github.com/juju/juju/api/agent/uniter"
    23  	"github.com/juju/juju/core/leadership"
    24  	"github.com/juju/juju/core/life"
    25  	corelogger "github.com/juju/juju/core/logger"
    26  	"github.com/juju/juju/core/lxdprofile"
    27  	"github.com/juju/juju/core/machinelock"
    28  	"github.com/juju/juju/core/model"
    29  	"github.com/juju/juju/core/status"
    30  	"github.com/juju/juju/core/watcher"
    31  	"github.com/juju/juju/rpc/params"
    32  	jworker "github.com/juju/juju/worker"
    33  	"github.com/juju/juju/worker/fortress"
    34  	"github.com/juju/juju/worker/uniter/actions"
    35  	"github.com/juju/juju/worker/uniter/charm"
    36  	"github.com/juju/juju/worker/uniter/container"
    37  	"github.com/juju/juju/worker/uniter/hook"
    38  	uniterleadership "github.com/juju/juju/worker/uniter/leadership"
    39  	"github.com/juju/juju/worker/uniter/operation"
    40  	"github.com/juju/juju/worker/uniter/reboot"
    41  	"github.com/juju/juju/worker/uniter/relation"
    42  	"github.com/juju/juju/worker/uniter/remotestate"
    43  	"github.com/juju/juju/worker/uniter/resolver"
    44  	"github.com/juju/juju/worker/uniter/runcommands"
    45  	"github.com/juju/juju/worker/uniter/runner"
    46  	"github.com/juju/juju/worker/uniter/runner/context"
    47  	"github.com/juju/juju/worker/uniter/runner/jujuc"
    48  	"github.com/juju/juju/worker/uniter/secrets"
    49  	"github.com/juju/juju/worker/uniter/storage"
    50  	"github.com/juju/juju/worker/uniter/upgradeseries"
    51  	"github.com/juju/juju/worker/uniter/verifycharmprofile"
    52  )
    53  
    54  const (
    55  	// ErrCAASUnitDead is the error returned from terminate or init
    56  	// if the unit is Dead.
    57  	ErrCAASUnitDead = errors.ConstError("unit dead")
    58  )
    59  
// A UniterExecutionObserver gets the appropriate methods called when a hook
// is executed and either succeeds or fails.  Missing hooks don't get reported
// in this way.
type UniterExecutionObserver interface {
	// HookCompleted is called with the name of a hook that was executed
	// and succeeded.
	HookCompleted(hookName string)
	// HookFailed is called with the name of a hook that was executed
	// and failed.
	HookFailed(hookName string)
}
    67  
// RebootQuerier is implemented by types that can deliver one-off machine
// reboot notifications to entities.
type RebootQuerier interface {
	// Query reports whether a reboot has been detected for the entity
	// identified by tag. The uniter calls it once per run of its main loop
	// for IAAS models and feeds the result to the reboot resolver.
	Query(tag names.Tag) (bool, error)
}
    73  
// SecretsClient provides methods used by the remote state watcher, hook context,
// and op callbacks.
type SecretsClient interface {
	// remotestate.SecretsClient is the part handed to the remote state
	// watcher via its WatcherConfig.
	remotestate.SecretsClient
	// context.SecretsAccessor is the part used from the hook context.
	context.SecretsAccessor
}
    80  
// RemoteInitFunc is used to init remote state. It receives the container's
// running status and a receive-only channel, and returns an error if
// initialisation fails.
// NOTE(review): the channel is presumably an abort/cancel signal — confirm
// against the callers, which are not visible in this file section.
type RemoteInitFunc func(remotestate.ContainerRunningStatus, <-chan struct{}) error
    83  
// Uniter implements the capabilities of the unit agent, for example running hooks.
// It is constructed by newUniter and driven by the loop method, which runs
// as the Work function of the embedded catacomb.
type Uniter struct {
	// Core dependencies and identity. catacomb is the Site of the plan that
	// runs loop; workers added to it (such as the remote state watcher) can
	// kill the uniter if they fail. Most of the fields below are copied from
	// UniterParams by newUniter; paths is derived via NewPaths from the data
	// dir, unit tag and socket config. unit and storage are not populated by
	// newUniter — presumably they are set up during init (TODO confirm).
	// An enforcedCharmModifiedVersion of -1 disables the sidecar charm
	// version check performed at the start of loop.
	catacomb                     catacomb.Catacomb
	st                           *uniter.State
	secretsClient                SecretsClient
	secretsBackendGetter         context.SecretsBackendGetter
	paths                        Paths
	unit                         *uniter.Unit
	resources                    *uniter.ResourcesFacadeClient
	payloads                     *uniter.PayloadFacadeClient
	modelType                    model.ModelType
	sidecar                      bool
	enforcedCharmModifiedVersion int
	storage                      *storage.Attachments
	clock                        clock.Clock

	// relationStateTracker is handed to the relation resolvers built in loop.
	relationStateTracker relation.RelationStateTracker

	// secretsTracker is handed to the secrets resolver built in loop.
	secretsTracker secrets.SecretStateTracker

	// Cache the last reported status information
	// so we don't make unnecessary api calls.
	setStatusMutex      sync.Mutex
	lastReportedStatus  status.Status
	lastReportedMessage string

	// Operation machinery. operationFactory and operationExecutor are used
	// by loop and charmState to create and run operations; they are not set
	// by newUniter (presumably created during init from the new* constructor
	// functions below — TODO confirm). translateResolverErr is applied to
	// every error returned by the resolver loop and is never nil: newUniter
	// substitutes an identity function when none is supplied.
	operationFactory        operation.Factory
	operationExecutor       operation.Executor
	newOperationExecutor    NewOperationExecutorFunc
	newProcessRunner        runner.NewRunnerFunc
	newDeployer             charm.NewDeployerFunc
	newRemoteRunnerExecutor NewRunnerExecutorFunc
	translateResolverErr    func(error) error

	// leadershipTracker is obtained from UniterParams.LeadershipTrackerFunc
	// and, for CAAS models, is added to the catacomb so it is killed when
	// the uniter dies. charmDirGuard is passed to the resolver loop.
	leadershipTracker leadership.TrackerWorker
	charmDirGuard     fortress.Guard

	// hookLock is the machine lock supplied in UniterParams.MachineLock.
	hookLock machinelock.Lock

	// secretRotateWatcherFunc returns a watcher that triggers when secrets
	// owned by this unit or its application should be rotated.
	secretRotateWatcherFunc remotestate.SecretTriggerWatcherFunc

	// secretExpiryWatcherFunc returns a watcher that triggers when
	// secret revisions owned by this unit or its application should be expired.
	secretExpiryWatcherFunc remotestate.SecretTriggerWatcherFunc

	// Probe is exported; it is not populated by newUniter.
	// NOTE(review): its purpose is not visible in this file section.
	Probe Probe

	// TODO(axw) move the runListener and run-command code outside of the
	// uniter, and introduce a separate worker. Each worker would feed
	// operations to a single, synchronized runner to execute.
	//
	// When non-nil, runListener and localRunListener have this unit's
	// runner unregistered from them when loop exits. commands and
	// commandChannel feed the commands resolver and the remote state
	// watcher respectively.
	runListener      *RunListener
	localRunListener *RunListener
	commands         runcommands.Commands
	commandChannel   chan string

	// The execution observer is only used in tests at this stage. Should this
	// need to be extended, perhaps a list of observers would be needed.
	observer UniterExecutionObserver

	// updateStatusAt defines a function that will be used to generate signals for
	// the update-status hook
	updateStatusAt remotestate.UpdateStatusTimerFunc

	// containerRunningStatusChannel, if set, is used to signal a change in the
	// unit's status. It is passed to the remote state watcher.
	containerRunningStatusChannel watcher.NotifyChannel

	// containerRunningStatusFunc is used to determine the unit's running status.
	containerRunningStatusFunc remotestate.ContainerRunningStatusFunc

	// remoteInitFunc is used to init remote charm state.
	remoteInitFunc RemoteInitFunc

	// isRemoteUnit is true when the unit is remotely deployed.
	isRemoteUnit bool

	// containerNames will have a list of the workload containers created alongside this
	// unit agent.
	containerNames []string

	// workloadEvents and workloadEventChannel feed the workload hook
	// resolver and the remote state watcher; the resolver is only installed
	// when containerNames is non-empty.
	workloadEvents       container.WorkloadEvents
	workloadEventChannel chan string

	// newPebbleClient is the Pebble client constructor supplied in
	// UniterParams.NewPebbleClient.
	newPebbleClient NewPebbleClientFunc

	// hookRetryStrategy represents configuration for hook retries
	hookRetryStrategy params.RetryStrategy

	// downloader is the downloader that should be used to get the charm
	// archive.
	downloader charm.Downloader

	// rebootQuerier allows the uniter to detect when the machine has
	// rebooted so we can notify the charms accordingly.
	rebootQuerier RebootQuerier
	logger        Logger

	// shutdownChannel is passed to the remote state watcher. When true is
	// sent on the channel, it causes the uniter to start the shutdown process.
	shutdownChannel chan bool
}
   187  
// UniterParams hold all the necessary parameters for a new Uniter.
type UniterParams struct {
	// Notes on fields whose handling is not obvious from their names:
	//   - LeadershipTrackerFunc is called with UnitTag every time the uniter
	//     worker is (re)started, and must therefore be non-nil.
	//   - TranslateResolverErr is optional; when nil, resolver errors are
	//     passed through unchanged.
	//   - ContainerRunningStatusChannel and ContainerRunningStatusFunc are
	//     required when ModelType is CAAS and IsRemoteUnit is true; the
	//     uniter's main loop fails with a NotValid error otherwise.
	//   - DataDir, UnitTag and SocketConfig are combined via NewPaths.
	UniterFacade                  *uniter.State
	ResourcesFacade               *uniter.ResourcesFacadeClient
	PayloadFacade                 *uniter.PayloadFacadeClient
	SecretsClient                 SecretsClient
	SecretsBackendGetter          context.SecretsBackendGetter
	UnitTag                       names.UnitTag
	ModelType                     model.ModelType
	LeadershipTrackerFunc         func(names.UnitTag) leadership.TrackerWorker
	SecretRotateWatcherFunc       remotestate.SecretTriggerWatcherFunc
	SecretExpiryWatcherFunc       remotestate.SecretTriggerWatcherFunc
	DataDir                       string
	Downloader                    charm.Downloader
	MachineLock                   machinelock.Lock
	CharmDirGuard                 fortress.Guard
	UpdateStatusSignal            remotestate.UpdateStatusTimerFunc
	HookRetryStrategy             params.RetryStrategy
	NewOperationExecutor          NewOperationExecutorFunc
	NewProcessRunner              runner.NewRunnerFunc
	NewDeployer                   charm.NewDeployerFunc
	NewRemoteRunnerExecutor       NewRunnerExecutorFunc
	RemoteInitFunc                RemoteInitFunc
	RunListener                   *RunListener
	TranslateResolverErr          func(error) error
	Clock                         clock.Clock
	ContainerRunningStatusChannel watcher.NotifyChannel
	ContainerRunningStatusFunc    remotestate.ContainerRunningStatusFunc
	IsRemoteUnit                  bool
	SocketConfig                  *SocketConfig
	// TODO (mattyw, wallyworld, fwereade) Having the observer here make this approach a bit more legitimate, but it isn't.
	// the observer is only a stop gap to be used in tests. A better approach would be to have the uniter tests start hooks
	// that write to files, and have the tests watch the output to know that hooks have finished.
	//
	// RebootQuerier is consulted for IAAS models only. Logger must be
	// non-nil: StartUniter and the main loop log through it. When Sidecar
	// is true, an EnforcedCharmModifiedVersion other than -1 makes the
	// uniter stop if the application's charm modified version differs.
	Observer                     UniterExecutionObserver
	RebootQuerier                RebootQuerier
	Logger                       Logger
	Sidecar                      bool
	EnforcedCharmModifiedVersion int
	ContainerNames               []string
	NewPebbleClient              NewPebbleClientFunc
}
   229  
// NewOperationExecutorFunc is a func which returns an operations.Executor
// built from the given executor configuration.
// NOTE(review): the meaning of the leading string argument is not visible
// in this file section (presumably the unit name) — confirm at the call site.
type NewOperationExecutorFunc func(string, operation.ExecutorConfig) (operation.Executor, error)
   232  
// ProviderIDGetter defines the API to get provider ID.
type ProviderIDGetter interface {
	// ProviderID returns the provider ID as a string.
	ProviderID() string
	// Refresh returns an error if it fails.
	// NOTE(review): presumably re-reads the entity's state from the API so
	// that ProviderID is current — confirm against the implementation.
	Refresh() error
	// Name returns the entity's name.
	Name() string
}
   239  
// NewRunnerExecutorFunc defines the type of the NewRunnerExecutor: a
// constructor that builds a runner.ExecFunc from a ProviderIDGetter and the
// uniter's Paths.
type NewRunnerExecutorFunc func(ProviderIDGetter, Paths) runner.ExecFunc
   242  
   243  // NewUniter creates a new Uniter which will install, run, and upgrade
   244  // a charm on behalf of the unit with the given unitTag, by executing
   245  // hooks and operations provoked by changes in st.
   246  func NewUniter(uniterParams *UniterParams) (*Uniter, error) {
   247  	startFunc := newUniter(uniterParams)
   248  	w, err := startFunc()
   249  	return w.(*Uniter), err
   250  }
   251  
   252  // StartUniter creates a new Uniter and starts it using the specified runner.
   253  func StartUniter(runner *worker.Runner, params *UniterParams) error {
   254  	startFunc := newUniter(params)
   255  	params.Logger.Debugf("starting uniter for %q", params.UnitTag.Id())
   256  	err := runner.StartWorker(params.UnitTag.Id(), startFunc)
   257  	return errors.Annotate(err, "error starting uniter worker")
   258  }
   259  
   260  func newUniter(uniterParams *UniterParams) func() (worker.Worker, error) {
   261  	translateResolverErr := uniterParams.TranslateResolverErr
   262  	if translateResolverErr == nil {
   263  		translateResolverErr = func(err error) error { return err }
   264  	}
   265  	startFunc := func() (worker.Worker, error) {
   266  		u := &Uniter{
   267  			st:                            uniterParams.UniterFacade,
   268  			resources:                     uniterParams.ResourcesFacade,
   269  			payloads:                      uniterParams.PayloadFacade,
   270  			secretsClient:                 uniterParams.SecretsClient,
   271  			secretsBackendGetter:          uniterParams.SecretsBackendGetter,
   272  			paths:                         NewPaths(uniterParams.DataDir, uniterParams.UnitTag, uniterParams.SocketConfig),
   273  			modelType:                     uniterParams.ModelType,
   274  			hookLock:                      uniterParams.MachineLock,
   275  			leadershipTracker:             uniterParams.LeadershipTrackerFunc(uniterParams.UnitTag),
   276  			secretRotateWatcherFunc:       uniterParams.SecretRotateWatcherFunc,
   277  			secretExpiryWatcherFunc:       uniterParams.SecretExpiryWatcherFunc,
   278  			charmDirGuard:                 uniterParams.CharmDirGuard,
   279  			updateStatusAt:                uniterParams.UpdateStatusSignal,
   280  			hookRetryStrategy:             uniterParams.HookRetryStrategy,
   281  			newOperationExecutor:          uniterParams.NewOperationExecutor,
   282  			newProcessRunner:              uniterParams.NewProcessRunner,
   283  			newDeployer:                   uniterParams.NewDeployer,
   284  			newRemoteRunnerExecutor:       uniterParams.NewRemoteRunnerExecutor,
   285  			remoteInitFunc:                uniterParams.RemoteInitFunc,
   286  			translateResolverErr:          translateResolverErr,
   287  			observer:                      uniterParams.Observer,
   288  			clock:                         uniterParams.Clock,
   289  			downloader:                    uniterParams.Downloader,
   290  			containerRunningStatusChannel: uniterParams.ContainerRunningStatusChannel,
   291  			containerRunningStatusFunc:    uniterParams.ContainerRunningStatusFunc,
   292  			isRemoteUnit:                  uniterParams.IsRemoteUnit,
   293  			runListener:                   uniterParams.RunListener,
   294  			rebootQuerier:                 uniterParams.RebootQuerier,
   295  			logger:                        uniterParams.Logger,
   296  			sidecar:                       uniterParams.Sidecar,
   297  			enforcedCharmModifiedVersion:  uniterParams.EnforcedCharmModifiedVersion,
   298  			containerNames:                uniterParams.ContainerNames,
   299  			newPebbleClient:               uniterParams.NewPebbleClient,
   300  			shutdownChannel:               make(chan bool, 1),
   301  		}
   302  		plan := catacomb.Plan{
   303  			Site: &u.catacomb,
   304  			Work: func() error {
   305  				return u.loop(uniterParams.UnitTag)
   306  			},
   307  		}
   308  		if u.modelType == model.CAAS {
   309  			// For CAAS models, make sure the leadership tracker is killed when the Uniter
   310  			// dies.
   311  			plan.Init = append(plan.Init, u.leadershipTracker)
   312  		}
   313  		if err := catacomb.Invoke(plan); err != nil {
   314  			return nil, errors.Trace(err)
   315  		}
   316  		return u, nil
   317  	}
   318  	return startFunc
   319  }
   320  
   321  func (u *Uniter) loop(unitTag names.UnitTag) (err error) {
   322  	defer func() {
   323  		// If this is a CAAS unit, then dead errors are fairly normal ways to exit
   324  		// the uniter main loop, but the parent operator agent needs to keep running.
   325  		errorString := "<unknown>"
   326  		if err != nil {
   327  			errorString = err.Error()
   328  		}
   329  		// If something else killed the tomb, then use that error.
   330  		if errors.Is(err, tomb.ErrDying) {
   331  			select {
   332  			case <-u.catacomb.Dying():
   333  				errorString = u.catacomb.Err().Error()
   334  			default:
   335  			}
   336  		}
   337  		if errors.Is(err, ErrCAASUnitDead) {
   338  			errorString = err.Error()
   339  			err = nil
   340  		}
   341  		if u.runListener != nil {
   342  			u.runListener.UnregisterRunner(unitTag.Id())
   343  		}
   344  		if u.localRunListener != nil {
   345  			u.localRunListener.UnregisterRunner(unitTag.Id())
   346  		}
   347  		u.logger.Infof("unit %q shutting down: %s", unitTag.Id(), errorString)
   348  	}()
   349  
   350  	if err := u.init(unitTag); err != nil {
   351  		switch cause := errors.Cause(err); cause {
   352  		case resolver.ErrLoopAborted:
   353  			return u.catacomb.ErrDying()
   354  		case ErrCAASUnitDead:
   355  			// Normal exit from the loop as we don't want it restarted.
   356  			return nil
   357  		case jworker.ErrTerminateAgent:
   358  			return err
   359  		default:
   360  			return errors.Annotatef(err, "failed to initialize uniter for %q", unitTag)
   361  		}
   362  	}
   363  	u.logger.Infof("unit %q started", u.unit)
   364  
   365  	// Check we are running the correct charm version.
   366  	if u.sidecar && u.enforcedCharmModifiedVersion != -1 {
   367  		app, err := u.unit.Application()
   368  		if err != nil {
   369  			return errors.Trace(err)
   370  		}
   371  		appCharmModifiedVersion, err := app.CharmModifiedVersion()
   372  		if err != nil {
   373  			return errors.Trace(err)
   374  		}
   375  		if appCharmModifiedVersion != u.enforcedCharmModifiedVersion {
   376  			u.logger.Infof("remote charm modified version (%d) does not match agent's (%d)",
   377  				appCharmModifiedVersion, u.enforcedCharmModifiedVersion)
   378  			return u.stopUnitError()
   379  		}
   380  	}
   381  
   382  	canApplyCharmProfile, charmURL, charmModifiedVersion, err := u.charmState()
   383  	if err != nil {
   384  		return errors.Trace(err)
   385  	}
   386  
   387  	var watcher *remotestate.RemoteStateWatcher
   388  
   389  	u.logger.Infof("hooks are retried %v", u.hookRetryStrategy.ShouldRetry)
   390  	retryHookChan := make(chan struct{}, 1)
   391  	// TODO(katco): 2016-08-09: This type is deprecated: lp:1611427
   392  	retryHookTimer := utils.NewBackoffTimer(utils.BackoffTimerConfig{
   393  		Min:    u.hookRetryStrategy.MinRetryTime,
   394  		Max:    u.hookRetryStrategy.MaxRetryTime,
   395  		Jitter: u.hookRetryStrategy.JitterRetryTime,
   396  		Factor: u.hookRetryStrategy.RetryTimeFactor,
   397  		Func: func() {
   398  			// Don't try to send on the channel if it's already full
   399  			// This can happen if the timer fires off before the event is consumed
   400  			// by the resolver loop
   401  			select {
   402  			case retryHookChan <- struct{}{}:
   403  			default:
   404  			}
   405  		},
   406  		Clock: u.clock,
   407  	})
   408  	defer func() {
   409  		// Whenever we exit the uniter we want to stop a potentially
   410  		// running timer so it doesn't trigger for nothing.
   411  		retryHookTimer.Reset()
   412  	}()
   413  
   414  	restartWatcher := func() error {
   415  		if watcher != nil {
   416  			// watcher added to catacomb, will kill uniter if there's an error.
   417  			_ = worker.Stop(watcher)
   418  		}
   419  		var err error
   420  		watcher, err = remotestate.NewWatcher(
   421  			remotestate.WatcherConfig{
   422  				State:                         remotestate.NewAPIState(u.st),
   423  				LeadershipTracker:             u.leadershipTracker,
   424  				SecretsClient:                 u.secretsClient,
   425  				SecretRotateWatcherFunc:       u.secretRotateWatcherFunc,
   426  				SecretExpiryWatcherFunc:       u.secretExpiryWatcherFunc,
   427  				UnitTag:                       unitTag,
   428  				UpdateStatusChannel:           u.updateStatusAt,
   429  				CommandChannel:                u.commandChannel,
   430  				RetryHookChannel:              retryHookChan,
   431  				ContainerRunningStatusChannel: u.containerRunningStatusChannel,
   432  				ContainerRunningStatusFunc:    u.containerRunningStatusFunc,
   433  				ModelType:                     u.modelType,
   434  				Logger:                        u.logger.Child("remotestate"),
   435  				CanApplyCharmProfile:          canApplyCharmProfile,
   436  				Sidecar:                       u.sidecar,
   437  				EnforcedCharmModifiedVersion:  u.enforcedCharmModifiedVersion,
   438  				WorkloadEventChannel:          u.workloadEventChannel,
   439  				InitialWorkloadEventIDs:       u.workloadEvents.EventIDs(),
   440  				ShutdownChannel:               u.shutdownChannel,
   441  			})
   442  		if err != nil {
   443  			return errors.Trace(err)
   444  		}
   445  		if err := u.catacomb.Add(watcher); err != nil {
   446  			return errors.Trace(err)
   447  		}
   448  		return nil
   449  	}
   450  
   451  	onIdle := func() error {
   452  		opState := u.operationExecutor.State()
   453  		if opState.Kind != operation.Continue {
   454  			// We should only set idle status if we're in
   455  			// the "Continue" state, which indicates that
   456  			// there is nothing to do and we're not in an
   457  			// error state.
   458  			return nil
   459  		}
   460  		return setAgentStatus(u, status.Idle, "", nil)
   461  	}
   462  
   463  	clearResolved := func() error {
   464  		if err := u.unit.ClearResolved(); err != nil {
   465  			return errors.Trace(err)
   466  		}
   467  		watcher.ClearResolvedMode()
   468  		return nil
   469  	}
   470  
   471  	if u.modelType == model.CAAS && u.isRemoteUnit {
   472  		if u.containerRunningStatusChannel == nil {
   473  			return errors.NotValidf("ContainerRunningStatusChannel missing for CAAS remote unit")
   474  		}
   475  		if u.containerRunningStatusFunc == nil {
   476  			return errors.NotValidf("ContainerRunningStatusFunc missing for CAAS remote unit")
   477  		}
   478  	}
   479  
   480  	var rebootDetected bool
   481  	if u.modelType == model.IAAS {
   482  		if rebootDetected, err = u.rebootQuerier.Query(unitTag); err != nil {
   483  			return errors.Annotatef(err, "could not check reboot status for %q", unitTag)
   484  		}
   485  	} else if u.modelType == model.CAAS && u.sidecar {
   486  		rebootDetected = true
   487  	}
   488  	rebootResolver := reboot.NewResolver(u.logger, rebootDetected)
   489  
   490  	for {
   491  		if err = restartWatcher(); err != nil {
   492  			err = errors.Annotate(err, "(re)starting watcher")
   493  			break
   494  		}
   495  
   496  		cfg := ResolverConfig{
   497  			ModelType:           u.modelType,
   498  			ClearResolved:       clearResolved,
   499  			ReportHookError:     u.reportHookError,
   500  			ShouldRetryHooks:    u.hookRetryStrategy.ShouldRetry,
   501  			StartRetryHookTimer: retryHookTimer.Start,
   502  			StopRetryHookTimer:  retryHookTimer.Reset,
   503  			Actions: actions.NewResolver(
   504  				u.logger.Child("actions"),
   505  			),
   506  			VerifyCharmProfile: verifycharmprofile.NewResolver(
   507  				u.logger.Child("verifycharmprofile"),
   508  				u.modelType,
   509  			),
   510  			UpgradeSeries: upgradeseries.NewResolver(
   511  				u.logger.Child("upgradeseries"),
   512  			),
   513  			Reboot: rebootResolver,
   514  			Leadership: uniterleadership.NewResolver(
   515  				u.logger.Child("leadership"),
   516  			),
   517  			CreatedRelations: relation.NewCreatedRelationResolver(
   518  				u.relationStateTracker, u.logger.ChildWithLabels("relation", corelogger.CMR)),
   519  			Relations: relation.NewRelationResolver(
   520  				u.relationStateTracker, u.unit, u.logger.ChildWithLabels("relation", corelogger.CMR)),
   521  			Storage: storage.NewResolver(
   522  				u.logger.Child("storage"), u.storage, u.modelType),
   523  			Commands: runcommands.NewCommandsResolver(
   524  				u.commands, watcher.CommandCompleted,
   525  			),
   526  			Secrets: secrets.NewSecretsResolver(
   527  				u.logger.ChildWithLabels("secrets", corelogger.SECRETS),
   528  				u.secretsTracker,
   529  				watcher.RotateSecretCompleted,
   530  				watcher.ExpireRevisionCompleted,
   531  				watcher.RemoveSecretsCompleted,
   532  			),
   533  			Logger: u.logger,
   534  		}
   535  		if u.modelType == model.CAAS && u.isRemoteUnit {
   536  			cfg.OptionalResolvers = append(cfg.OptionalResolvers, container.NewRemoteContainerInitResolver())
   537  		}
   538  		if len(u.containerNames) > 0 {
   539  			cfg.OptionalResolvers = append(cfg.OptionalResolvers, container.NewWorkloadHookResolver(
   540  				u.logger.Child("workload"),
   541  				u.workloadEvents,
   542  				watcher.WorkloadEventCompleted),
   543  			)
   544  		}
   545  		uniterResolver := NewUniterResolver(cfg)
   546  
   547  		// We should not do anything until there has been a change
   548  		// to the remote state. The watcher will trigger at least
   549  		// once initially.
   550  		select {
   551  		case <-u.catacomb.Dying():
   552  			return u.catacomb.ErrDying()
   553  		case <-watcher.RemoteStateChanged():
   554  		}
   555  
   556  		localState := resolver.LocalState{
   557  			CharmURL:             charmURL,
   558  			CharmModifiedVersion: charmModifiedVersion,
   559  			UpgradeMachineStatus: model.UpgradeSeriesNotStarted,
   560  			// CAAS remote units should trigger remote update of the charm every start.
   561  			OutdatedRemoteCharm: u.isRemoteUnit,
   562  		}
   563  
   564  		for err == nil {
   565  			err = resolver.Loop(resolver.LoopConfig{
   566  				Resolver:      uniterResolver,
   567  				Watcher:       watcher,
   568  				Executor:      u.operationExecutor,
   569  				Factory:       u.operationFactory,
   570  				Abort:         u.catacomb.Dying(),
   571  				OnIdle:        onIdle,
   572  				CharmDirGuard: u.charmDirGuard,
   573  				CharmDir:      u.paths.State.CharmDir,
   574  				Logger:        u.logger.Child("resolver"),
   575  			}, &localState)
   576  
   577  			err = u.translateResolverErr(err)
   578  
   579  			switch {
   580  			case err == nil:
   581  				// Loop back around.
   582  			case errors.Is(err, resolver.ErrLoopAborted):
   583  				err = u.catacomb.ErrDying()
   584  			case errors.Is(err, operation.ErrNeedsReboot):
   585  				err = jworker.ErrRebootMachine
   586  			case errors.Is(err, operation.ErrHookFailed):
   587  				// Loop back around. The resolver can tell that it is in
   588  				// an error state by inspecting the operation state.
   589  				err = nil
   590  			case errors.Is(err, runner.ErrTerminated):
   591  				localState.HookWasShutdown = true
   592  				err = nil
   593  			case errors.Is(err, resolver.ErrUnitDead):
   594  				err = u.terminate()
   595  			case errors.Is(err, resolver.ErrRestart):
   596  				// make sure we update the two values used above in
   597  				// creating LocalState.
   598  				charmURL = localState.CharmURL
   599  				charmModifiedVersion = localState.CharmModifiedVersion
   600  				// leave err assigned, causing loop to break
   601  			case errors.Is(err, jworker.ErrTerminateAgent):
   602  				// terminate agent
   603  			default:
   604  				// We need to set conflicted from here, because error
   605  				// handling is outside of the resolver's control.
   606  				if _, is := errors.AsType[*operation.DeployConflictError](err); is {
   607  					localState.Conflicted = true
   608  					err = setAgentStatus(u, status.Error, "upgrade failed", nil)
   609  				} else {
   610  					reportAgentError(u, "resolver loop error", err)
   611  				}
   612  			}
   613  		}
   614  
   615  		if !errors.Is(err, resolver.ErrRestart) {
   616  			break
   617  		}
   618  	}
   619  	return err
   620  }
   621  
   622  func (u *Uniter) verifyCharmProfile(url string) error {
   623  	// NOTE: this is very similar code to verifyCharmProfile.NextOp,
   624  	// if you make changes here, check to see if they are needed there.
   625  	ch, err := u.st.Charm(url)
   626  	if err != nil {
   627  		return errors.Trace(err)
   628  	}
   629  	required, err := ch.LXDProfileRequired()
   630  	if err != nil {
   631  		return errors.Trace(err)
   632  	}
   633  	if !required {
   634  		// If no lxd profile is required for this charm, move on.
   635  		u.logger.Debugf("no lxd profile required for %s", url)
   636  		return nil
   637  	}
   638  	profile, err := u.unit.LXDProfileName()
   639  	if err != nil {
   640  		return errors.Trace(err)
   641  	}
   642  	if profile == "" {
   643  		if err := u.unit.SetUnitStatus(status.Waiting, "required charm profile not yet applied to machine", nil); err != nil {
   644  			return errors.Trace(err)
   645  		}
   646  		u.logger.Debugf("required lxd profile not found on machine")
   647  		return errors.NotFoundf("required charm profile on machine")
   648  	}
   649  	// double check profile revision matches charm revision.
   650  	rev, err := lxdprofile.ProfileRevision(profile)
   651  	if err != nil {
   652  		return errors.Trace(err)
   653  	}
   654  	curl, err := jujucharm.ParseURL(url)
   655  	if err != nil {
   656  		return errors.Trace(err)
   657  	}
   658  	if rev != curl.Revision {
   659  		if err := u.unit.SetUnitStatus(status.Waiting, fmt.Sprintf("required charm profile %q not yet applied to machine", profile), nil); err != nil {
   660  			return errors.Trace(err)
   661  		}
   662  		u.logger.Debugf("charm is revision %d, charm profile has revision %d", curl.Revision, rev)
   663  		return errors.NotFoundf("required charm profile, %q, on machine", profile)
   664  	}
   665  	u.logger.Debugf("required lxd profile %q FOUND on machine", profile)
   666  	if err := u.unit.SetUnitStatus(status.Waiting, status.MessageInitializingAgent, nil); err != nil {
   667  		return errors.Trace(err)
   668  	}
   669  	return nil
   670  }
   671  
   672  // charmState returns data for the local state setup.
   673  // While gathering the data, look for interrupted Install or pending
   674  // charm upgrade, execute if found.
   675  func (u *Uniter) charmState() (bool, string, int, error) {
   676  	// Install is a special case, as it must run before there
   677  	// is any remote state, and before the remote state watcher
   678  	// is started.
   679  	var charmURL string
   680  	var charmModifiedVersion int
   681  
   682  	canApplyCharmProfile, err := u.unit.CanApplyLXDProfile()
   683  	if err != nil {
   684  		return canApplyCharmProfile, charmURL, charmModifiedVersion, err
   685  	}
   686  
   687  	opState := u.operationExecutor.State()
   688  	if opState.Kind == operation.Install {
   689  		u.logger.Infof("resuming charm install")
   690  		if canApplyCharmProfile {
   691  			// Note: canApplyCharmProfile will be false for a CAAS model.
   692  			// Verify the charm profile before proceeding.
   693  			if err := u.verifyCharmProfile(opState.CharmURL); err != nil {
   694  				return canApplyCharmProfile, charmURL, charmModifiedVersion, err
   695  			}
   696  		}
   697  		op, err := u.operationFactory.NewInstall(opState.CharmURL)
   698  		if err != nil {
   699  			return canApplyCharmProfile, charmURL, charmModifiedVersion, errors.Trace(err)
   700  		}
   701  		if err := u.operationExecutor.Run(op, nil); err != nil {
   702  			return canApplyCharmProfile, charmURL, charmModifiedVersion, errors.Trace(err)
   703  		}
   704  		charmURL = opState.CharmURL
   705  		return canApplyCharmProfile, charmURL, charmModifiedVersion, nil
   706  	}
   707  	// No install needed, find the curl and start.
   708  	curl, err := u.unit.CharmURL()
   709  	if err != nil {
   710  		return canApplyCharmProfile, charmURL, charmModifiedVersion, errors.Trace(err)
   711  	}
   712  	charmURL = curl
   713  	app, err := u.unit.Application()
   714  	if err != nil {
   715  		return canApplyCharmProfile, charmURL, charmModifiedVersion, errors.Trace(err)
   716  	}
   717  
   718  	// TODO (hml) 25-09-2020 - investigate
   719  	// This assumes that the uniter is not restarting after an application
   720  	// changed notification, with changes to CharmModifiedVersion, but before
   721  	// it could be acted on.
   722  	charmModifiedVersion, err = app.CharmModifiedVersion()
   723  	if err != nil {
   724  		return canApplyCharmProfile, charmURL, charmModifiedVersion, errors.Trace(err)
   725  	}
   726  
   727  	return canApplyCharmProfile, charmURL, charmModifiedVersion, nil
   728  }
   729  
   730  func (u *Uniter) terminate() error {
   731  	unitWatcher, err := u.unit.Watch()
   732  	if err != nil {
   733  		return errors.Trace(err)
   734  	}
   735  	if err := u.catacomb.Add(unitWatcher); err != nil {
   736  		return errors.Trace(err)
   737  	}
   738  	for {
   739  		select {
   740  		case <-u.catacomb.Dying():
   741  			return u.catacomb.ErrDying()
   742  		case _, ok := <-unitWatcher.Changes():
   743  			if !ok {
   744  				return errors.New("unit watcher closed")
   745  			}
   746  			if err := u.unit.Refresh(); err != nil {
   747  				return errors.Trace(err)
   748  			}
   749  			if hasSubs, err := u.unit.HasSubordinates(); err != nil {
   750  				return errors.Trace(err)
   751  			} else if hasSubs {
   752  				continue
   753  			}
   754  			// The unit is known to be Dying; so if it didn't have subordinates
   755  			// just above, it can't acquire new ones before this call.
   756  			if err := u.unit.EnsureDead(); err != nil {
   757  				return errors.Trace(err)
   758  			}
   759  			return u.stopUnitError()
   760  		}
   761  	}
   762  }
   763  
   764  // stopUnitError returns the error to use when exiting from stopping the unit.
   765  // For IAAS models, we want to terminate the agent, as each unit is run by
   766  // an individual agent for that unit.
   767  func (u *Uniter) stopUnitError() error {
   768  	u.logger.Debugf("u.modelType: %s", u.modelType)
   769  	if u.modelType == model.CAAS {
   770  		if u.sidecar {
   771  			return errors.WithType(jworker.ErrTerminateAgent, ErrCAASUnitDead)
   772  		}
   773  		return ErrCAASUnitDead
   774  	}
   775  	return jworker.ErrTerminateAgent
   776  }
   777  
   778  func (u *Uniter) init(unitTag names.UnitTag) (err error) {
   779  	switch u.modelType {
   780  	case model.IAAS, model.CAAS:
   781  		// known types, all good
   782  	default:
   783  		return errors.Errorf("unknown model type %q", u.modelType)
   784  	}
   785  
   786  	// If we started up already dead, we should not progress further.
   787  	// If we become Dead immediately after starting up, we may well
   788  	// complete any operations in progress before detecting it,
   789  	// but that race is fundamental and inescapable,
   790  	// whereas this one is not.
   791  	u.unit, err = u.st.Unit(unitTag)
   792  	if err != nil {
   793  		if errors.IsNotFound(err) {
   794  			return u.stopUnitError()
   795  		}
   796  		return errors.Trace(err)
   797  	}
   798  	if u.unit.Life() == life.Dead {
   799  		return u.stopUnitError()
   800  	}
   801  
   802  	// If initialising for the first time after deploying, update the status.
   803  	currentStatus, err := u.unit.UnitStatus()
   804  	if err != nil {
   805  		return errors.Trace(err)
   806  	}
   807  	// TODO(fwereade/wallyworld): we should have an explicit place in the model
   808  	// to tell us when we've hit this point, instead of piggybacking on top of
   809  	// status and/or status history.
   810  	// If the previous status was waiting for machine, we transition to the next step.
   811  	if currentStatus.Status == string(status.Waiting) &&
   812  		(currentStatus.Info == status.MessageWaitForMachine || currentStatus.Info == status.MessageInstallingAgent) {
   813  		if err := u.unit.SetUnitStatus(status.Waiting, status.MessageInitializingAgent, nil); err != nil {
   814  			return errors.Trace(err)
   815  		}
   816  	}
   817  	if err := tools.EnsureSymlinks(u.paths.ToolsDir, u.paths.ToolsDir, jujuc.CommandNames()); err != nil {
   818  		return err
   819  	}
   820  	relStateTracker, err := relation.NewRelationStateTracker(
   821  		relation.RelationStateTrackerConfig{
   822  			State:                u.st,
   823  			Unit:                 u.unit,
   824  			Tracker:              u.leadershipTracker,
   825  			NewLeadershipContext: context.NewLeadershipContext,
   826  			CharmDir:             u.paths.State.CharmDir,
   827  			Abort:                u.catacomb.Dying(),
   828  			Logger:               u.logger.Child("relation"),
   829  		})
   830  	if err != nil {
   831  		return errors.Annotatef(err, "cannot create relation state tracker")
   832  	}
   833  	u.relationStateTracker = relStateTracker
   834  	u.commands = runcommands.NewCommands()
   835  	u.commandChannel = make(chan string)
   836  
   837  	storageAttachments, err := storage.NewAttachments(
   838  		u.st, unitTag, u.unit, u.catacomb.Dying(),
   839  	)
   840  	if err != nil {
   841  		return errors.Annotatef(err, "cannot create storage hook source")
   842  	}
   843  	u.storage = storageAttachments
   844  
   845  	secretsTracker, err := secrets.NewSecrets(
   846  		u.secretsClient, unitTag, u.unit, u.logger.ChildWithLabels("secrets", corelogger.SECRETS),
   847  	)
   848  	if err != nil {
   849  		return errors.Annotatef(err, "cannot create secrets tracker")
   850  	}
   851  	u.secretsTracker = secretsTracker
   852  
   853  	if err := charm.ClearDownloads(u.paths.State.BundlesDir); err != nil {
   854  		u.logger.Warningf(err.Error())
   855  	}
   856  	charmLogger := u.logger.Child("charm")
   857  	deployer, err := u.newDeployer(
   858  		u.paths.State.CharmDir,
   859  		u.paths.State.DeployerDir,
   860  		charm.NewBundlesDir(
   861  			u.paths.State.BundlesDir,
   862  			u.downloader,
   863  			charmLogger),
   864  		charmLogger,
   865  	)
   866  	if err != nil {
   867  		return errors.Annotatef(err, "cannot create deployer")
   868  	}
   869  	contextFactory, err := context.NewContextFactory(context.FactoryConfig{
   870  		State:                u.st,
   871  		SecretsClient:        u.secretsClient,
   872  		SecretsBackendGetter: u.secretsBackendGetter,
   873  		Unit:                 u.unit,
   874  		Resources:            u.resources,
   875  		Payloads:             u.payloads,
   876  		Tracker:              u.leadershipTracker,
   877  		GetRelationInfos:     u.relationStateTracker.GetInfo,
   878  		Paths:                u.paths,
   879  		Clock:                u.clock,
   880  		Logger:               u.logger.Child("context"),
   881  	})
   882  	if err != nil {
   883  		return err
   884  	}
   885  	var remoteExecutor runner.ExecFunc
   886  	if u.newRemoteRunnerExecutor != nil {
   887  		remoteExecutor = u.newRemoteRunnerExecutor(u.unit, u.paths)
   888  	}
   889  	runnerFactory, err := runner.NewFactory(
   890  		u.paths, contextFactory, u.newProcessRunner, remoteExecutor,
   891  	)
   892  	if err != nil {
   893  		return errors.Trace(err)
   894  	}
   895  	u.operationFactory = operation.NewFactory(operation.FactoryParams{
   896  		Deployer:       deployer,
   897  		RunnerFactory:  runnerFactory,
   898  		Callbacks:      &operationCallbacks{u},
   899  		State:          u.st,
   900  		Abort:          u.catacomb.Dying(),
   901  		MetricSpoolDir: u.paths.GetMetricsSpoolDir(),
   902  		Logger:         u.logger.Child("operation"),
   903  	})
   904  
   905  	charmURL, err := u.getApplicationCharmURL()
   906  	if err != nil {
   907  		return errors.Trace(err)
   908  	}
   909  
   910  	initialState := operation.State{
   911  		Kind:     operation.Install,
   912  		Step:     operation.Queued,
   913  		CharmURL: charmURL,
   914  	}
   915  
   916  	operationExecutor, err := u.newOperationExecutor(u.unit.Name(), operation.ExecutorConfig{
   917  		StateReadWriter: u.unit,
   918  		InitialState:    initialState,
   919  		AcquireLock:     u.acquireExecutionLock,
   920  		Logger:          u.logger.Child("operation"),
   921  	})
   922  	if err != nil {
   923  		return errors.Trace(err)
   924  	}
   925  	u.operationExecutor = operationExecutor
   926  
   927  	// Ensure we have an agent directory to to write the socket.
   928  	if err := os.MkdirAll(u.paths.State.BaseDir, 0755); err != nil {
   929  		return errors.Trace(err)
   930  	}
   931  	socket := u.paths.Runtime.LocalJujuExecSocket.Server
   932  	u.logger.Debugf("starting local juju-exec listener on %v", socket)
   933  	u.localRunListener, err = NewRunListener(socket, u.logger)
   934  	if err != nil {
   935  		return errors.Annotate(err, "creating juju run listener")
   936  	}
   937  	rlw := NewRunListenerWrapper(u.localRunListener, u.logger)
   938  	if err := u.catacomb.Add(rlw); err != nil {
   939  		return errors.Trace(err)
   940  	}
   941  
   942  	commandRunner, err := NewChannelCommandRunner(ChannelCommandRunnerConfig{
   943  		Abort:          u.catacomb.Dying(),
   944  		Commands:       u.commands,
   945  		CommandChannel: u.commandChannel,
   946  	})
   947  	if err != nil {
   948  		return errors.Annotate(err, "creating command runner")
   949  	}
   950  	u.localRunListener.RegisterRunner(u.unit.Name(), commandRunner)
   951  	if u.runListener != nil {
   952  		u.runListener.RegisterRunner(u.unit.Name(), commandRunner)
   953  	}
   954  
   955  	u.workloadEvents = container.NewWorkloadEvents()
   956  	u.workloadEventChannel = make(chan string)
   957  	if len(u.containerNames) > 0 {
   958  		poller := NewPebblePoller(u.logger, u.clock, u.containerNames, u.workloadEventChannel, u.workloadEvents, u.newPebbleClient)
   959  		if err := u.catacomb.Add(poller); err != nil {
   960  			return errors.Trace(err)
   961  		}
   962  		noticer := NewPebbleNoticer(u.logger, u.clock, u.containerNames, u.workloadEventChannel, u.workloadEvents, u.newPebbleClient)
   963  		if err := u.catacomb.Add(noticer); err != nil {
   964  			return errors.Trace(err)
   965  		}
   966  	}
   967  
   968  	return nil
   969  }
   970  
   971  func (u *Uniter) Kill() {
   972  	u.catacomb.Kill(nil)
   973  }
   974  
   975  func (u *Uniter) Wait() error {
   976  	return u.catacomb.Wait()
   977  }
   978  
   979  func (u *Uniter) getApplicationCharmURL() (string, error) {
   980  	// TODO(fwereade): pretty sure there's no reason to make 2 API calls here.
   981  	app, err := u.st.Application(u.unit.ApplicationTag())
   982  	if err != nil {
   983  		return "", err
   984  	}
   985  	charmURL, _, err := app.CharmURL()
   986  	return charmURL, err
   987  }
   988  
   989  // RunCommands executes the supplied commands in a hook context.
   990  func (u *Uniter) RunCommands(args RunCommandsArgs) (results *exec.ExecResponse, err error) {
   991  	// TODO(axw) drop this when we move the run-listener to an independent
   992  	// worker. This exists purely for the tests.
   993  	return u.localRunListener.RunCommands(args)
   994  }
   995  
   996  // acquireExecutionLock acquires the machine-level execution lock, and
   997  // returns a func that must be called to unlock it. It's used by operation.Executor
   998  // when running operations that execute external code.
   999  func (u *Uniter) acquireExecutionLock(action, executionGroup string) (func(), error) {
  1000  	// We want to make sure we don't block forever when locking, but take the
  1001  	// Uniter's catacomb into account.
  1002  	spec := machinelock.Spec{
  1003  		Cancel:  u.catacomb.Dying(),
  1004  		Worker:  fmt.Sprintf("%s uniter", u.unit.Name()),
  1005  		Comment: action,
  1006  		Group:   executionGroup,
  1007  	}
  1008  	releaser, err := u.hookLock.Acquire(spec)
  1009  	if err != nil {
  1010  		return nil, errors.Trace(err)
  1011  	}
  1012  	return releaser, nil
  1013  }
  1014  
  1015  func (u *Uniter) reportHookError(hookInfo hook.Info) error {
  1016  	// Set the agent status to "error". We must do this here in case the
  1017  	// hook is interrupted (e.g. unit agent crashes), rather than immediately
  1018  	// after attempting a runHookOp.
  1019  	hookName := string(hookInfo.Kind)
  1020  	hookMessage := string(hookInfo.Kind)
  1021  	statusData := map[string]interface{}{}
  1022  	if hookInfo.Kind.IsRelation() {
  1023  		statusData["relation-id"] = hookInfo.RelationId
  1024  		if hookInfo.RemoteUnit != "" {
  1025  			statusData["remote-unit"] = hookInfo.RemoteUnit
  1026  		}
  1027  		relationName, err := u.relationStateTracker.Name(hookInfo.RelationId)
  1028  		if err != nil {
  1029  			hookMessage = fmt.Sprintf("%s: %v", hookInfo.Kind, err)
  1030  		} else {
  1031  			hookName = fmt.Sprintf("%s-%s", relationName, hookInfo.Kind)
  1032  			hookMessage = hookName
  1033  		}
  1034  	}
  1035  	if hookInfo.Kind.IsSecret() {
  1036  		statusData["secret-uri"] = hookInfo.SecretURI
  1037  		statusData["secret-label"] = hookInfo.SecretLabel
  1038  	}
  1039  	statusData["hook"] = hookName
  1040  	statusMessage := fmt.Sprintf("hook failed: %q", hookMessage)
  1041  	return setAgentStatus(u, status.Error, statusMessage, statusData)
  1042  }
  1043  
  1044  // Terminate terminates the Uniter worker, ensuring the stop hook is fired before
  1045  // exiting with ErrTerminateAgent.
  1046  func (u *Uniter) Terminate() error {
  1047  	select {
  1048  	case u.shutdownChannel <- true:
  1049  	default:
  1050  	}
  1051  	return nil
  1052  }
  1053  
  1054  // Report provides information for the engine report.
  1055  func (u *Uniter) Report() map[string]interface{} {
  1056  	result := make(map[string]interface{})
  1057  
  1058  	// We need to guard against attempting to report when setting up or dying,
  1059  	// so we don't end up panic'ing with missing information.
  1060  	if u.unit != nil {
  1061  		result["unit"] = u.unit.Name()
  1062  	}
  1063  	if u.operationExecutor != nil {
  1064  		result["local-state"] = u.operationExecutor.State().Report()
  1065  	}
  1066  	if u.relationStateTracker != nil {
  1067  		result["relations"] = u.relationStateTracker.Report()
  1068  	}
  1069  	if u.secretsTracker != nil {
  1070  		result["secrets"] = u.secretsTracker.Report()
  1071  	}
  1072  
  1073  	return result
  1074  }