github.com/juju/juju@v0.0.0-20240327075706-a90865de2538/worker/uniter/uniter.go (about) 1 // Copyright 2012-2015 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package uniter 5 6 import ( 7 "fmt" 8 "os" 9 "sync" 10 11 jujucharm "github.com/juju/charm/v12" 12 "github.com/juju/clock" 13 "github.com/juju/errors" 14 "github.com/juju/names/v5" 15 "github.com/juju/utils/v3" 16 "github.com/juju/utils/v3/exec" 17 "github.com/juju/worker/v3" 18 "github.com/juju/worker/v3/catacomb" 19 "gopkg.in/tomb.v2" 20 21 "github.com/juju/juju/agent/tools" 22 "github.com/juju/juju/api/agent/uniter" 23 "github.com/juju/juju/core/leadership" 24 "github.com/juju/juju/core/life" 25 corelogger "github.com/juju/juju/core/logger" 26 "github.com/juju/juju/core/lxdprofile" 27 "github.com/juju/juju/core/machinelock" 28 "github.com/juju/juju/core/model" 29 "github.com/juju/juju/core/status" 30 "github.com/juju/juju/core/watcher" 31 "github.com/juju/juju/rpc/params" 32 jworker "github.com/juju/juju/worker" 33 "github.com/juju/juju/worker/fortress" 34 "github.com/juju/juju/worker/uniter/actions" 35 "github.com/juju/juju/worker/uniter/charm" 36 "github.com/juju/juju/worker/uniter/container" 37 "github.com/juju/juju/worker/uniter/hook" 38 uniterleadership "github.com/juju/juju/worker/uniter/leadership" 39 "github.com/juju/juju/worker/uniter/operation" 40 "github.com/juju/juju/worker/uniter/reboot" 41 "github.com/juju/juju/worker/uniter/relation" 42 "github.com/juju/juju/worker/uniter/remotestate" 43 "github.com/juju/juju/worker/uniter/resolver" 44 "github.com/juju/juju/worker/uniter/runcommands" 45 "github.com/juju/juju/worker/uniter/runner" 46 "github.com/juju/juju/worker/uniter/runner/context" 47 "github.com/juju/juju/worker/uniter/runner/jujuc" 48 "github.com/juju/juju/worker/uniter/secrets" 49 "github.com/juju/juju/worker/uniter/storage" 50 "github.com/juju/juju/worker/uniter/upgradeseries" 51 "github.com/juju/juju/worker/uniter/verifycharmprofile" 52 ) 53 54 const 
(
	// ErrCAASUnitDead is the error returned from terminate or init
	// if the unit is Dead.
	ErrCAASUnitDead = errors.ConstError("unit dead")
)

// A UniterExecutionObserver gets the appropriate methods called when a hook
// is executed and either succeeds or fails.  Missing hooks don't get reported
// in this way.
type UniterExecutionObserver interface {
	HookCompleted(hookName string)
	HookFailed(hookName string)
}

// RebootQuerier is implemented by types that can deliver one-off machine
// reboot notifications to entities.
type RebootQuerier interface {
	Query(tag names.Tag) (bool, error)
}

// SecretsClient provides methods used by the remote state watcher, hook context,
// and op callbacks.
type SecretsClient interface {
	remotestate.SecretsClient
	context.SecretsAccessor
}

// RemoteInitFunc is used to init remote state.
type RemoteInitFunc func(remotestate.ContainerRunningStatus, <-chan struct{}) error

// Uniter implements the capabilities of the unit agent, for example running hooks.
type Uniter struct {
	// catacomb owns the lifetime of the uniter's main loop and of the
	// workers (watchers, listeners) that are added to it.
	catacomb catacomb.Catacomb
	// st is the uniter API facade; unit is looked up through it during init.
	st                           *uniter.State
	secretsClient                SecretsClient
	secretsBackendGetter         context.SecretsBackendGetter
	paths                        Paths
	unit                         *uniter.Unit
	resources                    *uniter.ResourcesFacadeClient
	payloads                     *uniter.PayloadFacadeClient
	modelType                    model.ModelType
	sidecar                      bool
	enforcedCharmModifiedVersion int
	storage                      *storage.Attachments
	clock                        clock.Clock

	relationStateTracker relation.RelationStateTracker

	secretsTracker secrets.SecretStateTracker

	// Cache the last reported status information
	// so we don't make unnecessary api calls.
	// NOTE(review): setStatusMutex presumably guards the two cached
	// fields below; confirm against the status-setting helpers.
	setStatusMutex      sync.Mutex
	lastReportedStatus  status.Status
	lastReportedMessage string

	operationFactory        operation.Factory
	operationExecutor       operation.Executor
	newOperationExecutor    NewOperationExecutorFunc
	newProcessRunner        runner.NewRunnerFunc
	newDeployer             charm.NewDeployerFunc
	newRemoteRunnerExecutor NewRunnerExecutorFunc
	// translateResolverErr is applied to every error returned by the
	// resolver loop before the uniter decides how to react to it.
	translateResolverErr func(error) error

	leadershipTracker leadership.TrackerWorker
	charmDirGuard     fortress.Guard

	hookLock machinelock.Lock

	// secretRotateWatcherFunc returns a watcher that triggers when secrets
	// owned by this unit or its application should be rotated.
	secretRotateWatcherFunc remotestate.SecretTriggerWatcherFunc

	// secretExpiryWatcherFunc returns a watcher that triggers when
	// secret revisions owned by this unit or its application should be expired.
	secretExpiryWatcherFunc remotestate.SecretTriggerWatcherFunc

	Probe Probe

	// TODO(axw) move the runListener and run-command code outside of the
	// uniter, and introduce a separate worker. Each worker would feed
	// operations to a single, synchronized runner to execute.
	runListener      *RunListener
	localRunListener *RunListener
	commands         runcommands.Commands
	commandChannel   chan string

	// The execution observer is only used in tests at this stage. Should this
	// need to be extended, perhaps a list of observers would be needed.
	observer UniterExecutionObserver

	// updateStatusAt defines a function that will be used to generate signals for
	// the update-status hook
	updateStatusAt remotestate.UpdateStatusTimerFunc

	// containerRunningStatusChannel, if set, is used to signal a change in the
	// unit's status. It is passed to the remote state watcher.
	containerRunningStatusChannel watcher.NotifyChannel

	// containerRunningStatusFunc is used to determine the unit's running status.
	containerRunningStatusFunc remotestate.ContainerRunningStatusFunc

	// remoteInitFunc is used to init remote charm state.
	remoteInitFunc RemoteInitFunc

	// isRemoteUnit is true when the unit is remotely deployed.
	isRemoteUnit bool

	// containerNames will have a list of the workload containers created alongside this
	// unit agent.
	containerNames []string

	workloadEvents       container.WorkloadEvents
	workloadEventChannel chan string

	newPebbleClient NewPebbleClientFunc

	// hookRetryStrategy represents configuration for hook retries
	hookRetryStrategy params.RetryStrategy

	// downloader is the downloader that should be used to get the charm
	// archive.
	downloader charm.Downloader

	// rebootQuerier allows the uniter to detect when the machine has
	// rebooted so we can notify the charms accordingly.
	rebootQuerier RebootQuerier
	logger        Logger

	// shutdownChannel is passed to the remote state watcher. When true is
	// sent on the channel, it causes the uniter to start the shutdown process.
	shutdownChannel chan bool
}

// UniterParams hold all the necessary parameters for a new Uniter.
type UniterParams struct {
	UniterFacade         *uniter.State
	ResourcesFacade      *uniter.ResourcesFacadeClient
	PayloadFacade        *uniter.PayloadFacadeClient
	SecretsClient        SecretsClient
	SecretsBackendGetter context.SecretsBackendGetter
	// UnitTag identifies the unit this uniter acts for; its Id is also
	// used as the worker name when the uniter is started via StartUniter.
	UnitTag   names.UnitTag
	ModelType model.ModelType
	// LeadershipTrackerFunc is called with UnitTag each time a Uniter is
	// constructed, so it must be non-nil.
	LeadershipTrackerFunc   func(names.UnitTag) leadership.TrackerWorker
	SecretRotateWatcherFunc remotestate.SecretTriggerWatcherFunc
	SecretExpiryWatcherFunc remotestate.SecretTriggerWatcherFunc
	// DataDir, together with UnitTag and SocketConfig, is passed to
	// NewPaths to derive the uniter's file system layout.
	DataDir                 string
	Downloader              charm.Downloader
	MachineLock             machinelock.Lock
	CharmDirGuard           fortress.Guard
	UpdateStatusSignal      remotestate.UpdateStatusTimerFunc
	HookRetryStrategy       params.RetryStrategy
	NewOperationExecutor    NewOperationExecutorFunc
	NewProcessRunner        runner.NewRunnerFunc
	NewDeployer             charm.NewDeployerFunc
	NewRemoteRunnerExecutor NewRunnerExecutorFunc
	RemoteInitFunc          RemoteInitFunc
	RunListener             *RunListener
	// TranslateResolverErr is optional; when nil the uniter uses the
	// identity function.
	TranslateResolverErr          func(error) error
	Clock                         clock.Clock
	ContainerRunningStatusChannel watcher.NotifyChannel
	ContainerRunningStatusFunc    remotestate.ContainerRunningStatusFunc
	IsRemoteUnit                  bool
	SocketConfig                  *SocketConfig
	// TODO (mattyw, wallyworld, fwereade) Having the observer here make this approach a bit more legitimate, but it isn't.
	// the observer is only a stop gap to be used in tests. A better approach would be to have the uniter tests start hooks
	// that write to files, and have the tests watch the output to know that hooks have finished.
	Observer                     UniterExecutionObserver
	RebootQuerier                RebootQuerier
	Logger                       Logger
	Sidecar                      bool
	EnforcedCharmModifiedVersion int
	ContainerNames               []string
	NewPebbleClient              NewPebbleClientFunc
}

// NewOperationExecutorFunc is a func which returns an operation.Executor.
231 type NewOperationExecutorFunc func(string, operation.ExecutorConfig) (operation.Executor, error) 232 233 // ProviderIDGetter defines the API to get provider ID. 234 type ProviderIDGetter interface { 235 ProviderID() string 236 Refresh() error 237 Name() string 238 } 239 240 // NewRunnerExecutorFunc defines the type of the NewRunnerExecutor. 241 type NewRunnerExecutorFunc func(ProviderIDGetter, Paths) runner.ExecFunc 242 243 // NewUniter creates a new Uniter which will install, run, and upgrade 244 // a charm on behalf of the unit with the given unitTag, by executing 245 // hooks and operations provoked by changes in st. 246 func NewUniter(uniterParams *UniterParams) (*Uniter, error) { 247 startFunc := newUniter(uniterParams) 248 w, err := startFunc() 249 return w.(*Uniter), err 250 } 251 252 // StartUniter creates a new Uniter and starts it using the specified runner. 253 func StartUniter(runner *worker.Runner, params *UniterParams) error { 254 startFunc := newUniter(params) 255 params.Logger.Debugf("starting uniter for %q", params.UnitTag.Id()) 256 err := runner.StartWorker(params.UnitTag.Id(), startFunc) 257 return errors.Annotate(err, "error starting uniter worker") 258 } 259 260 func newUniter(uniterParams *UniterParams) func() (worker.Worker, error) { 261 translateResolverErr := uniterParams.TranslateResolverErr 262 if translateResolverErr == nil { 263 translateResolverErr = func(err error) error { return err } 264 } 265 startFunc := func() (worker.Worker, error) { 266 u := &Uniter{ 267 st: uniterParams.UniterFacade, 268 resources: uniterParams.ResourcesFacade, 269 payloads: uniterParams.PayloadFacade, 270 secretsClient: uniterParams.SecretsClient, 271 secretsBackendGetter: uniterParams.SecretsBackendGetter, 272 paths: NewPaths(uniterParams.DataDir, uniterParams.UnitTag, uniterParams.SocketConfig), 273 modelType: uniterParams.ModelType, 274 hookLock: uniterParams.MachineLock, 275 leadershipTracker: uniterParams.LeadershipTrackerFunc(uniterParams.UnitTag), 
276 secretRotateWatcherFunc: uniterParams.SecretRotateWatcherFunc, 277 secretExpiryWatcherFunc: uniterParams.SecretExpiryWatcherFunc, 278 charmDirGuard: uniterParams.CharmDirGuard, 279 updateStatusAt: uniterParams.UpdateStatusSignal, 280 hookRetryStrategy: uniterParams.HookRetryStrategy, 281 newOperationExecutor: uniterParams.NewOperationExecutor, 282 newProcessRunner: uniterParams.NewProcessRunner, 283 newDeployer: uniterParams.NewDeployer, 284 newRemoteRunnerExecutor: uniterParams.NewRemoteRunnerExecutor, 285 remoteInitFunc: uniterParams.RemoteInitFunc, 286 translateResolverErr: translateResolverErr, 287 observer: uniterParams.Observer, 288 clock: uniterParams.Clock, 289 downloader: uniterParams.Downloader, 290 containerRunningStatusChannel: uniterParams.ContainerRunningStatusChannel, 291 containerRunningStatusFunc: uniterParams.ContainerRunningStatusFunc, 292 isRemoteUnit: uniterParams.IsRemoteUnit, 293 runListener: uniterParams.RunListener, 294 rebootQuerier: uniterParams.RebootQuerier, 295 logger: uniterParams.Logger, 296 sidecar: uniterParams.Sidecar, 297 enforcedCharmModifiedVersion: uniterParams.EnforcedCharmModifiedVersion, 298 containerNames: uniterParams.ContainerNames, 299 newPebbleClient: uniterParams.NewPebbleClient, 300 shutdownChannel: make(chan bool, 1), 301 } 302 plan := catacomb.Plan{ 303 Site: &u.catacomb, 304 Work: func() error { 305 return u.loop(uniterParams.UnitTag) 306 }, 307 } 308 if u.modelType == model.CAAS { 309 // For CAAS models, make sure the leadership tracker is killed when the Uniter 310 // dies. 
311 plan.Init = append(plan.Init, u.leadershipTracker) 312 } 313 if err := catacomb.Invoke(plan); err != nil { 314 return nil, errors.Trace(err) 315 } 316 return u, nil 317 } 318 return startFunc 319 } 320 321 func (u *Uniter) loop(unitTag names.UnitTag) (err error) { 322 defer func() { 323 // If this is a CAAS unit, then dead errors are fairly normal ways to exit 324 // the uniter main loop, but the parent operator agent needs to keep running. 325 errorString := "<unknown>" 326 if err != nil { 327 errorString = err.Error() 328 } 329 // If something else killed the tomb, then use that error. 330 if errors.Is(err, tomb.ErrDying) { 331 select { 332 case <-u.catacomb.Dying(): 333 errorString = u.catacomb.Err().Error() 334 default: 335 } 336 } 337 if errors.Is(err, ErrCAASUnitDead) { 338 errorString = err.Error() 339 err = nil 340 } 341 if u.runListener != nil { 342 u.runListener.UnregisterRunner(unitTag.Id()) 343 } 344 if u.localRunListener != nil { 345 u.localRunListener.UnregisterRunner(unitTag.Id()) 346 } 347 u.logger.Infof("unit %q shutting down: %s", unitTag.Id(), errorString) 348 }() 349 350 if err := u.init(unitTag); err != nil { 351 switch cause := errors.Cause(err); cause { 352 case resolver.ErrLoopAborted: 353 return u.catacomb.ErrDying() 354 case ErrCAASUnitDead: 355 // Normal exit from the loop as we don't want it restarted. 356 return nil 357 case jworker.ErrTerminateAgent: 358 return err 359 default: 360 return errors.Annotatef(err, "failed to initialize uniter for %q", unitTag) 361 } 362 } 363 u.logger.Infof("unit %q started", u.unit) 364 365 // Check we are running the correct charm version. 
366 if u.sidecar && u.enforcedCharmModifiedVersion != -1 { 367 app, err := u.unit.Application() 368 if err != nil { 369 return errors.Trace(err) 370 } 371 appCharmModifiedVersion, err := app.CharmModifiedVersion() 372 if err != nil { 373 return errors.Trace(err) 374 } 375 if appCharmModifiedVersion != u.enforcedCharmModifiedVersion { 376 u.logger.Infof("remote charm modified version (%d) does not match agent's (%d)", 377 appCharmModifiedVersion, u.enforcedCharmModifiedVersion) 378 return u.stopUnitError() 379 } 380 } 381 382 canApplyCharmProfile, charmURL, charmModifiedVersion, err := u.charmState() 383 if err != nil { 384 return errors.Trace(err) 385 } 386 387 var watcher *remotestate.RemoteStateWatcher 388 389 u.logger.Infof("hooks are retried %v", u.hookRetryStrategy.ShouldRetry) 390 retryHookChan := make(chan struct{}, 1) 391 // TODO(katco): 2016-08-09: This type is deprecated: lp:1611427 392 retryHookTimer := utils.NewBackoffTimer(utils.BackoffTimerConfig{ 393 Min: u.hookRetryStrategy.MinRetryTime, 394 Max: u.hookRetryStrategy.MaxRetryTime, 395 Jitter: u.hookRetryStrategy.JitterRetryTime, 396 Factor: u.hookRetryStrategy.RetryTimeFactor, 397 Func: func() { 398 // Don't try to send on the channel if it's already full 399 // This can happen if the timer fires off before the event is consumed 400 // by the resolver loop 401 select { 402 case retryHookChan <- struct{}{}: 403 default: 404 } 405 }, 406 Clock: u.clock, 407 }) 408 defer func() { 409 // Whenever we exit the uniter we want to stop a potentially 410 // running timer so it doesn't trigger for nothing. 411 retryHookTimer.Reset() 412 }() 413 414 restartWatcher := func() error { 415 if watcher != nil { 416 // watcher added to catacomb, will kill uniter if there's an error. 
417 _ = worker.Stop(watcher) 418 } 419 var err error 420 watcher, err = remotestate.NewWatcher( 421 remotestate.WatcherConfig{ 422 State: remotestate.NewAPIState(u.st), 423 LeadershipTracker: u.leadershipTracker, 424 SecretsClient: u.secretsClient, 425 SecretRotateWatcherFunc: u.secretRotateWatcherFunc, 426 SecretExpiryWatcherFunc: u.secretExpiryWatcherFunc, 427 UnitTag: unitTag, 428 UpdateStatusChannel: u.updateStatusAt, 429 CommandChannel: u.commandChannel, 430 RetryHookChannel: retryHookChan, 431 ContainerRunningStatusChannel: u.containerRunningStatusChannel, 432 ContainerRunningStatusFunc: u.containerRunningStatusFunc, 433 ModelType: u.modelType, 434 Logger: u.logger.Child("remotestate"), 435 CanApplyCharmProfile: canApplyCharmProfile, 436 Sidecar: u.sidecar, 437 EnforcedCharmModifiedVersion: u.enforcedCharmModifiedVersion, 438 WorkloadEventChannel: u.workloadEventChannel, 439 InitialWorkloadEventIDs: u.workloadEvents.EventIDs(), 440 ShutdownChannel: u.shutdownChannel, 441 }) 442 if err != nil { 443 return errors.Trace(err) 444 } 445 if err := u.catacomb.Add(watcher); err != nil { 446 return errors.Trace(err) 447 } 448 return nil 449 } 450 451 onIdle := func() error { 452 opState := u.operationExecutor.State() 453 if opState.Kind != operation.Continue { 454 // We should only set idle status if we're in 455 // the "Continue" state, which indicates that 456 // there is nothing to do and we're not in an 457 // error state. 
458 return nil 459 } 460 return setAgentStatus(u, status.Idle, "", nil) 461 } 462 463 clearResolved := func() error { 464 if err := u.unit.ClearResolved(); err != nil { 465 return errors.Trace(err) 466 } 467 watcher.ClearResolvedMode() 468 return nil 469 } 470 471 if u.modelType == model.CAAS && u.isRemoteUnit { 472 if u.containerRunningStatusChannel == nil { 473 return errors.NotValidf("ContainerRunningStatusChannel missing for CAAS remote unit") 474 } 475 if u.containerRunningStatusFunc == nil { 476 return errors.NotValidf("ContainerRunningStatusFunc missing for CAAS remote unit") 477 } 478 } 479 480 var rebootDetected bool 481 if u.modelType == model.IAAS { 482 if rebootDetected, err = u.rebootQuerier.Query(unitTag); err != nil { 483 return errors.Annotatef(err, "could not check reboot status for %q", unitTag) 484 } 485 } else if u.modelType == model.CAAS && u.sidecar { 486 rebootDetected = true 487 } 488 rebootResolver := reboot.NewResolver(u.logger, rebootDetected) 489 490 for { 491 if err = restartWatcher(); err != nil { 492 err = errors.Annotate(err, "(re)starting watcher") 493 break 494 } 495 496 cfg := ResolverConfig{ 497 ModelType: u.modelType, 498 ClearResolved: clearResolved, 499 ReportHookError: u.reportHookError, 500 ShouldRetryHooks: u.hookRetryStrategy.ShouldRetry, 501 StartRetryHookTimer: retryHookTimer.Start, 502 StopRetryHookTimer: retryHookTimer.Reset, 503 Actions: actions.NewResolver( 504 u.logger.Child("actions"), 505 ), 506 VerifyCharmProfile: verifycharmprofile.NewResolver( 507 u.logger.Child("verifycharmprofile"), 508 u.modelType, 509 ), 510 UpgradeSeries: upgradeseries.NewResolver( 511 u.logger.Child("upgradeseries"), 512 ), 513 Reboot: rebootResolver, 514 Leadership: uniterleadership.NewResolver( 515 u.logger.Child("leadership"), 516 ), 517 CreatedRelations: relation.NewCreatedRelationResolver( 518 u.relationStateTracker, u.logger.ChildWithLabels("relation", corelogger.CMR)), 519 Relations: relation.NewRelationResolver( 520 
u.relationStateTracker, u.unit, u.logger.ChildWithLabels("relation", corelogger.CMR)), 521 Storage: storage.NewResolver( 522 u.logger.Child("storage"), u.storage, u.modelType), 523 Commands: runcommands.NewCommandsResolver( 524 u.commands, watcher.CommandCompleted, 525 ), 526 Secrets: secrets.NewSecretsResolver( 527 u.logger.ChildWithLabels("secrets", corelogger.SECRETS), 528 u.secretsTracker, 529 watcher.RotateSecretCompleted, 530 watcher.ExpireRevisionCompleted, 531 watcher.RemoveSecretsCompleted, 532 ), 533 Logger: u.logger, 534 } 535 if u.modelType == model.CAAS && u.isRemoteUnit { 536 cfg.OptionalResolvers = append(cfg.OptionalResolvers, container.NewRemoteContainerInitResolver()) 537 } 538 if len(u.containerNames) > 0 { 539 cfg.OptionalResolvers = append(cfg.OptionalResolvers, container.NewWorkloadHookResolver( 540 u.logger.Child("workload"), 541 u.workloadEvents, 542 watcher.WorkloadEventCompleted), 543 ) 544 } 545 uniterResolver := NewUniterResolver(cfg) 546 547 // We should not do anything until there has been a change 548 // to the remote state. The watcher will trigger at least 549 // once initially. 550 select { 551 case <-u.catacomb.Dying(): 552 return u.catacomb.ErrDying() 553 case <-watcher.RemoteStateChanged(): 554 } 555 556 localState := resolver.LocalState{ 557 CharmURL: charmURL, 558 CharmModifiedVersion: charmModifiedVersion, 559 UpgradeMachineStatus: model.UpgradeSeriesNotStarted, 560 // CAAS remote units should trigger remote update of the charm every start. 
561 OutdatedRemoteCharm: u.isRemoteUnit, 562 } 563 564 for err == nil { 565 err = resolver.Loop(resolver.LoopConfig{ 566 Resolver: uniterResolver, 567 Watcher: watcher, 568 Executor: u.operationExecutor, 569 Factory: u.operationFactory, 570 Abort: u.catacomb.Dying(), 571 OnIdle: onIdle, 572 CharmDirGuard: u.charmDirGuard, 573 CharmDir: u.paths.State.CharmDir, 574 Logger: u.logger.Child("resolver"), 575 }, &localState) 576 577 err = u.translateResolverErr(err) 578 579 switch { 580 case err == nil: 581 // Loop back around. 582 case errors.Is(err, resolver.ErrLoopAborted): 583 err = u.catacomb.ErrDying() 584 case errors.Is(err, operation.ErrNeedsReboot): 585 err = jworker.ErrRebootMachine 586 case errors.Is(err, operation.ErrHookFailed): 587 // Loop back around. The resolver can tell that it is in 588 // an error state by inspecting the operation state. 589 err = nil 590 case errors.Is(err, runner.ErrTerminated): 591 localState.HookWasShutdown = true 592 err = nil 593 case errors.Is(err, resolver.ErrUnitDead): 594 err = u.terminate() 595 case errors.Is(err, resolver.ErrRestart): 596 // make sure we update the two values used above in 597 // creating LocalState. 598 charmURL = localState.CharmURL 599 charmModifiedVersion = localState.CharmModifiedVersion 600 // leave err assigned, causing loop to break 601 case errors.Is(err, jworker.ErrTerminateAgent): 602 // terminate agent 603 default: 604 // We need to set conflicted from here, because error 605 // handling is outside of the resolver's control. 
606 if _, is := errors.AsType[*operation.DeployConflictError](err); is { 607 localState.Conflicted = true 608 err = setAgentStatus(u, status.Error, "upgrade failed", nil) 609 } else { 610 reportAgentError(u, "resolver loop error", err) 611 } 612 } 613 } 614 615 if !errors.Is(err, resolver.ErrRestart) { 616 break 617 } 618 } 619 return err 620 } 621 622 func (u *Uniter) verifyCharmProfile(url string) error { 623 // NOTE: this is very similar code to verifyCharmProfile.NextOp, 624 // if you make changes here, check to see if they are needed there. 625 ch, err := u.st.Charm(url) 626 if err != nil { 627 return errors.Trace(err) 628 } 629 required, err := ch.LXDProfileRequired() 630 if err != nil { 631 return errors.Trace(err) 632 } 633 if !required { 634 // If no lxd profile is required for this charm, move on. 635 u.logger.Debugf("no lxd profile required for %s", url) 636 return nil 637 } 638 profile, err := u.unit.LXDProfileName() 639 if err != nil { 640 return errors.Trace(err) 641 } 642 if profile == "" { 643 if err := u.unit.SetUnitStatus(status.Waiting, "required charm profile not yet applied to machine", nil); err != nil { 644 return errors.Trace(err) 645 } 646 u.logger.Debugf("required lxd profile not found on machine") 647 return errors.NotFoundf("required charm profile on machine") 648 } 649 // double check profile revision matches charm revision. 
650 rev, err := lxdprofile.ProfileRevision(profile) 651 if err != nil { 652 return errors.Trace(err) 653 } 654 curl, err := jujucharm.ParseURL(url) 655 if err != nil { 656 return errors.Trace(err) 657 } 658 if rev != curl.Revision { 659 if err := u.unit.SetUnitStatus(status.Waiting, fmt.Sprintf("required charm profile %q not yet applied to machine", profile), nil); err != nil { 660 return errors.Trace(err) 661 } 662 u.logger.Debugf("charm is revision %d, charm profile has revision %d", curl.Revision, rev) 663 return errors.NotFoundf("required charm profile, %q, on machine", profile) 664 } 665 u.logger.Debugf("required lxd profile %q FOUND on machine", profile) 666 if err := u.unit.SetUnitStatus(status.Waiting, status.MessageInitializingAgent, nil); err != nil { 667 return errors.Trace(err) 668 } 669 return nil 670 } 671 672 // charmState returns data for the local state setup. 673 // While gathering the data, look for interrupted Install or pending 674 // charm upgrade, execute if found. 675 func (u *Uniter) charmState() (bool, string, int, error) { 676 // Install is a special case, as it must run before there 677 // is any remote state, and before the remote state watcher 678 // is started. 679 var charmURL string 680 var charmModifiedVersion int 681 682 canApplyCharmProfile, err := u.unit.CanApplyLXDProfile() 683 if err != nil { 684 return canApplyCharmProfile, charmURL, charmModifiedVersion, err 685 } 686 687 opState := u.operationExecutor.State() 688 if opState.Kind == operation.Install { 689 u.logger.Infof("resuming charm install") 690 if canApplyCharmProfile { 691 // Note: canApplyCharmProfile will be false for a CAAS model. 692 // Verify the charm profile before proceeding. 
693 if err := u.verifyCharmProfile(opState.CharmURL); err != nil { 694 return canApplyCharmProfile, charmURL, charmModifiedVersion, err 695 } 696 } 697 op, err := u.operationFactory.NewInstall(opState.CharmURL) 698 if err != nil { 699 return canApplyCharmProfile, charmURL, charmModifiedVersion, errors.Trace(err) 700 } 701 if err := u.operationExecutor.Run(op, nil); err != nil { 702 return canApplyCharmProfile, charmURL, charmModifiedVersion, errors.Trace(err) 703 } 704 charmURL = opState.CharmURL 705 return canApplyCharmProfile, charmURL, charmModifiedVersion, nil 706 } 707 // No install needed, find the curl and start. 708 curl, err := u.unit.CharmURL() 709 if err != nil { 710 return canApplyCharmProfile, charmURL, charmModifiedVersion, errors.Trace(err) 711 } 712 charmURL = curl 713 app, err := u.unit.Application() 714 if err != nil { 715 return canApplyCharmProfile, charmURL, charmModifiedVersion, errors.Trace(err) 716 } 717 718 // TODO (hml) 25-09-2020 - investigate 719 // This assumes that the uniter is not restarting after an application 720 // changed notification, with changes to CharmModifiedVersion, but before 721 // it could be acted on. 
722 charmModifiedVersion, err = app.CharmModifiedVersion() 723 if err != nil { 724 return canApplyCharmProfile, charmURL, charmModifiedVersion, errors.Trace(err) 725 } 726 727 return canApplyCharmProfile, charmURL, charmModifiedVersion, nil 728 } 729 730 func (u *Uniter) terminate() error { 731 unitWatcher, err := u.unit.Watch() 732 if err != nil { 733 return errors.Trace(err) 734 } 735 if err := u.catacomb.Add(unitWatcher); err != nil { 736 return errors.Trace(err) 737 } 738 for { 739 select { 740 case <-u.catacomb.Dying(): 741 return u.catacomb.ErrDying() 742 case _, ok := <-unitWatcher.Changes(): 743 if !ok { 744 return errors.New("unit watcher closed") 745 } 746 if err := u.unit.Refresh(); err != nil { 747 return errors.Trace(err) 748 } 749 if hasSubs, err := u.unit.HasSubordinates(); err != nil { 750 return errors.Trace(err) 751 } else if hasSubs { 752 continue 753 } 754 // The unit is known to be Dying; so if it didn't have subordinates 755 // just above, it can't acquire new ones before this call. 756 if err := u.unit.EnsureDead(); err != nil { 757 return errors.Trace(err) 758 } 759 return u.stopUnitError() 760 } 761 } 762 } 763 764 // stopUnitError returns the error to use when exiting from stopping the unit. 765 // For IAAS models, we want to terminate the agent, as each unit is run by 766 // an individual agent for that unit. 767 func (u *Uniter) stopUnitError() error { 768 u.logger.Debugf("u.modelType: %s", u.modelType) 769 if u.modelType == model.CAAS { 770 if u.sidecar { 771 return errors.WithType(jworker.ErrTerminateAgent, ErrCAASUnitDead) 772 } 773 return ErrCAASUnitDead 774 } 775 return jworker.ErrTerminateAgent 776 } 777 778 func (u *Uniter) init(unitTag names.UnitTag) (err error) { 779 switch u.modelType { 780 case model.IAAS, model.CAAS: 781 // known types, all good 782 default: 783 return errors.Errorf("unknown model type %q", u.modelType) 784 } 785 786 // If we started up already dead, we should not progress further. 
787 // If we become Dead immediately after starting up, we may well 788 // complete any operations in progress before detecting it, 789 // but that race is fundamental and inescapable, 790 // whereas this one is not. 791 u.unit, err = u.st.Unit(unitTag) 792 if err != nil { 793 if errors.IsNotFound(err) { 794 return u.stopUnitError() 795 } 796 return errors.Trace(err) 797 } 798 if u.unit.Life() == life.Dead { 799 return u.stopUnitError() 800 } 801 802 // If initialising for the first time after deploying, update the status. 803 currentStatus, err := u.unit.UnitStatus() 804 if err != nil { 805 return errors.Trace(err) 806 } 807 // TODO(fwereade/wallyworld): we should have an explicit place in the model 808 // to tell us when we've hit this point, instead of piggybacking on top of 809 // status and/or status history. 810 // If the previous status was waiting for machine, we transition to the next step. 811 if currentStatus.Status == string(status.Waiting) && 812 (currentStatus.Info == status.MessageWaitForMachine || currentStatus.Info == status.MessageInstallingAgent) { 813 if err := u.unit.SetUnitStatus(status.Waiting, status.MessageInitializingAgent, nil); err != nil { 814 return errors.Trace(err) 815 } 816 } 817 if err := tools.EnsureSymlinks(u.paths.ToolsDir, u.paths.ToolsDir, jujuc.CommandNames()); err != nil { 818 return err 819 } 820 relStateTracker, err := relation.NewRelationStateTracker( 821 relation.RelationStateTrackerConfig{ 822 State: u.st, 823 Unit: u.unit, 824 Tracker: u.leadershipTracker, 825 NewLeadershipContext: context.NewLeadershipContext, 826 CharmDir: u.paths.State.CharmDir, 827 Abort: u.catacomb.Dying(), 828 Logger: u.logger.Child("relation"), 829 }) 830 if err != nil { 831 return errors.Annotatef(err, "cannot create relation state tracker") 832 } 833 u.relationStateTracker = relStateTracker 834 u.commands = runcommands.NewCommands() 835 u.commandChannel = make(chan string) 836 837 storageAttachments, err := storage.NewAttachments( 838 u.st, 
unitTag, u.unit, u.catacomb.Dying(), 839 ) 840 if err != nil { 841 return errors.Annotatef(err, "cannot create storage hook source") 842 } 843 u.storage = storageAttachments 844 845 secretsTracker, err := secrets.NewSecrets( 846 u.secretsClient, unitTag, u.unit, u.logger.ChildWithLabels("secrets", corelogger.SECRETS), 847 ) 848 if err != nil { 849 return errors.Annotatef(err, "cannot create secrets tracker") 850 } 851 u.secretsTracker = secretsTracker 852 853 if err := charm.ClearDownloads(u.paths.State.BundlesDir); err != nil { 854 u.logger.Warningf(err.Error()) 855 } 856 charmLogger := u.logger.Child("charm") 857 deployer, err := u.newDeployer( 858 u.paths.State.CharmDir, 859 u.paths.State.DeployerDir, 860 charm.NewBundlesDir( 861 u.paths.State.BundlesDir, 862 u.downloader, 863 charmLogger), 864 charmLogger, 865 ) 866 if err != nil { 867 return errors.Annotatef(err, "cannot create deployer") 868 } 869 contextFactory, err := context.NewContextFactory(context.FactoryConfig{ 870 State: u.st, 871 SecretsClient: u.secretsClient, 872 SecretsBackendGetter: u.secretsBackendGetter, 873 Unit: u.unit, 874 Resources: u.resources, 875 Payloads: u.payloads, 876 Tracker: u.leadershipTracker, 877 GetRelationInfos: u.relationStateTracker.GetInfo, 878 Paths: u.paths, 879 Clock: u.clock, 880 Logger: u.logger.Child("context"), 881 }) 882 if err != nil { 883 return err 884 } 885 var remoteExecutor runner.ExecFunc 886 if u.newRemoteRunnerExecutor != nil { 887 remoteExecutor = u.newRemoteRunnerExecutor(u.unit, u.paths) 888 } 889 runnerFactory, err := runner.NewFactory( 890 u.paths, contextFactory, u.newProcessRunner, remoteExecutor, 891 ) 892 if err != nil { 893 return errors.Trace(err) 894 } 895 u.operationFactory = operation.NewFactory(operation.FactoryParams{ 896 Deployer: deployer, 897 RunnerFactory: runnerFactory, 898 Callbacks: &operationCallbacks{u}, 899 State: u.st, 900 Abort: u.catacomb.Dying(), 901 MetricSpoolDir: u.paths.GetMetricsSpoolDir(), 902 Logger: 
u.logger.Child("operation"), 903 }) 904 905 charmURL, err := u.getApplicationCharmURL() 906 if err != nil { 907 return errors.Trace(err) 908 } 909 910 initialState := operation.State{ 911 Kind: operation.Install, 912 Step: operation.Queued, 913 CharmURL: charmURL, 914 } 915 916 operationExecutor, err := u.newOperationExecutor(u.unit.Name(), operation.ExecutorConfig{ 917 StateReadWriter: u.unit, 918 InitialState: initialState, 919 AcquireLock: u.acquireExecutionLock, 920 Logger: u.logger.Child("operation"), 921 }) 922 if err != nil { 923 return errors.Trace(err) 924 } 925 u.operationExecutor = operationExecutor 926 927 // Ensure we have an agent directory to to write the socket. 928 if err := os.MkdirAll(u.paths.State.BaseDir, 0755); err != nil { 929 return errors.Trace(err) 930 } 931 socket := u.paths.Runtime.LocalJujuExecSocket.Server 932 u.logger.Debugf("starting local juju-exec listener on %v", socket) 933 u.localRunListener, err = NewRunListener(socket, u.logger) 934 if err != nil { 935 return errors.Annotate(err, "creating juju run listener") 936 } 937 rlw := NewRunListenerWrapper(u.localRunListener, u.logger) 938 if err := u.catacomb.Add(rlw); err != nil { 939 return errors.Trace(err) 940 } 941 942 commandRunner, err := NewChannelCommandRunner(ChannelCommandRunnerConfig{ 943 Abort: u.catacomb.Dying(), 944 Commands: u.commands, 945 CommandChannel: u.commandChannel, 946 }) 947 if err != nil { 948 return errors.Annotate(err, "creating command runner") 949 } 950 u.localRunListener.RegisterRunner(u.unit.Name(), commandRunner) 951 if u.runListener != nil { 952 u.runListener.RegisterRunner(u.unit.Name(), commandRunner) 953 } 954 955 u.workloadEvents = container.NewWorkloadEvents() 956 u.workloadEventChannel = make(chan string) 957 if len(u.containerNames) > 0 { 958 poller := NewPebblePoller(u.logger, u.clock, u.containerNames, u.workloadEventChannel, u.workloadEvents, u.newPebbleClient) 959 if err := u.catacomb.Add(poller); err != nil { 960 return errors.Trace(err) 
961 } 962 noticer := NewPebbleNoticer(u.logger, u.clock, u.containerNames, u.workloadEventChannel, u.workloadEvents, u.newPebbleClient) 963 if err := u.catacomb.Add(noticer); err != nil { 964 return errors.Trace(err) 965 } 966 } 967 968 return nil 969 } 970 971 func (u *Uniter) Kill() { 972 u.catacomb.Kill(nil) 973 } 974 975 func (u *Uniter) Wait() error { 976 return u.catacomb.Wait() 977 } 978 979 func (u *Uniter) getApplicationCharmURL() (string, error) { 980 // TODO(fwereade): pretty sure there's no reason to make 2 API calls here. 981 app, err := u.st.Application(u.unit.ApplicationTag()) 982 if err != nil { 983 return "", err 984 } 985 charmURL, _, err := app.CharmURL() 986 return charmURL, err 987 } 988 989 // RunCommands executes the supplied commands in a hook context. 990 func (u *Uniter) RunCommands(args RunCommandsArgs) (results *exec.ExecResponse, err error) { 991 // TODO(axw) drop this when we move the run-listener to an independent 992 // worker. This exists purely for the tests. 993 return u.localRunListener.RunCommands(args) 994 } 995 996 // acquireExecutionLock acquires the machine-level execution lock, and 997 // returns a func that must be called to unlock it. It's used by operation.Executor 998 // when running operations that execute external code. 999 func (u *Uniter) acquireExecutionLock(action, executionGroup string) (func(), error) { 1000 // We want to make sure we don't block forever when locking, but take the 1001 // Uniter's catacomb into account. 1002 spec := machinelock.Spec{ 1003 Cancel: u.catacomb.Dying(), 1004 Worker: fmt.Sprintf("%s uniter", u.unit.Name()), 1005 Comment: action, 1006 Group: executionGroup, 1007 } 1008 releaser, err := u.hookLock.Acquire(spec) 1009 if err != nil { 1010 return nil, errors.Trace(err) 1011 } 1012 return releaser, nil 1013 } 1014 1015 func (u *Uniter) reportHookError(hookInfo hook.Info) error { 1016 // Set the agent status to "error". We must do this here in case the 1017 // hook is interrupted (e.g. 
unit agent crashes), rather than immediately 1018 // after attempting a runHookOp. 1019 hookName := string(hookInfo.Kind) 1020 hookMessage := string(hookInfo.Kind) 1021 statusData := map[string]interface{}{} 1022 if hookInfo.Kind.IsRelation() { 1023 statusData["relation-id"] = hookInfo.RelationId 1024 if hookInfo.RemoteUnit != "" { 1025 statusData["remote-unit"] = hookInfo.RemoteUnit 1026 } 1027 relationName, err := u.relationStateTracker.Name(hookInfo.RelationId) 1028 if err != nil { 1029 hookMessage = fmt.Sprintf("%s: %v", hookInfo.Kind, err) 1030 } else { 1031 hookName = fmt.Sprintf("%s-%s", relationName, hookInfo.Kind) 1032 hookMessage = hookName 1033 } 1034 } 1035 if hookInfo.Kind.IsSecret() { 1036 statusData["secret-uri"] = hookInfo.SecretURI 1037 statusData["secret-label"] = hookInfo.SecretLabel 1038 } 1039 statusData["hook"] = hookName 1040 statusMessage := fmt.Sprintf("hook failed: %q", hookMessage) 1041 return setAgentStatus(u, status.Error, statusMessage, statusData) 1042 } 1043 1044 // Terminate terminates the Uniter worker, ensuring the stop hook is fired before 1045 // exiting with ErrTerminateAgent. 1046 func (u *Uniter) Terminate() error { 1047 select { 1048 case u.shutdownChannel <- true: 1049 default: 1050 } 1051 return nil 1052 } 1053 1054 // Report provides information for the engine report. 1055 func (u *Uniter) Report() map[string]interface{} { 1056 result := make(map[string]interface{}) 1057 1058 // We need to guard against attempting to report when setting up or dying, 1059 // so we don't end up panic'ing with missing information. 1060 if u.unit != nil { 1061 result["unit"] = u.unit.Name() 1062 } 1063 if u.operationExecutor != nil { 1064 result["local-state"] = u.operationExecutor.State().Report() 1065 } 1066 if u.relationStateTracker != nil { 1067 result["relations"] = u.relationStateTracker.Report() 1068 } 1069 if u.secretsTracker != nil { 1070 result["secrets"] = u.secretsTracker.Report() 1071 } 1072 1073 return result 1074 }