github.com/juju/juju@v0.0.0-20240327075706-a90865de2538/worker/uniter/resolver.go (about) 1 // Copyright 2015 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package uniter 5 6 import ( 7 "fmt" 8 9 jujucharm "github.com/juju/charm/v12" 10 "github.com/juju/charm/v12/hooks" 11 "github.com/juju/errors" 12 13 "github.com/juju/juju/core/life" 14 "github.com/juju/juju/core/model" 15 "github.com/juju/juju/rpc/params" 16 "github.com/juju/juju/worker" 17 "github.com/juju/juju/worker/uniter/hook" 18 "github.com/juju/juju/worker/uniter/operation" 19 "github.com/juju/juju/worker/uniter/remotestate" 20 "github.com/juju/juju/worker/uniter/resolver" 21 "github.com/juju/juju/wrench" 22 ) 23 24 // ResolverConfig defines configuration for the uniter resolver. 25 type ResolverConfig struct { 26 ModelType model.ModelType 27 ClearResolved func() error 28 ReportHookError func(hook.Info) error 29 ShouldRetryHooks bool 30 StartRetryHookTimer func() 31 StopRetryHookTimer func() 32 VerifyCharmProfile resolver.Resolver 33 UpgradeSeries resolver.Resolver 34 Reboot resolver.Resolver 35 Leadership resolver.Resolver 36 Actions resolver.Resolver 37 CreatedRelations resolver.Resolver 38 Relations resolver.Resolver 39 Storage resolver.Resolver 40 Commands resolver.Resolver 41 Secrets resolver.Resolver 42 OptionalResolvers []resolver.Resolver 43 Logger Logger 44 } 45 46 type uniterResolver struct { 47 config ResolverConfig 48 retryHookTimerStarted bool 49 } 50 51 // NewUniterResolver returns a new resolver.Resolver for the uniter. 52 func NewUniterResolver(cfg ResolverConfig) resolver.Resolver { 53 return &uniterResolver{ 54 config: cfg, 55 retryHookTimerStarted: false, 56 } 57 } 58 59 func (s *uniterResolver) NextOp( 60 localState resolver.LocalState, 61 remoteState remotestate.Snapshot, 62 opFactory operation.Factory, 63 ) (_ operation.Operation, err error) { 64 badge := "<unspecified>" 65 defer func() { 66 if err != nil && errors.Cause(err) != resolver.ErrNoOperation && err != resolver.ErrRestart { 67 s.config.Logger.Debugf("next %q operation could not be resolved: %v", badge, err) 68 } 69 }() 70 71 if remoteState.Life == life.Dead || localState.Removed { 72 return nil, resolver.ErrUnitDead 73 } 74 logger := s.config.Logger 75 76 // Operations for series-upgrade need to be resolved early, 77 // in particular because no other operations should be run when the unit 78 // has completed preparation and is waiting for upgrade completion. 79 badge = "upgrade series" 80 op, err := s.config.UpgradeSeries.NextOp(localState, remoteState, opFactory) 81 if errors.Cause(err) != resolver.ErrNoOperation { 82 if errors.Cause(err) == resolver.ErrDoNotProceed { 83 return nil, resolver.ErrNoOperation 84 } 85 return op, err 86 } 87 88 // Check if we need to notify the charms because a reboot was detected. 89 badge = "reboot" 90 op, err = s.config.Reboot.NextOp(localState, remoteState, opFactory) 91 if errors.Cause(err) != resolver.ErrNoOperation { 92 return op, err 93 } 94 95 if localState.Kind == operation.Upgrade { 96 badge = "upgrade" 97 if localState.Conflicted { 98 return s.nextOpConflicted(localState, remoteState, opFactory) 99 } 100 // continue upgrading the charm 101 logger.Infof("resuming charm upgrade") 102 return s.newUpgradeOperation(localState, remoteState, opFactory) 103 } 104 105 if localState.Restart { 106 // We've just run the upgrade op, which will change the 107 // unit's charm URL. We need to restart the resolver 108 // loop so that we start watching the correct events. 109 return nil, resolver.ErrRestart 110 } 111 112 if s.retryHookTimerStarted && (localState.Kind != operation.RunHook || localState.Step != operation.Pending) { 113 // The hook-retry timer is running, but there is no pending 114 // hook operation. We're not in an error state, so stop the 115 // timer now to reset the backoff state. 116 s.config.StopRetryHookTimer() 117 s.retryHookTimerStarted = false 118 } 119 120 badge = "relations" 121 op, err = s.config.CreatedRelations.NextOp(localState, remoteState, opFactory) 122 if errors.Cause(err) != resolver.ErrNoOperation { 123 return op, err 124 } 125 126 badge = "leadership" 127 op, err = s.config.Leadership.NextOp(localState, remoteState, opFactory) 128 if errors.Cause(err) != resolver.ErrNoOperation { 129 return op, err 130 } 131 132 badge = "optional" 133 for _, r := range s.config.OptionalResolvers { 134 op, err = r.NextOp(localState, remoteState, opFactory) 135 if errors.Cause(err) != resolver.ErrNoOperation { 136 return op, err 137 } 138 } 139 140 badge = "secrets" 141 op, err = s.config.Secrets.NextOp(localState, remoteState, opFactory) 142 if errors.Cause(err) != resolver.ErrNoOperation { 143 return op, err 144 } 145 146 badge = "actions" 147 op, err = s.config.Actions.NextOp(localState, remoteState, opFactory) 148 if errors.Cause(err) != resolver.ErrNoOperation { 149 return op, err 150 } 151 152 badge = "commands" 153 op, err = s.config.Commands.NextOp(localState, remoteState, opFactory) 154 if errors.Cause(err) != resolver.ErrNoOperation { 155 return op, err 156 } 157 158 badge = "storage" 159 op, err = s.config.Storage.NextOp(localState, remoteState, opFactory) 160 if errors.Cause(err) != resolver.ErrNoOperation { 161 return op, err 162 } 163 164 // If we are to shut down, we don't want to start running any more queued/pending hooks. 165 if remoteState.Shutdown { 166 badge = "shutdown" 167 logger.Debugf("unit agent is shutting down, will not run pending/queued hooks") 168 return s.nextOp(localState, remoteState, opFactory) 169 } 170 171 switch localState.Kind { 172 case operation.RunHook: 173 step := localState.Step 174 if localState.HookStep != nil { 175 step = *localState.HookStep 176 } 177 switch step { 178 case operation.Pending: 179 badge = "resolve hook" 180 logger.Infof("awaiting error resolution for %q hook", localState.Hook.Kind) 181 return s.nextOpHookError(localState, remoteState, opFactory) 182 183 case operation.Queued: 184 badge = "queued hook" 185 logger.Infof("found queued %q hook", localState.Hook.Kind) 186 if localState.Hook.Kind == hooks.Install { 187 // Special case: handle install in nextOp, 188 // so we do nothing when the unit is dying. 189 return s.nextOp(localState, remoteState, opFactory) 190 } 191 return opFactory.NewRunHook(*localState.Hook) 192 193 case operation.Done: 194 // Only check for the wrench if trace logging is enabled. Otherwise, 195 // we'd have to parse the charm url every time just to check to see 196 // if a wrench existed. 197 badge = "commit hook" 198 if localState.CharmURL != "" && logger.IsTraceEnabled() { 199 // If it's set, the charm url will parse. 200 curl := jujucharm.MustParseURL(localState.CharmURL) 201 if curl != nil && wrench.IsActive("hooks", fmt.Sprintf("%s-%s-error", curl.Name, localState.Hook.Kind)) { 202 s.config.Logger.Errorf("commit hook %q failed due to a wrench in the works", localState.Hook.Kind) 203 return nil, errors.Errorf("commit hook %q failed due to a wrench in the works", localState.Hook.Kind) 204 } 205 } 206 207 logger.Infof("committing %q hook", localState.Hook.Kind) 208 return opFactory.NewSkipHook(*localState.Hook) 209 210 default: 211 return nil, errors.Errorf("unknown hook operation step %v", step) 212 } 213 214 case operation.Continue: 215 badge = "idle" 216 logger.Debugf("no operations in progress; waiting for changes") 217 return s.nextOp(localState, remoteState, opFactory) 218 219 default: 220 return nil, errors.Errorf("unknown operation kind %v", localState.Kind) 221 } 222 } 223 224 // nextOpConflicted is called after an upgrade operation has failed, and hasn't 225 // yet been resolved or reverted. When in this mode, the resolver will only 226 // consider those two possibilities for progressing. 227 func (s *uniterResolver) nextOpConflicted( 228 localState resolver.LocalState, 229 remoteState remotestate.Snapshot, 230 opFactory operation.Factory, 231 ) (operation.Operation, error) { 232 // Only IAAS models deal with conflicted upgrades. 233 // TODO(caas) - what to do here. 234 235 // Verify the charm profile before proceeding. No hooks to run, if the 236 // correct one is not yet applied. 237 _, err := s.config.VerifyCharmProfile.NextOp(localState, remoteState, opFactory) 238 if e := errors.Cause(err); e == resolver.ErrDoNotProceed { 239 return nil, resolver.ErrNoOperation 240 } else if e != resolver.ErrNoOperation { 241 return nil, err 242 } 243 244 if remoteState.ResolvedMode != params.ResolvedNone { 245 if err := s.config.ClearResolved(); err != nil { 246 return nil, errors.Trace(err) 247 } 248 return opFactory.NewResolvedUpgrade(localState.CharmURL) 249 } 250 if remoteState.ForceCharmUpgrade && s.charmModified(localState, remoteState) { 251 return opFactory.NewRevertUpgrade(remoteState.CharmURL) 252 } 253 return nil, resolver.ErrWaiting 254 } 255 256 func (s *uniterResolver) newUpgradeOperation( 257 localState resolver.LocalState, 258 remoteState remotestate.Snapshot, 259 opFactory operation.Factory, 260 ) (operation.Operation, error) { 261 // Verify the charm profile before proceeding. No hooks to run, if the 262 // correct one is not yet applied. 263 _, err := s.config.VerifyCharmProfile.NextOp(localState, remoteState, opFactory) 264 if e := errors.Cause(err); e == resolver.ErrDoNotProceed { 265 return nil, resolver.ErrNoOperation 266 } else if e != resolver.ErrNoOperation { 267 return nil, err 268 } 269 return opFactory.NewUpgrade(remoteState.CharmURL) 270 } 271 272 func (s *uniterResolver) nextOpHookError( 273 localState resolver.LocalState, 274 remoteState remotestate.Snapshot, 275 opFactory operation.Factory, 276 ) (operation.Operation, error) { 277 278 // Report the hook error. 279 if err := s.config.ReportHookError(*localState.Hook); err != nil { 280 return nil, errors.Trace(err) 281 } 282 283 if remoteState.ForceCharmUpgrade && s.charmModified(localState, remoteState) { 284 return s.newUpgradeOperation(localState, remoteState, opFactory) 285 } 286 287 switch remoteState.ResolvedMode { 288 case params.ResolvedNone: 289 if remoteState.RetryHookVersion > localState.RetryHookVersion { 290 // We've been asked to retry: clear the hook timer 291 // started state so we'll restart it if this fails. 292 // 293 // If the hook fails again, we'll re-enter this method 294 // with the retry hook versions equal and restart the 295 // timer. If the hook succeeds, we'll enter nextOp 296 // and stop the timer. 297 s.retryHookTimerStarted = false 298 return opFactory.NewRunHook(*localState.Hook) 299 } 300 if !s.retryHookTimerStarted && s.config.ShouldRetryHooks { 301 // We haven't yet started a retry timer, so start one 302 // now. If we retry and fail, retryHookTimerStarted is 303 // cleared so that we'll still start it again. 304 s.config.StartRetryHookTimer() 305 s.retryHookTimerStarted = true 306 } 307 return nil, resolver.ErrNoOperation 308 case params.ResolvedRetryHooks: 309 s.config.StopRetryHookTimer() 310 s.retryHookTimerStarted = false 311 if err := s.config.ClearResolved(); err != nil { 312 return nil, errors.Trace(err) 313 } 314 return opFactory.NewRunHook(*localState.Hook) 315 case params.ResolvedNoHooks: 316 s.config.StopRetryHookTimer() 317 s.retryHookTimerStarted = false 318 if err := s.config.ClearResolved(); err != nil { 319 return nil, errors.Trace(err) 320 } 321 return opFactory.NewSkipHook(*localState.Hook) 322 default: 323 return nil, errors.Errorf( 324 "unknown resolved mode %q", remoteState.ResolvedMode, 325 ) 326 } 327 } 328 329 func (s *uniterResolver) charmModified(local resolver.LocalState, remote remotestate.Snapshot) bool { 330 // CAAS models may not yet have read the charm url from state. 331 if remote.CharmURL == "" { 332 return false 333 } 334 if local.CharmURL != remote.CharmURL { 335 s.config.Logger.Debugf("upgrade from %v to %v", local.CharmURL, remote.CharmURL) 336 return true 337 } 338 339 if local.CharmModifiedVersion != remote.CharmModifiedVersion { 340 s.config.Logger.Debugf("upgrade from CharmModifiedVersion %v to %v", local.CharmModifiedVersion, remote.CharmModifiedVersion) 341 return true 342 } 343 return false 344 } 345 346 func (s *uniterResolver) nextOp( 347 localState resolver.LocalState, 348 remoteState remotestate.Snapshot, 349 opFactory operation.Factory, 350 ) (operation.Operation, error) { 351 switch remoteState.Life { 352 case life.Alive: 353 if remoteState.Shutdown { 354 if localState.Started && !localState.Stopped { 355 return opFactory.NewRunHook(hook.Info{Kind: hooks.Stop}) 356 } else if !localState.Started || localState.Stopped { 357 return nil, worker.ErrTerminateAgent 358 } 359 } 360 case life.Dying: 361 // Normally we handle relations last, but if we're dying we 362 // must ensure that all relations are broken first. 363 op, err := s.config.Relations.NextOp(localState, remoteState, opFactory) 364 if errors.Cause(err) != resolver.ErrNoOperation { 365 return op, err 366 } 367 368 // We're not in a hook error and the unit is Dying, 369 // so we should proceed to tear down. 370 // 371 // TODO(axw) move logic for cascading destruction of 372 // subordinates, relation units and storage 373 // attachments into state, via cleanups. 374 if localState.Started && !localState.Stopped { 375 return opFactory.NewRunHook(hook.Info{Kind: hooks.Stop}) 376 } else if localState.Installed && !localState.Removed { 377 return opFactory.NewRunHook(hook.Info{Kind: hooks.Remove}) 378 } 379 fallthrough 380 case life.Dead: 381 // The unit is dying/dead and stopped, so tell the uniter 382 // to terminate. 383 return nil, resolver.ErrUnitDead 384 } 385 386 // Now that storage hooks have run at least once, before anything else, 387 // we need to run the install hook. 388 // TODO(cmars): remove !localState.Started. It's here as a temporary 389 // measure because unit agent upgrades aren't being performed yet. 390 if !localState.Installed && !localState.Started { 391 return opFactory.NewRunHook(hook.Info{Kind: hooks.Install}) 392 } 393 394 if s.charmModified(localState, remoteState) { 395 return s.newUpgradeOperation(localState, remoteState, opFactory) 396 } 397 398 configHashChanged := localState.ConfigHash != remoteState.ConfigHash 399 trustHashChanged := localState.TrustHash != remoteState.TrustHash 400 addressesHashChanged := localState.AddressesHash != remoteState.AddressesHash 401 if configHashChanged || trustHashChanged || addressesHashChanged { 402 return opFactory.NewRunHook(hook.Info{Kind: hooks.ConfigChanged}) 403 } 404 405 op, err := s.config.Relations.NextOp(localState, remoteState, opFactory) 406 if errors.Cause(err) != resolver.ErrNoOperation { 407 return op, err 408 } 409 410 // UpdateStatus hook runs if nothing else needs to. 411 if localState.UpdateStatusVersion != remoteState.UpdateStatusVersion { 412 return opFactory.NewRunHook(hook.Info{Kind: hooks.UpdateStatus}) 413 } 414 415 return nil, resolver.ErrNoOperation 416 }