github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/worker/uniter/uniter.go (about) 1 // Copyright 2012-2015 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package uniter 5 6 import ( 7 "fmt" 8 "os" 9 "sync" 10 11 "github.com/juju/clock" 12 "github.com/juju/errors" 13 "github.com/juju/loggo" 14 "github.com/juju/utils" 15 "github.com/juju/utils/exec" 16 corecharm "gopkg.in/juju/charm.v6" 17 "gopkg.in/juju/charm.v6/hooks" 18 "gopkg.in/juju/names.v2" 19 "gopkg.in/juju/worker.v1" 20 "gopkg.in/juju/worker.v1/catacomb" 21 22 "github.com/juju/juju/agent/tools" 23 "github.com/juju/juju/api/uniter" 24 "github.com/juju/juju/apiserver/params" 25 "github.com/juju/juju/core/leadership" 26 "github.com/juju/juju/core/machinelock" 27 "github.com/juju/juju/core/model" 28 "github.com/juju/juju/core/status" 29 "github.com/juju/juju/core/watcher" 30 jworker "github.com/juju/juju/worker" 31 "github.com/juju/juju/worker/fortress" 32 "github.com/juju/juju/worker/uniter/actions" 33 "github.com/juju/juju/worker/uniter/charm" 34 "github.com/juju/juju/worker/uniter/hook" 35 uniterleadership "github.com/juju/juju/worker/uniter/leadership" 36 "github.com/juju/juju/worker/uniter/operation" 37 "github.com/juju/juju/worker/uniter/relation" 38 "github.com/juju/juju/worker/uniter/remotestate" 39 "github.com/juju/juju/worker/uniter/resolver" 40 "github.com/juju/juju/worker/uniter/runcommands" 41 "github.com/juju/juju/worker/uniter/runner" 42 "github.com/juju/juju/worker/uniter/runner/context" 43 "github.com/juju/juju/worker/uniter/runner/jujuc" 44 "github.com/juju/juju/worker/uniter/storage" 45 "github.com/juju/juju/worker/uniter/upgradecharmprofile" 46 "github.com/juju/juju/worker/uniter/upgradeseries" 47 ) 48 49 var logger = loggo.GetLogger("juju.worker.uniter") 50 51 // A UniterExecutionObserver gets the appropriate methods called when a hook 52 // is executed and either succeeds or fails. Missing hooks don't get reported 53 // in this way. 54 type UniterExecutionObserver interface { 55 HookCompleted(hookName string) 56 HookFailed(hookName string) 57 } 58 59 // Uniter implements the capabilities of the unit agent. It is not intended to 60 // implement the actual *behaviour* of the unit agent; that responsibility is 61 // delegated to Mode values, which are expected to react to events and direct 62 // the uniter's responses to them. 63 type Uniter struct { 64 catacomb catacomb.Catacomb 65 st *uniter.State 66 paths Paths 67 unit *uniter.Unit 68 modelType model.ModelType 69 relations relation.Relations 70 storage *storage.Attachments 71 clock clock.Clock 72 73 // Cache the last reported status information 74 // so we don't make unnecessary api calls. 75 setStatusMutex sync.Mutex 76 lastReportedStatus status.Status 77 lastReportedMessage string 78 79 operationFactory operation.Factory 80 operationExecutor operation.Executor 81 newOperationExecutor NewExecutorFunc 82 translateResolverErr func(error) error 83 84 leadershipTracker leadership.Tracker 85 charmDirGuard fortress.Guard 86 87 hookLock machinelock.Lock 88 89 // TODO(axw) move the runListener and run-command code outside of the 90 // uniter, and introduce a separate worker. Each worker would feed 91 // operations to a single, synchronized runner to execute. 92 runListener *RunListener 93 commands runcommands.Commands 94 commandChannel chan string 95 96 // The execution observer is only used in tests at this stage. Should this 97 // need to be extended, perhaps a list of observers would be needed. 98 observer UniterExecutionObserver 99 100 // updateStatusAt defines a function that will be used to generate signals for 101 // the update-status hook 102 updateStatusAt remotestate.UpdateStatusTimerFunc 103 104 // applicationChannel, if set, is used to signal a change in the 105 // application's charm. It is passed to the remote state watcher. 106 applicationChannel watcher.NotifyChannel 107 108 // hookRetryStrategy represents configuration for hook retries 109 hookRetryStrategy params.RetryStrategy 110 111 // downloader is the downloader that should be used to get the charm 112 // archive. 113 downloader charm.Downloader 114 } 115 116 // UniterParams hold all the necessary parameters for a new Uniter. 117 type UniterParams struct { 118 UniterFacade *uniter.State 119 UnitTag names.UnitTag 120 LeadershipTracker leadership.Tracker 121 DataDir string 122 Downloader charm.Downloader 123 MachineLock machinelock.Lock 124 CharmDirGuard fortress.Guard 125 UpdateStatusSignal remotestate.UpdateStatusTimerFunc 126 HookRetryStrategy params.RetryStrategy 127 NewOperationExecutor NewExecutorFunc 128 TranslateResolverErr func(error) error 129 Clock clock.Clock 130 ApplicationChannel watcher.NotifyChannel 131 // TODO (mattyw, wallyworld, fwereade) Having the observer here make this approach a bit more legitimate, but it isn't. 132 // the observer is only a stop gap to be used in tests. A better approach would be to have the uniter tests start hooks 133 // that write to files, and have the tests watch the output to know that hooks have finished. 134 Observer UniterExecutionObserver 135 } 136 137 type NewExecutorFunc func(string, operation.State, func(string) (func(), error)) (operation.Executor, error) 138 139 // NewUniter creates a new Uniter which will install, run, and upgrade 140 // a charm on behalf of the unit with the given unitTag, by executing 141 // hooks and operations provoked by changes in st. 142 func NewUniter(uniterParams *UniterParams) (*Uniter, error) { 143 startFunc := newUniter(uniterParams) 144 w, err := startFunc() 145 return w.(*Uniter), err 146 } 147 148 // StartUniter creates a new Uniter and starts it using the specified runner. 149 func StartUniter(runner *worker.Runner, params *UniterParams) error { 150 startFunc := newUniter(params) 151 152 logger.Debugf("starting uniter for %q", params.UnitTag.Id()) 153 err := runner.StartWorker(params.UnitTag.Id(), startFunc) 154 return errors.Annotate(err, "error starting uniter worker") 155 } 156 157 func newUniter(uniterParams *UniterParams) func() (worker.Worker, error) { 158 translateResolverErr := uniterParams.TranslateResolverErr 159 if translateResolverErr == nil { 160 translateResolverErr = func(err error) error { return err } 161 } 162 163 u := &Uniter{ 164 st: uniterParams.UniterFacade, 165 paths: NewPaths(uniterParams.DataDir, uniterParams.UnitTag), 166 hookLock: uniterParams.MachineLock, 167 leadershipTracker: uniterParams.LeadershipTracker, 168 charmDirGuard: uniterParams.CharmDirGuard, 169 updateStatusAt: uniterParams.UpdateStatusSignal, 170 hookRetryStrategy: uniterParams.HookRetryStrategy, 171 newOperationExecutor: uniterParams.NewOperationExecutor, 172 translateResolverErr: translateResolverErr, 173 observer: uniterParams.Observer, 174 clock: uniterParams.Clock, 175 downloader: uniterParams.Downloader, 176 applicationChannel: uniterParams.ApplicationChannel, 177 } 178 startFunc := func() (worker.Worker, error) { 179 if err := catacomb.Invoke(catacomb.Plan{ 180 Site: &u.catacomb, 181 Work: func() error { 182 return u.loop(uniterParams.UnitTag) 183 }, 184 }); err != nil { 185 return nil, errors.Trace(err) 186 } 187 return u, nil 188 } 189 return startFunc 190 } 191 192 func (u *Uniter) loop(unitTag names.UnitTag) (err error) { 193 if err := u.init(unitTag); err != nil { 194 if err == jworker.ErrTerminateAgent { 195 return err 196 } 197 return errors.Annotatef(err, "failed to initialize uniter for %q", unitTag) 198 } 199 logger.Infof("unit %q started", u.unit) 200 201 // Install is a special case, as it must run before there 202 // is any remote state, and before the remote state watcher 203 // is started. 204 var charmURL *corecharm.URL 205 var charmModifiedVersion int 206 opState := u.operationExecutor.State() 207 if opState.Kind == operation.Install { 208 logger.Infof("resuming charm install") 209 op, err := u.operationFactory.NewInstall(opState.CharmURL) 210 if err != nil { 211 return errors.Trace(err) 212 } 213 if err := u.operationExecutor.Run(op); err != nil { 214 return errors.Trace(err) 215 } 216 charmURL = opState.CharmURL 217 } else { 218 curl, err := u.unit.CharmURL() 219 if err != nil { 220 return errors.Trace(err) 221 } 222 charmURL = curl 223 app, err := u.unit.Application() 224 if err != nil { 225 return errors.Trace(err) 226 } 227 charmModifiedVersion, err = app.CharmModifiedVersion() 228 if err != nil { 229 return errors.Trace(err) 230 } 231 } 232 233 var ( 234 watcher *remotestate.RemoteStateWatcher 235 watcherMu sync.Mutex 236 ) 237 238 logger.Infof("hooks are retried %v", u.hookRetryStrategy.ShouldRetry) 239 retryHookChan := make(chan struct{}, 1) 240 // TODO(katco): 2016-08-09: This type is deprecated: lp:1611427 241 retryHookTimer := utils.NewBackoffTimer(utils.BackoffTimerConfig{ 242 Min: u.hookRetryStrategy.MinRetryTime, 243 Max: u.hookRetryStrategy.MaxRetryTime, 244 Jitter: u.hookRetryStrategy.JitterRetryTime, 245 Factor: u.hookRetryStrategy.RetryTimeFactor, 246 Func: func() { 247 // Don't try to send on the channel if it's already full 248 // This can happen if the timer fires off before the event is consumed 249 // by the resolver loop 250 select { 251 case retryHookChan <- struct{}{}: 252 default: 253 } 254 }, 255 Clock: u.clock, 256 }) 257 defer func() { 258 // Whenever we exit the uniter we want to stop a potentially 259 // running timer so it doesn't trigger for nothing. 260 retryHookTimer.Reset() 261 }() 262 263 restartWatcher := func() error { 264 watcherMu.Lock() 265 defer watcherMu.Unlock() 266 267 if watcher != nil { 268 // watcher added to catacomb, will kill uniter if there's an error. 269 worker.Stop(watcher) 270 } 271 var err error 272 watcher, err = remotestate.NewWatcher( 273 remotestate.WatcherConfig{ 274 State: remotestate.NewAPIState(u.st), 275 LeadershipTracker: u.leadershipTracker, 276 UnitTag: unitTag, 277 UpdateStatusChannel: u.updateStatusAt, 278 CommandChannel: u.commandChannel, 279 RetryHookChannel: retryHookChan, 280 ApplicationChannel: u.applicationChannel, 281 ModelType: u.modelType, 282 }) 283 if err != nil { 284 return errors.Trace(err) 285 } 286 if err := u.catacomb.Add(watcher); err != nil { 287 return errors.Trace(err) 288 } 289 return nil 290 } 291 292 onIdle := func() error { 293 opState := u.operationExecutor.State() 294 if opState.Kind != operation.Continue { 295 // We should only set idle status if we're in 296 // the "Continue" state, which indicates that 297 // there is nothing to do and we're not in an 298 // error state. 299 return nil 300 } 301 return setAgentStatus(u, status.Idle, "", nil) 302 } 303 304 clearResolved := func() error { 305 if err := u.unit.ClearResolved(); err != nil { 306 return errors.Trace(err) 307 } 308 watcher.ClearResolvedMode() 309 return nil 310 } 311 312 for { 313 if err = restartWatcher(); err != nil { 314 err = errors.Annotate(err, "(re)starting watcher") 315 break 316 } 317 318 cfg := ResolverConfig{ 319 ModelType: u.modelType, 320 ClearResolved: clearResolved, 321 ReportHookError: u.reportHookError, 322 ShouldRetryHooks: u.hookRetryStrategy.ShouldRetry, 323 StartRetryHookTimer: retryHookTimer.Start, 324 StopRetryHookTimer: retryHookTimer.Reset, 325 Actions: actions.NewResolver(), 326 UpgradeSeries: upgradeseries.NewResolver(), 327 UpgradeCharmProfile: upgradecharmprofile.NewResolver(), 328 Leadership: uniterleadership.NewResolver(), 329 Relations: relation.NewRelationsResolver(u.relations), 330 Storage: storage.NewResolver(u.storage, u.modelType), 331 Commands: runcommands.NewCommandsResolver( 332 u.commands, watcher.CommandCompleted, 333 ), 334 } 335 uniterResolver := NewUniterResolver(cfg) 336 337 // We should not do anything until there has been a change 338 // to the remote state. The watcher will trigger at least 339 // once initially. 340 select { 341 case <-u.catacomb.Dying(): 342 return u.catacomb.ErrDying() 343 case <-watcher.RemoteStateChanged(): 344 } 345 346 localState := resolver.LocalState{ 347 CharmURL: charmURL, 348 CharmModifiedVersion: charmModifiedVersion, 349 UpgradeSeriesStatus: model.UpgradeSeriesNotStarted, 350 } 351 for err == nil { 352 err = resolver.Loop(resolver.LoopConfig{ 353 Resolver: uniterResolver, 354 Watcher: watcher, 355 Executor: u.operationExecutor, 356 Factory: u.operationFactory, 357 Abort: u.catacomb.Dying(), 358 OnIdle: onIdle, 359 CharmDirGuard: u.charmDirGuard, 360 }, &localState) 361 362 err = u.translateResolverErr(err) 363 364 switch cause := errors.Cause(err); cause { 365 case nil: 366 // Loop back around. 367 case resolver.ErrLoopAborted: 368 err = u.catacomb.ErrDying() 369 case operation.ErrNeedsReboot: 370 err = jworker.ErrRebootMachine 371 case operation.ErrHookFailed: 372 // Loop back around. The resolver can tell that it is in 373 // an error state by inspecting the operation state. 374 err = nil 375 case resolver.ErrTerminate: 376 err = u.terminate() 377 case resolver.ErrRestart: 378 // make sure we update the two values used above in 379 // creating LocalState. 380 charmURL = localState.CharmURL 381 charmModifiedVersion = localState.CharmModifiedVersion 382 // leave err assigned, causing loop to break 383 default: 384 // We need to set conflicted from here, because error 385 // handling is outside of the resolver's control. 386 if operation.IsDeployConflictError(cause) { 387 localState.Conflicted = true 388 err = setAgentStatus(u, status.Error, "upgrade failed", nil) 389 } else { 390 reportAgentError(u, "resolver loop error", err) 391 } 392 } 393 } 394 395 if errors.Cause(err) != resolver.ErrRestart { 396 break 397 } 398 } 399 400 logger.Infof("unit %q shutting down: %s", u.unit, err) 401 return err 402 } 403 404 func (u *Uniter) terminate() error { 405 unitWatcher, err := u.unit.Watch() 406 if err != nil { 407 return errors.Trace(err) 408 } 409 if err := u.catacomb.Add(unitWatcher); err != nil { 410 return errors.Trace(err) 411 } 412 for { 413 select { 414 case <-u.catacomb.Dying(): 415 return u.catacomb.ErrDying() 416 case _, ok := <-unitWatcher.Changes(): 417 if !ok { 418 return errors.New("unit watcher closed") 419 } 420 if err := u.unit.Refresh(); err != nil { 421 return errors.Trace(err) 422 } 423 if hasSubs, err := u.unit.HasSubordinates(); err != nil { 424 return errors.Trace(err) 425 } else if hasSubs { 426 continue 427 } 428 // The unit is known to be Dying; so if it didn't have subordinates 429 // just above, it can't acquire new ones before this call. 430 if err := u.unit.EnsureDead(); err != nil { 431 return errors.Trace(err) 432 } 433 return u.stopUnitError() 434 } 435 } 436 } 437 438 // stopUnitError returns the error to use when exiting from stopping the unit. 439 // For IAAS models, we want to terminate the agent, as each unit is run by 440 // an individual agent for that unit. 441 func (u *Uniter) stopUnitError() error { 442 if u.modelType == model.IAAS { 443 return jworker.ErrTerminateAgent 444 } 445 return nil 446 } 447 448 func (u *Uniter) init(unitTag names.UnitTag) (err error) { 449 u.unit, err = u.st.Unit(unitTag) 450 if err != nil { 451 return err 452 } 453 if u.unit.Life() == params.Dead { 454 // If we started up already dead, we should not progress further. If we 455 // become Dead immediately after starting up, we may well complete any 456 // operations in progress before detecting it; but that race is fundamental 457 // and inescapable, whereas this one is not. 458 return u.stopUnitError() 459 } 460 // If initialising for the first time after deploying, update the status. 461 currentStatus, err := u.unit.UnitStatus() 462 if err != nil { 463 return err 464 } 465 // TODO(fwereade/wallyworld): we should have an explicit place in the model 466 // to tell us when we've hit this point, instead of piggybacking on top of 467 // status and/or status history. 468 // If the previous status was waiting for machine, we transition to the next step. 469 if currentStatus.Status == string(status.Waiting) && 470 (currentStatus.Info == status.MessageWaitForMachine || currentStatus.Info == status.MessageInstallingAgent) { 471 if err := u.unit.SetUnitStatus(status.Waiting, status.MessageInitializingAgent, nil); err != nil { 472 return errors.Trace(err) 473 } 474 } 475 if err := tools.EnsureSymlinks(u.paths.ToolsDir, u.paths.ToolsDir, jujuc.CommandNames()); err != nil { 476 return err 477 } 478 if err := os.MkdirAll(u.paths.State.RelationsDir, 0755); err != nil { 479 return errors.Trace(err) 480 } 481 relations, err := relation.NewRelations( 482 relation.RelationsConfig{ 483 State: u.st, 484 UnitTag: unitTag, 485 Tracker: u.leadershipTracker, 486 NewLeadershipContext: context.NewLeadershipContext, 487 CharmDir: u.paths.State.CharmDir, 488 RelationsDir: u.paths.State.RelationsDir, 489 Abort: u.catacomb.Dying(), 490 }) 491 if err != nil { 492 return errors.Annotatef(err, "cannot create relations") 493 } 494 u.relations = relations 495 u.commands = runcommands.NewCommands() 496 u.commandChannel = make(chan string) 497 498 m, err := u.st.Model() 499 if err != nil { 500 return errors.Trace(err) 501 } 502 u.modelType = m.ModelType 503 storageAttachments, err := storage.NewAttachments( 504 u.st, unitTag, u.paths.State.StorageDir, u.catacomb.Dying(), 505 ) 506 if err != nil { 507 return errors.Annotatef(err, "cannot create storage hook source") 508 } 509 u.storage = storageAttachments 510 511 // Only IAAS models require the uniter to install charms. 512 // For CAAS models this is done by the operator. 513 var deployer charm.Deployer 514 if u.modelType == model.IAAS { 515 if err := charm.ClearDownloads(u.paths.State.BundlesDir); err != nil { 516 logger.Warningf(err.Error()) 517 } 518 deployer, err = charm.NewDeployer( 519 u.paths.State.CharmDir, 520 u.paths.State.DeployerDir, 521 charm.NewBundlesDir(u.paths.State.BundlesDir, u.downloader), 522 ) 523 if err != nil { 524 return errors.Annotatef(err, "cannot create deployer") 525 } 526 } 527 contextFactory, err := context.NewContextFactory(context.FactoryConfig{ 528 State: u.st, 529 UnitTag: unitTag, 530 Tracker: u.leadershipTracker, 531 GetRelationInfos: u.relations.GetInfo, 532 Storage: u.storage, 533 Paths: u.paths, 534 Clock: u.clock, 535 }) 536 if err != nil { 537 return err 538 } 539 runnerFactory, err := runner.NewFactory( 540 u.st, u.paths, contextFactory, 541 ) 542 if err != nil { 543 return errors.Trace(err) 544 } 545 u.operationFactory = operation.NewFactory(operation.FactoryParams{ 546 Deployer: deployer, 547 RunnerFactory: runnerFactory, 548 Callbacks: &operationCallbacks{u}, 549 Abort: u.catacomb.Dying(), 550 MetricSpoolDir: u.paths.GetMetricsSpoolDir(), 551 }) 552 553 charmURL, err := u.getApplicationCharmURL() 554 if err != nil { 555 return errors.Trace(err) 556 } 557 558 var initialState operation.State 559 if u.modelType == model.IAAS { 560 initialState = operation.State{ 561 Kind: operation.Install, 562 Step: operation.Queued, 563 CharmURL: charmURL, 564 } 565 } else { 566 initialState = operation.State{ 567 Hook: &hook.Info{Kind: hooks.Start}, 568 Kind: operation.RunHook, 569 Step: operation.Queued, 570 Installed: true, 571 } 572 if err := u.unit.SetCharmURL(charmURL); err != nil { 573 return errors.Trace(err) 574 } 575 } 576 operationExecutor, err := u.newOperationExecutor(u.paths.State.OperationsFile, initialState, u.acquireExecutionLock) 577 if err != nil { 578 return errors.Trace(err) 579 } 580 u.operationExecutor = operationExecutor 581 582 logger.Debugf("starting juju-run listener on unix:%s", u.paths.Runtime.JujuRunSocket) 583 commandRunner, err := NewChannelCommandRunner(ChannelCommandRunnerConfig{ 584 Abort: u.catacomb.Dying(), 585 Commands: u.commands, 586 CommandChannel: u.commandChannel, 587 }) 588 if err != nil { 589 return errors.Annotate(err, "creating command runner") 590 } 591 u.runListener, err = NewRunListener(RunListenerConfig{ 592 SocketPath: u.paths.Runtime.JujuRunSocket, 593 CommandRunner: commandRunner, 594 }) 595 if err != nil { 596 return errors.Annotate(err, "creating juju run listener") 597 } 598 rlw := newRunListenerWrapper(u.runListener) 599 if err := u.catacomb.Add(rlw); err != nil { 600 return errors.Trace(err) 601 } 602 return nil 603 } 604 605 func (u *Uniter) Kill() { 606 u.catacomb.Kill(nil) 607 } 608 609 func (u *Uniter) Wait() error { 610 return u.catacomb.Wait() 611 } 612 613 func (u *Uniter) getApplicationCharmURL() (*corecharm.URL, error) { 614 // TODO(fwereade): pretty sure there's no reason to make 2 API calls here. 615 app, err := u.st.Application(u.unit.ApplicationTag()) 616 if err != nil { 617 return nil, err 618 } 619 charmURL, _, err := app.CharmURL() 620 return charmURL, err 621 } 622 623 // RunCommands executes the supplied commands in a hook context. 624 func (u *Uniter) RunCommands(args RunCommandsArgs) (results *exec.ExecResponse, err error) { 625 // TODO(axw) drop this when we move the run-listener to an independent 626 // worker. This exists purely for the tests. 627 return u.runListener.RunCommands(args) 628 } 629 630 // acquireExecutionLock acquires the machine-level execution lock, and 631 // returns a func that must be called to unlock it. It's used by operation.Executor 632 // when running operations that execute external code. 633 func (u *Uniter) acquireExecutionLock(action string) (func(), error) { 634 // We want to make sure we don't block forever when locking, but take the 635 // Uniter's catacomb into account. 636 spec := machinelock.Spec{ 637 Cancel: u.catacomb.Dying(), 638 Worker: "uniter", 639 Comment: action, 640 } 641 releaser, err := u.hookLock.Acquire(spec) 642 if err != nil { 643 return nil, errors.Trace(err) 644 } 645 return releaser, nil 646 } 647 648 func (u *Uniter) reportHookError(hookInfo hook.Info) error { 649 // Set the agent status to "error". We must do this here in case the 650 // hook is interrupted (e.g. unit agent crashes), rather than immediately 651 // after attempting a runHookOp. 652 hookName := string(hookInfo.Kind) 653 statusData := map[string]interface{}{} 654 if hookInfo.Kind.IsRelation() { 655 statusData["relation-id"] = hookInfo.RelationId 656 if hookInfo.RemoteUnit != "" { 657 statusData["remote-unit"] = hookInfo.RemoteUnit 658 } 659 relationName, err := u.relations.Name(hookInfo.RelationId) 660 if err != nil { 661 return errors.Trace(err) 662 } 663 hookName = fmt.Sprintf("%s-%s", relationName, hookInfo.Kind) 664 } 665 statusData["hook"] = hookName 666 statusMessage := fmt.Sprintf("hook failed: %q", hookName) 667 return setAgentStatus(u, status.Error, statusMessage, statusData) 668 }