// Source: github.com/wallyworld/juju@v0.0.0-20161013125918-6cf1bc9d917a/worker/uniter/uniter.go

// Copyright 2012-2015 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package uniter

import (
	"fmt"
	"os"
	"sync"
	"time"

	"github.com/juju/errors"
	"github.com/juju/loggo"
	"github.com/juju/mutex"
	"github.com/juju/utils"
	"github.com/juju/utils/clock"
	"github.com/juju/utils/exec"
	corecharm "gopkg.in/juju/charm.v6-unstable"
	"gopkg.in/juju/names.v2"

	"github.com/juju/juju/api/uniter"
	"github.com/juju/juju/apiserver/params"
	"github.com/juju/juju/core/leadership"
	"github.com/juju/juju/status"
	"github.com/juju/juju/worker"
	"github.com/juju/juju/worker/catacomb"
	"github.com/juju/juju/worker/fortress"
	"github.com/juju/juju/worker/uniter/actions"
	"github.com/juju/juju/worker/uniter/charm"
	"github.com/juju/juju/worker/uniter/hook"
	uniterleadership "github.com/juju/juju/worker/uniter/leadership"
	"github.com/juju/juju/worker/uniter/operation"
	"github.com/juju/juju/worker/uniter/relation"
	"github.com/juju/juju/worker/uniter/remotestate"
	"github.com/juju/juju/worker/uniter/resolver"
	"github.com/juju/juju/worker/uniter/runcommands"
	"github.com/juju/juju/worker/uniter/runner"
	"github.com/juju/juju/worker/uniter/runner/context"
	"github.com/juju/juju/worker/uniter/runner/jujuc"
	"github.com/juju/juju/worker/uniter/storage"
	jujuos "github.com/juju/utils/os"
)

// logger is the package-level logger, registered under the name
// "juju.worker.uniter".
var logger = loggo.GetLogger("juju.worker.uniter")

// A UniterExecutionObserver gets the appropriate methods called when a hook
// is executed and either succeeds or fails. Missing hooks don't get reported
// in this way.
type UniterExecutionObserver interface {
	// HookCompleted is called with the name of a hook that succeeded.
	HookCompleted(hookName string)
	// HookFailed is called with the name of a hook that failed.
	HookFailed(hookName string)
}

// Uniter implements the capabilities of the unit agent. It is not intended to
// implement the actual *behaviour* of the unit agent; that responsibility is
// delegated to Mode values, which are expected to react to events and direct
// the uniter's responses to them.
//
// NOTE(review): no Mode type is referenced anywhere in this file; the main
// loop drives behaviour through resolver.Loop. Confirm whether the sentence
// about Mode values is still accurate.
type Uniter struct {
	// catacomb owns the uniter's lifetime: Kill and Wait delegate to it, and
	// its Dying channel is handed to the helpers created in init.
	catacomb catacomb.Catacomb
	// st is the API facade supplied as UniterParams.UniterFacade.
	st *uniter.State
	// paths is built by NewPaths from the data dir and unit tag.
	paths Paths
	// unit, relations and storage are populated by init.
	unit      *uniter.Unit
	relations relation.Relations
	storage   *storage.Attachments
	// clock is supplied as UniterParams.Clock; it is passed to the hook
	// retry timer, the context factory and the execution-lock spec.
	clock clock.Clock

	// Cache the last reported status information
	// so we don't make unnecessary api calls.
	setStatusMutex      sync.Mutex
	lastReportedStatus  status.Status
	lastReportedMessage string

	// operationFactory and operationExecutor are created in init; the
	// executor is built by calling newOperationExecutor.
	operationFactory     operation.Factory
	operationExecutor    operation.Executor
	newOperationExecutor NewExecutorFunc

	leadershipTracker leadership.Tracker
	charmDirGuard     fortress.Guard

	// hookLockName is the name of the mutex acquired by
	// acquireExecutionLock (supplied as UniterParams.MachineLockName).
	hookLockName string

	// TODO(axw) move the runListener and run-command code outside of the
	// uniter, and introduce a separate worker. Each worker would feed
	// operations to a single, synchronized runner to execute.
	runListener    *RunListener
	commands       runcommands.Commands
	commandChannel chan string

	// The execution observer is only used in tests at this stage. Should this
	// need to be extended, perhaps a list of observers would be needed.
	observer UniterExecutionObserver

	// updateStatusAt defines a function that will be used to generate signals for
	// the update-status hook
	updateStatusAt func() <-chan time.Time

	// hookRetryStrategy represents configuration for hook retries
	hookRetryStrategy params.RetryStrategy

	// downloader is the downloader that should be used to get the charm
	// archive.
	downloader charm.Downloader
}

// UniterParams holds all the necessary parameters for a new Uniter.
type UniterParams struct {
	// UniterFacade is the API facade the uniter talks to.
	UniterFacade *uniter.State
	// UnitTag identifies the unit this uniter runs on behalf of.
	UnitTag names.UnitTag
	// LeadershipTracker is handed to the remote state watcher and to the
	// hook context factory.
	LeadershipTracker leadership.Tracker
	// DataDir is combined with UnitTag by NewPaths to derive the uniter's
	// on-disk paths.
	DataDir string
	// Downloader is used to fetch charm archives into the bundles dir.
	Downloader charm.Downloader
	// MachineLockName is the name of the mutex acquired before running
	// operations that execute external code.
	MachineLockName string
	// CharmDirGuard is passed through to the resolver loop.
	CharmDirGuard fortress.Guard
	// UpdateStatusSignal returns the channel that signals when the
	// update-status hook should run.
	UpdateStatusSignal func() <-chan time.Time
	// HookRetryStrategy configures whether and how failed hooks are retried.
	HookRetryStrategy params.RetryStrategy
	// NewOperationExecutor constructs the operation.Executor used by the
	// uniter; it is called once during initialisation.
	NewOperationExecutor NewExecutorFunc
	// Clock is the time source used for the hook retry timer and for
	// acquiring the execution lock.
	Clock clock.Clock
	// TODO (mattyw, wallyworld, fwereade) Having the observer here make this approach a bit more legitimate, but it isn't.
	// the observer is only a stop gap to be used in tests. A better approach would be to have the uniter tests start hooks
	// that write to files, and have the tests watch the output to know that hooks have finished.
	Observer UniterExecutionObserver
}

// NewExecutorFunc is the signature of the constructor the uniter uses to
// build its operation.Executor. The uniter calls it with the path of the
// operations state file, a function returning the application's charm URL,
// and a function that acquires the machine-level execution lock.
type NewExecutorFunc func(string, func() (*corecharm.URL, error), func() (mutex.Releaser, error)) (operation.Executor, error)

// NewUniter creates a new Uniter which will install, run, and upgrade
// a charm on behalf of the unit identified by uniterParams.UnitTag, by
// executing hooks and operations provoked by changes observed through
// uniterParams.UniterFacade.
129 func NewUniter(uniterParams *UniterParams) (*Uniter, error) { 130 u := &Uniter{ 131 st: uniterParams.UniterFacade, 132 paths: NewPaths(uniterParams.DataDir, uniterParams.UnitTag), 133 hookLockName: uniterParams.MachineLockName, 134 leadershipTracker: uniterParams.LeadershipTracker, 135 charmDirGuard: uniterParams.CharmDirGuard, 136 updateStatusAt: uniterParams.UpdateStatusSignal, 137 hookRetryStrategy: uniterParams.HookRetryStrategy, 138 newOperationExecutor: uniterParams.NewOperationExecutor, 139 observer: uniterParams.Observer, 140 clock: uniterParams.Clock, 141 downloader: uniterParams.Downloader, 142 } 143 err := catacomb.Invoke(catacomb.Plan{ 144 Site: &u.catacomb, 145 Work: func() error { 146 return u.loop(uniterParams.UnitTag) 147 }, 148 }) 149 return u, errors.Trace(err) 150 } 151 152 func (u *Uniter) loop(unitTag names.UnitTag) (err error) { 153 if err := u.init(unitTag); err != nil { 154 if err == worker.ErrTerminateAgent { 155 return err 156 } 157 return errors.Annotatef(err, "failed to initialize uniter for %q", unitTag) 158 } 159 logger.Infof("unit %q started", u.unit) 160 161 // Install is a special case, as it must run before there 162 // is any remote state, and before the remote state watcher 163 // is started. 
164 var charmURL *corecharm.URL 165 var charmModifiedVersion int 166 opState := u.operationExecutor.State() 167 if opState.Kind == operation.Install { 168 logger.Infof("resuming charm install") 169 op, err := u.operationFactory.NewInstall(opState.CharmURL) 170 if err != nil { 171 return errors.Trace(err) 172 } 173 if err := u.operationExecutor.Run(op); err != nil { 174 return errors.Trace(err) 175 } 176 charmURL = opState.CharmURL 177 } else { 178 curl, err := u.unit.CharmURL() 179 if err != nil { 180 return errors.Trace(err) 181 } 182 charmURL = curl 183 svc, err := u.unit.Application() 184 if err != nil { 185 return errors.Trace(err) 186 } 187 charmModifiedVersion, err = svc.CharmModifiedVersion() 188 if err != nil { 189 return errors.Trace(err) 190 } 191 } 192 193 var ( 194 watcher *remotestate.RemoteStateWatcher 195 watcherMu sync.Mutex 196 ) 197 198 logger.Infof("hooks are retried %v", u.hookRetryStrategy.ShouldRetry) 199 retryHookChan := make(chan struct{}, 1) 200 // TODO(katco): 2016-08-09: This type is deprecated: lp:1611427 201 retryHookTimer := utils.NewBackoffTimer(utils.BackoffTimerConfig{ 202 Min: u.hookRetryStrategy.MinRetryTime, 203 Max: u.hookRetryStrategy.MaxRetryTime, 204 Jitter: u.hookRetryStrategy.JitterRetryTime, 205 Factor: u.hookRetryStrategy.RetryTimeFactor, 206 Func: func() { 207 // Don't try to send on the channel if it's already full 208 // This can happen if the timer fires off before the event is consumed 209 // by the resolver loop 210 select { 211 case retryHookChan <- struct{}{}: 212 default: 213 } 214 }, 215 Clock: u.clock, 216 }) 217 defer func() { 218 // Whenever we exit the uniter we want to stop a potentially 219 // running timer so it doesn't trigger for nothing. 220 retryHookTimer.Reset() 221 }() 222 223 restartWatcher := func() error { 224 watcherMu.Lock() 225 defer watcherMu.Unlock() 226 227 if watcher != nil { 228 // watcher added to catacomb, will kill uniter if there's an error. 
229 worker.Stop(watcher) 230 } 231 var err error 232 watcher, err = remotestate.NewWatcher( 233 remotestate.WatcherConfig{ 234 State: remotestate.NewAPIState(u.st), 235 LeadershipTracker: u.leadershipTracker, 236 UnitTag: unitTag, 237 UpdateStatusChannel: u.updateStatusAt, 238 CommandChannel: u.commandChannel, 239 RetryHookChannel: retryHookChan, 240 }) 241 if err != nil { 242 return errors.Trace(err) 243 } 244 if err := u.catacomb.Add(watcher); err != nil { 245 return errors.Trace(err) 246 } 247 return nil 248 } 249 250 onIdle := func() error { 251 opState := u.operationExecutor.State() 252 if opState.Kind != operation.Continue { 253 // We should only set idle status if we're in 254 // the "Continue" state, which indicates that 255 // there is nothing to do and we're not in an 256 // error state. 257 return nil 258 } 259 return setAgentStatus(u, status.Idle, "", nil) 260 } 261 262 clearResolved := func() error { 263 if err := u.unit.ClearResolved(); err != nil { 264 return errors.Trace(err) 265 } 266 watcher.ClearResolvedMode() 267 return nil 268 } 269 270 for { 271 if err = restartWatcher(); err != nil { 272 err = errors.Annotate(err, "(re)starting watcher") 273 break 274 } 275 276 uniterResolver := NewUniterResolver(ResolverConfig{ 277 ClearResolved: clearResolved, 278 ReportHookError: u.reportHookError, 279 ShouldRetryHooks: u.hookRetryStrategy.ShouldRetry, 280 StartRetryHookTimer: retryHookTimer.Start, 281 StopRetryHookTimer: retryHookTimer.Reset, 282 Actions: actions.NewResolver(), 283 Leadership: uniterleadership.NewResolver(), 284 Relations: relation.NewRelationsResolver(u.relations), 285 Storage: storage.NewResolver(u.storage), 286 Commands: runcommands.NewCommandsResolver( 287 u.commands, watcher.CommandCompleted, 288 ), 289 }) 290 291 // We should not do anything until there has been a change 292 // to the remote state. The watcher will trigger at least 293 // once initially. 
294 select { 295 case <-u.catacomb.Dying(): 296 return u.catacomb.ErrDying() 297 case <-watcher.RemoteStateChanged(): 298 } 299 300 localState := resolver.LocalState{ 301 CharmURL: charmURL, 302 CharmModifiedVersion: charmModifiedVersion, 303 } 304 for err == nil { 305 err = resolver.Loop(resolver.LoopConfig{ 306 Resolver: uniterResolver, 307 Watcher: watcher, 308 Executor: u.operationExecutor, 309 Factory: u.operationFactory, 310 Abort: u.catacomb.Dying(), 311 OnIdle: onIdle, 312 CharmDirGuard: u.charmDirGuard, 313 }, &localState) 314 switch cause := errors.Cause(err); cause { 315 case nil: 316 // Loop back around. 317 case resolver.ErrLoopAborted: 318 err = u.catacomb.ErrDying() 319 case operation.ErrNeedsReboot: 320 err = worker.ErrRebootMachine 321 case operation.ErrHookFailed: 322 // Loop back around. The resolver can tell that it is in 323 // an error state by inspecting the operation state. 324 err = nil 325 case resolver.ErrTerminate: 326 err = u.terminate() 327 case resolver.ErrRestart: 328 // make sure we update the two values used above in 329 // creating LocalState. 330 charmURL = localState.CharmURL 331 charmModifiedVersion = localState.CharmModifiedVersion 332 // leave err assigned, causing loop to break 333 default: 334 // We need to set conflicted from here, because error 335 // handling is outside of the resolver's control. 
336 if operation.IsDeployConflictError(cause) { 337 localState.Conflicted = true 338 err = setAgentStatus(u, status.Error, "upgrade failed", nil) 339 } else { 340 reportAgentError(u, "resolver loop error", err) 341 } 342 } 343 } 344 345 if errors.Cause(err) != resolver.ErrRestart { 346 break 347 } 348 } 349 350 logger.Infof("unit %q shutting down: %s", u.unit, err) 351 return err 352 } 353 354 func (u *Uniter) terminate() error { 355 unitWatcher, err := u.unit.Watch() 356 if err != nil { 357 return errors.Trace(err) 358 } 359 if err := u.catacomb.Add(unitWatcher); err != nil { 360 return errors.Trace(err) 361 } 362 for { 363 select { 364 case <-u.catacomb.Dying(): 365 return u.catacomb.ErrDying() 366 case _, ok := <-unitWatcher.Changes(): 367 if !ok { 368 return errors.New("unit watcher closed") 369 } 370 if err := u.unit.Refresh(); err != nil { 371 return errors.Trace(err) 372 } 373 if hasSubs, err := u.unit.HasSubordinates(); err != nil { 374 return errors.Trace(err) 375 } else if hasSubs { 376 continue 377 } 378 // The unit is known to be Dying; so if it didn't have subordinates 379 // just above, it can't acquire new ones before this call. 380 if err := u.unit.EnsureDead(); err != nil { 381 return errors.Trace(err) 382 } 383 return worker.ErrTerminateAgent 384 } 385 } 386 } 387 388 func (u *Uniter) init(unitTag names.UnitTag) (err error) { 389 u.unit, err = u.st.Unit(unitTag) 390 if err != nil { 391 return err 392 } 393 if u.unit.Life() == params.Dead { 394 // If we started up already dead, we should not progress further. If we 395 // become Dead immediately after starting up, we may well complete any 396 // operations in progress before detecting it; but that race is fundamental 397 // and inescapable, whereas this one is not. 398 return worker.ErrTerminateAgent 399 } 400 // If initialising for the first time after deploying, update the status. 
401 currentStatus, err := u.unit.UnitStatus() 402 if err != nil { 403 return err 404 } 405 // TODO(fwereade/wallyworld): we should have an explicit place in the model 406 // to tell us when we've hit this point, instead of piggybacking on top of 407 // status and/or status history. 408 // If the previous status was waiting for machine, we transition to the next step. 409 if currentStatus.Status == string(status.Waiting) && 410 (currentStatus.Info == status.MessageWaitForMachine || currentStatus.Info == status.MessageInstallingAgent) { 411 if err := u.unit.SetUnitStatus(status.Waiting, status.MessageInitializingAgent, nil); err != nil { 412 return errors.Trace(err) 413 } 414 } 415 if err := jujuc.EnsureSymlinks(u.paths.ToolsDir); err != nil { 416 return err 417 } 418 if err := os.MkdirAll(u.paths.State.RelationsDir, 0755); err != nil { 419 return errors.Trace(err) 420 } 421 relations, err := relation.NewRelations( 422 u.st, unitTag, u.paths.State.CharmDir, 423 u.paths.State.RelationsDir, u.catacomb.Dying(), 424 ) 425 if err != nil { 426 return errors.Annotatef(err, "cannot create relations") 427 } 428 u.relations = relations 429 storageAttachments, err := storage.NewAttachments( 430 u.st, unitTag, u.paths.State.StorageDir, u.catacomb.Dying(), 431 ) 432 if err != nil { 433 return errors.Annotatef(err, "cannot create storage hook source") 434 } 435 u.storage = storageAttachments 436 u.commands = runcommands.NewCommands() 437 u.commandChannel = make(chan string) 438 439 if err := charm.ClearDownloads(u.paths.State.BundlesDir); err != nil { 440 logger.Warningf(err.Error()) 441 } 442 deployer, err := charm.NewDeployer( 443 u.paths.State.CharmDir, 444 u.paths.State.DeployerDir, 445 charm.NewBundlesDir(u.paths.State.BundlesDir, u.downloader), 446 ) 447 if err != nil { 448 return errors.Annotatef(err, "cannot create deployer") 449 } 450 contextFactory, err := context.NewContextFactory( 451 u.st, unitTag, u.leadershipTracker, u.relations.GetInfo, u.storage, u.paths, u.clock, 
452 ) 453 if err != nil { 454 return err 455 } 456 runnerFactory, err := runner.NewFactory( 457 u.st, u.paths, contextFactory, 458 ) 459 if err != nil { 460 return errors.Trace(err) 461 } 462 u.operationFactory = operation.NewFactory(operation.FactoryParams{ 463 Deployer: deployer, 464 RunnerFactory: runnerFactory, 465 Callbacks: &operationCallbacks{u}, 466 Abort: u.catacomb.Dying(), 467 MetricSpoolDir: u.paths.GetMetricsSpoolDir(), 468 }) 469 470 operationExecutor, err := u.newOperationExecutor(u.paths.State.OperationsFile, u.getServiceCharmURL, u.acquireExecutionLock) 471 if err != nil { 472 return errors.Trace(err) 473 } 474 u.operationExecutor = operationExecutor 475 476 logger.Debugf("starting juju-run listener on unix:%s", u.paths.Runtime.JujuRunSocket) 477 commandRunner, err := NewChannelCommandRunner(ChannelCommandRunnerConfig{ 478 Abort: u.catacomb.Dying(), 479 Commands: u.commands, 480 CommandChannel: u.commandChannel, 481 }) 482 if err != nil { 483 return errors.Annotate(err, "creating command runner") 484 } 485 u.runListener, err = NewRunListener(RunListenerConfig{ 486 SocketPath: u.paths.Runtime.JujuRunSocket, 487 CommandRunner: commandRunner, 488 }) 489 if err != nil { 490 return errors.Trace(err) 491 } 492 rlw := newRunListenerWrapper(u.runListener) 493 if err := u.catacomb.Add(rlw); err != nil { 494 return errors.Trace(err) 495 } 496 // The socket needs to have permissions 777 in order for other users to use it. 497 if jujuos.HostOS() != jujuos.Windows { 498 return os.Chmod(u.paths.Runtime.JujuRunSocket, 0777) 499 } 500 return nil 501 } 502 503 func (u *Uniter) Kill() { 504 u.catacomb.Kill(nil) 505 } 506 507 func (u *Uniter) Wait() error { 508 return u.catacomb.Wait() 509 } 510 511 func (u *Uniter) getServiceCharmURL() (*corecharm.URL, error) { 512 // TODO(fwereade): pretty sure there's no reason to make 2 API calls here. 
513 service, err := u.st.Application(u.unit.ApplicationTag()) 514 if err != nil { 515 return nil, err 516 } 517 charmURL, _, err := service.CharmURL() 518 return charmURL, err 519 } 520 521 // RunCommands executes the supplied commands in a hook context. 522 func (u *Uniter) RunCommands(args RunCommandsArgs) (results *exec.ExecResponse, err error) { 523 // TODO(axw) drop this when we move the run-listener to an independent 524 // worker. This exists purely for the tests. 525 return u.runListener.RunCommands(args) 526 } 527 528 // acquireExecutionLock acquires the machine-level execution lock, and 529 // returns a func that must be called to unlock it. It's used by operation.Executor 530 // when running operations that execute external code. 531 func (u *Uniter) acquireExecutionLock() (mutex.Releaser, error) { 532 // We want to make sure we don't block forever when locking, but take the 533 // Uniter's catacomb into account. 534 spec := mutex.Spec{ 535 Name: u.hookLockName, 536 Clock: u.clock, 537 Delay: 250 * time.Millisecond, 538 Cancel: u.catacomb.Dying(), 539 } 540 logger.Debugf("acquire lock %q for uniter hook execution", u.hookLockName) 541 releaser, err := mutex.Acquire(spec) 542 if err != nil { 543 return nil, errors.Trace(err) 544 } 545 logger.Debugf("lock %q acquired", u.hookLockName) 546 return releaser, nil 547 } 548 549 func (u *Uniter) reportHookError(hookInfo hook.Info) error { 550 // Set the agent status to "error". We must do this here in case the 551 // hook is interrupted (e.g. unit agent crashes), rather than immediately 552 // after attempting a runHookOp. 
553 hookName := string(hookInfo.Kind) 554 statusData := map[string]interface{}{} 555 if hookInfo.Kind.IsRelation() { 556 statusData["relation-id"] = hookInfo.RelationId 557 if hookInfo.RemoteUnit != "" { 558 statusData["remote-unit"] = hookInfo.RemoteUnit 559 } 560 relationName, err := u.relations.Name(hookInfo.RelationId) 561 if err != nil { 562 return errors.Trace(err) 563 } 564 hookName = fmt.Sprintf("%s-%s", relationName, hookInfo.Kind) 565 } 566 statusData["hook"] = hookName 567 statusMessage := fmt.Sprintf("hook failed: %q", hookName) 568 return setAgentStatus(u, status.Error, statusMessage, statusData) 569 }