github.com/Pankov404/juju@v0.0.0-20150703034450-be266991dceb/worker/uniter/modes.go (about) 1 // Copyright 2012-2015 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package uniter 5 6 import ( 7 "fmt" 8 "time" 9 10 "github.com/juju/errors" 11 "gopkg.in/juju/charm.v5" 12 "gopkg.in/juju/charm.v5/hooks" 13 "launchpad.net/tomb" 14 15 "github.com/juju/juju/apiserver/params" 16 "github.com/juju/juju/state/watcher" 17 "github.com/juju/juju/worker" 18 "github.com/juju/juju/worker/uniter/hook" 19 "github.com/juju/juju/worker/uniter/operation" 20 ) 21 22 // setAgentStatus sets the unit's status if it has changed since last time this method was called. 23 func setAgentStatus(u *Uniter, status params.Status, info string, data map[string]interface{}) error { 24 u.setStatusMutex.Lock() 25 defer u.setStatusMutex.Unlock() 26 if u.lastReportedStatus == status && u.lastReportedMessage == info { 27 return nil 28 } 29 u.lastReportedStatus = status 30 u.lastReportedMessage = info 31 logger.Debugf("[AGENT-STATUS] %s: %s", status, info) 32 return u.unit.SetAgentStatus(status, info, data) 33 } 34 35 // updateAgentStatus updates the agent status to reflect what the uniter is doing, 36 // or to report on an error. 37 func updateAgentStatus(u *Uniter, userMessage string, err error) { 38 // If there was an error performing the operation, set the state 39 // of the agent to Failed. 40 if err != nil { 41 msg := fmt.Sprintf("%s: %v", userMessage, err) 42 err2 := setAgentStatus(u, params.StatusFailed, msg, nil) 43 if err2 != nil { 44 logger.Errorf("updating agent status: %v", err2) 45 } 46 return 47 } 48 // Anything else, the uniter is doing something, running a hook or action etc. 49 err2 := setAgentStatus(u, params.StatusExecuting, userMessage, nil) 50 if err2 != nil { 51 logger.Errorf("updating agent status: %v", err2) 52 } 53 } 54 55 // Mode defines the signature of the functions that implement the possible 56 // states of a running Uniter. 57 type Mode func(u *Uniter) (Mode, error) 58 59 // ModeContinue determines what action to take based on persistent uniter state. 60 func ModeContinue(u *Uniter) (next Mode, err error) { 61 defer modeContext("ModeContinue", &err)() 62 opState := u.operationState() 63 64 // Resume interrupted deployment operations. 65 if opState.Kind == operation.Install { 66 logger.Infof("resuming charm install") 67 return ModeInstalling(opState.CharmURL) 68 } else if opState.Kind == operation.Upgrade { 69 logger.Infof("resuming charm upgrade") 70 return ModeUpgrading(opState.CharmURL), nil 71 } 72 73 // If we got this far, we should have an installed charm, 74 // so initialize the metrics collector according to what's 75 // currently deployed. 76 if err := u.initializeMetricsCollector(); err != nil { 77 return nil, errors.Trace(err) 78 } 79 80 // Check for any leadership change, and enact it if possible. 81 logger.Infof("checking leadership status") 82 // If we've already accepted leadership, we don't need to do it again. 83 canAcceptLeader := !opState.Leader 84 select { 85 // If the unit's shutting down, we shouldn't accept it. 86 case <-u.f.UnitDying(): 87 canAcceptLeader = false 88 default: 89 // If we're in an unexpected mode (eg pending hook) we shouldn't try either. 90 if opState.Kind != operation.Continue { 91 canAcceptLeader = false 92 } 93 } 94 95 // NOTE: the Wait() looks scary, but a ClaimLeadership ticket should always 96 // complete quickly; worst-case is API latency time, but it's designed that 97 // it should be vanishingly rare to hit that code path. 98 isLeader := u.leadershipTracker.ClaimLeader().Wait() 99 var creator creator 100 switch { 101 case isLeader && canAcceptLeader: 102 creator = newAcceptLeadershipOp() 103 case opState.Leader && !isLeader: 104 creator = newResignLeadershipOp() 105 } 106 if creator != nil { 107 return continueAfter(u, creator) 108 } 109 logger.Infof("leadership status is up-to-date") 110 111 switch opState.Kind { 112 case operation.RunAction: 113 // TODO(fwereade): we *should* handle interrupted actions, and make sure 114 // they're marked as failed, but that's not for now. 115 if opState.Hook != nil { 116 logger.Infof("found incomplete action %q; ignoring", opState.ActionId) 117 logger.Infof("recommitting prior %q hook", opState.Hook.Kind) 118 creator = newSkipHookOp(*opState.Hook) 119 } else { 120 logger.Infof("%q hook is nil", operation.RunAction) 121 } 122 case operation.RunHook: 123 switch opState.Step { 124 case operation.Pending: 125 logger.Infof("awaiting error resolution for %q hook", opState.Hook.Kind) 126 return ModeHookError, nil 127 case operation.Queued: 128 logger.Infof("found queued %q hook", opState.Hook.Kind) 129 // Ensure storage-attached hooks are run before install 130 // or upgrade hooks. 131 switch opState.Hook.Kind { 132 case hooks.UpgradeCharm: 133 // Force a refresh of all storage attachments, 134 // so we find out about new ones introduced 135 // by the charm upgrade. 136 if err := u.storage.Refresh(); err != nil { 137 return nil, errors.Trace(err) 138 } 139 fallthrough 140 case hooks.Install: 141 if err := waitStorage(u); err != nil { 142 return nil, errors.Trace(err) 143 } 144 } 145 creator = newRunHookOp(*opState.Hook) 146 case operation.Done: 147 logger.Infof("committing %q hook", opState.Hook.Kind) 148 creator = newSkipHookOp(*opState.Hook) 149 } 150 case operation.Continue: 151 if opState.Stopped { 152 logger.Infof("opState.Stopped == true; transition to ModeTerminating") 153 return ModeTerminating, nil 154 } 155 logger.Infof("no operations in progress; waiting for changes") 156 return ModeAbide, nil 157 default: 158 return nil, errors.Errorf("unknown operation kind %v", opState.Kind) 159 } 160 return continueAfter(u, creator) 161 } 162 163 // ModeInstalling is responsible for the initial charm deployment. If an install 164 // operation were to set an appropriate status, it shouldn't be necessary; but see 165 // ModeUpgrading for discussion relevant to both. 166 func ModeInstalling(curl *charm.URL) (next Mode, err error) { 167 name := fmt.Sprintf("ModeInstalling %s", curl) 168 return func(u *Uniter) (next Mode, err error) { 169 defer modeContext(name, &err)() 170 return continueAfter(u, newInstallOp(curl)) 171 }, nil 172 } 173 174 // ModeUpgrading is responsible for upgrading the charm. It shouldn't really 175 // need to be a mode at all -- it's just running a single operation -- but 176 // it's not safe to call it inside arbitrary other modes, because failing to 177 // pass through ModeContinue on the way out could cause a queued hook to be 178 // accidentally skipped. 179 func ModeUpgrading(curl *charm.URL) Mode { 180 name := fmt.Sprintf("ModeUpgrading %s", curl) 181 return func(u *Uniter) (next Mode, err error) { 182 defer modeContext(name, &err)() 183 return continueAfter(u, newUpgradeOp(curl)) 184 } 185 } 186 187 // ModeTerminating marks the unit dead and returns ErrTerminateAgent. 188 func ModeTerminating(u *Uniter) (next Mode, err error) { 189 defer modeContext("ModeTerminating", &err)() 190 w, err := u.unit.Watch() 191 if err != nil { 192 return nil, errors.Trace(err) 193 } 194 195 defer watcher.Stop(w, &u.tomb) 196 197 for { 198 select { 199 case <-u.tomb.Dying(): 200 return nil, tomb.ErrDying 201 case actionId := <-u.f.ActionEvents(): 202 creator := newActionOp(actionId) 203 if err := u.runOperation(creator); err != nil { 204 return nil, errors.Trace(err) 205 } 206 case _, ok := <-w.Changes(): 207 if !ok { 208 return nil, watcher.EnsureErr(w) 209 } 210 if err := u.unit.Refresh(); err != nil { 211 return nil, errors.Trace(err) 212 } 213 if hasSubs, err := u.unit.HasSubordinates(); err != nil { 214 return nil, errors.Trace(err) 215 } else if hasSubs { 216 continue 217 } 218 // The unit is known to be Dying; so if it didn't have subordinates 219 // just above, it can't acquire new ones before this call. 220 if err := u.unit.EnsureDead(); err != nil { 221 return nil, errors.Trace(err) 222 } 223 return nil, worker.ErrTerminateAgent 224 } 225 } 226 } 227 228 // ModeAbide is the Uniter's usual steady state. It watches for and responds to: 229 // * service configuration changes 230 // * charm upgrade requests 231 // * relation changes 232 // * unit death 233 // * acquisition or loss of service leadership 234 func ModeAbide(u *Uniter) (next Mode, err error) { 235 defer modeContext("ModeAbide", &err)() 236 opState := u.operationState() 237 if opState.Kind != operation.Continue { 238 return nil, errors.Errorf("insane uniter state: %#v", opState) 239 } 240 if err := u.deployer.Fix(); err != nil { 241 return nil, errors.Trace(err) 242 } 243 244 if !opState.Leader && !u.ranLeaderSettingsChanged { 245 creator := newSimpleRunHookOp(hook.LeaderSettingsChanged) 246 if err := u.runOperation(creator); err != nil { 247 return nil, errors.Trace(err) 248 } 249 } 250 251 if !u.ranConfigChanged { 252 return continueAfter(u, newSimpleRunHookOp(hooks.ConfigChanged)) 253 } 254 if !opState.Started { 255 return continueAfter(u, newSimpleRunHookOp(hooks.Start)) 256 } 257 u.f.WantUpgradeEvent(false) 258 u.relations.StartHooks() 259 defer func() { 260 if e := u.relations.StopHooks(); e != nil { 261 if err == nil { 262 err = e 263 } else { 264 logger.Errorf("error while stopping hooks: %v", e) 265 } 266 } 267 }() 268 269 select { 270 case <-u.f.UnitDying(): 271 return modeAbideDyingLoop(u) 272 default: 273 } 274 return modeAbideAliveLoop(u) 275 } 276 277 // idleWaitTime is the time after which, if there are no uniter events, 278 // the agent state becomes idle. 279 var idleWaitTime = 2 * time.Second 280 281 // modeAbideAliveLoop handles all state changes for ModeAbide when the unit 282 // is in an Alive state. 283 func modeAbideAliveLoop(u *Uniter) (Mode, error) { 284 var leaderElected, leaderDeposed <-chan struct{} 285 for { 286 // We expect one or none of these vars to be non-nil; and if none 287 // are, we set the one that should trigger when our leadership state 288 // differs from what we have recorded locally. 289 if leaderElected == nil && leaderDeposed == nil { 290 if u.operationState().Leader { 291 logger.Infof("waiting to lose leadership") 292 leaderDeposed = u.leadershipTracker.WaitMinion().Ready() 293 } else { 294 logger.Infof("waiting to gain leadership") 295 leaderElected = u.leadershipTracker.WaitLeader().Ready() 296 } 297 } 298 299 // collect-metrics hook 300 lastCollectMetrics := time.Unix(u.operationState().CollectMetricsTime, 0) 301 collectMetricsSignal := u.collectMetricsAt( 302 time.Now(), lastCollectMetrics, metricsPollInterval, 303 ) 304 305 // update-status hook 306 lastUpdateStatus := time.Unix(u.operationState().UpdateStatusTime, 0) 307 updateStatusSignal := u.updateStatusAt( 308 time.Now(), lastUpdateStatus, statusPollInterval, 309 ) 310 311 var creator creator 312 select { 313 case <-time.After(idleWaitTime): 314 if err := setAgentStatus(u, params.StatusIdle, "", nil); err != nil { 315 return nil, errors.Trace(err) 316 } 317 continue 318 case <-u.tomb.Dying(): 319 return nil, tomb.ErrDying 320 case <-u.f.UnitDying(): 321 return modeAbideDyingLoop(u) 322 case curl := <-u.f.UpgradeEvents(): 323 return ModeUpgrading(curl), nil 324 case ids := <-u.f.RelationsEvents(): 325 creator = newUpdateRelationsOp(ids) 326 case actionId := <-u.f.ActionEvents(): 327 creator = newActionOp(actionId) 328 case tags := <-u.f.StorageEvents(): 329 creator = newUpdateStorageOp(tags) 330 case <-u.f.ConfigEvents(): 331 creator = newSimpleRunHookOp(hooks.ConfigChanged) 332 case <-u.f.MeterStatusEvents(): 333 creator = newSimpleRunHookOp(hooks.MeterStatusChanged) 334 case <-collectMetricsSignal: 335 creator = newSimpleRunHookOp(hooks.CollectMetrics) 336 case <-updateStatusSignal: 337 creator = newSimpleRunHookOp(hooks.UpdateStatus) 338 case hookInfo := <-u.relations.Hooks(): 339 creator = newRunHookOp(hookInfo) 340 case hookInfo := <-u.storage.Hooks(): 341 creator = newRunHookOp(hookInfo) 342 case <-leaderElected: 343 // This operation queues a hook, better to let ModeContinue pick up 344 // after it than to duplicate queued-hook handling here. 345 return continueAfter(u, newAcceptLeadershipOp()) 346 case <-leaderDeposed: 347 leaderDeposed = nil 348 creator = newResignLeadershipOp() 349 case <-u.f.LeaderSettingsEvents(): 350 creator = newSimpleRunHookOp(hook.LeaderSettingsChanged) 351 } 352 if err := u.runOperation(creator); err != nil { 353 return nil, errors.Trace(err) 354 } 355 } 356 } 357 358 // modeAbideDyingLoop handles the proper termination of all relations in 359 // response to a Dying unit. 360 func modeAbideDyingLoop(u *Uniter) (next Mode, err error) { 361 if err := u.unit.Refresh(); err != nil { 362 return nil, errors.Trace(err) 363 } 364 if err = u.unit.DestroyAllSubordinates(); err != nil { 365 return nil, errors.Trace(err) 366 } 367 if err := u.relations.SetDying(); err != nil { 368 return nil, errors.Trace(err) 369 } 370 if u.operationState().Leader { 371 if err := u.runOperation(newResignLeadershipOp()); err != nil { 372 return nil, errors.Trace(err) 373 } 374 // TODO(fwereade): we ought to inform the tracker that we're shutting down 375 // (and no longer wish to continue renewing our lease) so that the tracker 376 // can then report minionhood at all times, and thus prevent the is-leader 377 // and leader-set hook tools from acting in a correct but misleading way 378 // (ie continuing to act as though leader after leader-deposed has run). 379 } 380 if err := u.storage.SetDying(); err != nil { 381 return nil, errors.Trace(err) 382 } 383 for { 384 if len(u.relations.GetInfo()) == 0 && u.storage.Empty() { 385 return continueAfter(u, newSimpleRunHookOp(hooks.Stop)) 386 } 387 var creator creator 388 select { 389 case <-u.tomb.Dying(): 390 return nil, tomb.ErrDying 391 case actionId := <-u.f.ActionEvents(): 392 creator = newActionOp(actionId) 393 case <-u.f.ConfigEvents(): 394 creator = newSimpleRunHookOp(hooks.ConfigChanged) 395 case <-u.f.LeaderSettingsEvents(): 396 creator = newSimpleRunHookOp(hook.LeaderSettingsChanged) 397 case hookInfo := <-u.relations.Hooks(): 398 creator = newRunHookOp(hookInfo) 399 case hookInfo := <-u.storage.Hooks(): 400 creator = newRunHookOp(hookInfo) 401 } 402 if err := u.runOperation(creator); err != nil { 403 return nil, errors.Trace(err) 404 } 405 } 406 } 407 408 // waitStorage waits until all storage attachments are provisioned 409 // and their hooks processed. 410 func waitStorage(u *Uniter) error { 411 if u.storage.Pending() == 0 { 412 return nil 413 } 414 logger.Infof("waiting for storage attachments") 415 for u.storage.Pending() > 0 { 416 var creator creator 417 select { 418 case <-u.tomb.Dying(): 419 return tomb.ErrDying 420 case <-u.f.UnitDying(): 421 // Unit is shutting down; no need to handle any 422 // more storage-attached hooks. We will process 423 // required storage-detaching hooks in ModeAbideDying. 424 return nil 425 case tags := <-u.f.StorageEvents(): 426 creator = newUpdateStorageOp(tags) 427 case hookInfo := <-u.storage.Hooks(): 428 creator = newRunHookOp(hookInfo) 429 } 430 if err := u.runOperation(creator); err != nil { 431 return errors.Trace(err) 432 } 433 } 434 logger.Infof("storage attachments ready") 435 return nil 436 } 437 438 // ModeHookError is responsible for watching and responding to: 439 // * user resolution of hook errors 440 // * forced charm upgrade requests 441 // * loss of service leadership 442 func ModeHookError(u *Uniter) (next Mode, err error) { 443 defer modeContext("ModeHookError", &err)() 444 opState := u.operationState() 445 if opState.Kind != operation.RunHook || opState.Step != operation.Pending { 446 return nil, errors.Errorf("insane uniter state: %#v", u.operationState()) 447 } 448 449 // Create error information for status. 450 hookInfo := *opState.Hook 451 hookName := string(hookInfo.Kind) 452 statusData := map[string]interface{}{} 453 if hookInfo.Kind.IsRelation() { 454 statusData["relation-id"] = hookInfo.RelationId 455 if hookInfo.RemoteUnit != "" { 456 statusData["remote-unit"] = hookInfo.RemoteUnit 457 } 458 relationName, err := u.relations.Name(hookInfo.RelationId) 459 if err != nil { 460 return nil, errors.Trace(err) 461 } 462 hookName = fmt.Sprintf("%s-%s", relationName, hookInfo.Kind) 463 } 464 statusData["hook"] = hookName 465 statusMessage := fmt.Sprintf("hook failed: %q", hookName) 466 467 // Run the select loop. 468 u.f.WantResolvedEvent() 469 u.f.WantUpgradeEvent(true) 470 var leaderDeposed <-chan struct{} 471 if opState.Leader { 472 leaderDeposed = u.leadershipTracker.WaitMinion().Ready() 473 } 474 for { 475 // The spec says we should set the workload status to Error, but that's crazy talk. 476 // It's the agent itself that should be in Error state. So we'll ensure the model is 477 // correct and translate before the user sees the data. 478 // ie a charm hook error results in agent error status, but is presented as a workload error. 479 if err = setAgentStatus(u, params.StatusError, statusMessage, statusData); err != nil { 480 return nil, errors.Trace(err) 481 } 482 select { 483 case <-u.tomb.Dying(): 484 return nil, tomb.ErrDying 485 case curl := <-u.f.UpgradeEvents(): 486 return ModeUpgrading(curl), nil 487 case rm := <-u.f.ResolvedEvents(): 488 var creator creator 489 switch rm { 490 case params.ResolvedRetryHooks: 491 creator = newRetryHookOp(hookInfo) 492 case params.ResolvedNoHooks: 493 creator = newSkipHookOp(hookInfo) 494 default: 495 return nil, errors.Errorf("unknown resolved mode %q", rm) 496 } 497 err := u.runOperation(creator) 498 if errors.Cause(err) == operation.ErrHookFailed { 499 continue 500 } else if err != nil { 501 return nil, errors.Trace(err) 502 } 503 return ModeContinue, nil 504 case actionId := <-u.f.ActionEvents(): 505 if err := u.runOperation(newActionOp(actionId)); err != nil { 506 return nil, errors.Trace(err) 507 } 508 case <-leaderDeposed: 509 // This should trigger at most once -- we can't reaccept leadership while 510 // in an error state. 511 leaderDeposed = nil 512 if err := u.runOperation(newResignLeadershipOp()); err != nil { 513 return nil, errors.Trace(err) 514 } 515 } 516 } 517 } 518 519 // ModeConflicted is responsible for watching and responding to: 520 // * user resolution of charm upgrade conflicts 521 // * forced charm upgrade requests 522 func ModeConflicted(curl *charm.URL) Mode { 523 return func(u *Uniter) (next Mode, err error) { 524 defer modeContext("ModeConflicted", &err)() 525 // TODO(mue) Add helpful data here too in later CL. 526 // The spec says we should set the workload status to Error, but that's crazy talk. 527 // It's the agent itself that should be in Error state. So we'll ensure the model is 528 // correct and translate before the user sees the data. 529 // ie a charm upgrade error results in agent error status, but is presented as a workload error. 530 if err := setAgentStatus(u, params.StatusError, "upgrade failed", nil); err != nil { 531 return nil, errors.Trace(err) 532 } 533 u.f.WantResolvedEvent() 534 u.f.WantUpgradeEvent(true) 535 var creator creator 536 select { 537 case <-u.tomb.Dying(): 538 return nil, tomb.ErrDying 539 case curl = <-u.f.UpgradeEvents(): 540 creator = newRevertUpgradeOp(curl) 541 case <-u.f.ResolvedEvents(): 542 creator = newResolvedUpgradeOp(curl) 543 } 544 return continueAfter(u, creator) 545 } 546 } 547 548 // modeContext returns a function that implements logging and common error 549 // manipulation for Mode funcs. 550 func modeContext(name string, err *error) func() { 551 logger.Infof("%s starting", name) 552 return func() { 553 logger.Infof("%s exiting", name) 554 *err = errors.Annotatef(*err, name) 555 } 556 } 557 558 // continueAfter is commonly used at the end of a Mode func to execute the 559 // operation returned by creator and return ModeContinue (or any error). 560 func continueAfter(u *Uniter, creator creator) (Mode, error) { 561 if err := u.runOperation(creator); err != nil { 562 return nil, errors.Trace(err) 563 } 564 return ModeContinue, nil 565 }