package taskrunner

import (
	"context"
	"errors"
	"fmt"
	"strings"
	"sync"
	"time"

	"github.com/hashicorp/nomad/client/lib/cgutil"
	"golang.org/x/exp/slices"

	metrics "github.com/armon/go-metrics"
	log "github.com/hashicorp/go-hclog"
	multierror "github.com/hashicorp/go-multierror"
	"github.com/hashicorp/hcl/v2/hcldec"
	"github.com/hashicorp/nomad/client/allocdir"
	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
	"github.com/hashicorp/nomad/client/allocrunner/taskrunner/restarts"
	"github.com/hashicorp/nomad/client/allocrunner/taskrunner/state"
	"github.com/hashicorp/nomad/client/config"
	"github.com/hashicorp/nomad/client/consul"
	"github.com/hashicorp/nomad/client/devicemanager"
	"github.com/hashicorp/nomad/client/dynamicplugins"
	cinterfaces "github.com/hashicorp/nomad/client/interfaces"
	"github.com/hashicorp/nomad/client/pluginmanager/csimanager"
	"github.com/hashicorp/nomad/client/pluginmanager/drivermanager"
	"github.com/hashicorp/nomad/client/serviceregistration"
	"github.com/hashicorp/nomad/client/serviceregistration/wrapper"
	cstate "github.com/hashicorp/nomad/client/state"
	cstructs "github.com/hashicorp/nomad/client/structs"
	"github.com/hashicorp/nomad/client/taskenv"
	"github.com/hashicorp/nomad/client/vaultclient"
	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/helper/pluginutils/hclspecutils"
	"github.com/hashicorp/nomad/helper/pluginutils/hclutils"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/structs"
	bstructs "github.com/hashicorp/nomad/plugins/base/structs"
	"github.com/hashicorp/nomad/plugins/drivers"
)

const (
	// defaultMaxEvents is the default max capacity for task events on the
	// task state. Overrideable for testing.
	defaultMaxEvents = 10

	// killBackoffBaseline is the baseline time for exponential backoff while
	// killing a task.
	killBackoffBaseline = 5 * time.Second

	// killBackoffLimit is the limit of the exponential backoff for killing
	// the task.
	killBackoffLimit = 2 * time.Minute

	// killFailureLimit is how many times we will attempt to kill a task before
	// giving up and potentially leaking resources.
	killFailureLimit = 5

	// triggerUpdateChCap is the capacity for the triggerUpdateCh used for
	// triggering updates. It should be exactly 1 as even if multiple
	// updates have come in since the last one was handled, we only need to
	// handle the last one.
	triggerUpdateChCap = 1

	// restartChCap is the capacity for the restartCh used for triggering task
	// restarts. It should be exactly 1 as even if multiple restarts have come
	// in, we only need to handle the last one.
	restartChCap = 1
)
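// Taken together, these values bound how long a failing kill can take: killTask
// (below) waits (1 << (2*i)) * killBackoffBaseline after failed attempt i,
// capped at killBackoffLimit, so with these defaults the waits between the
// killFailureLimit attempts work out to roughly:
//
//	attempt 1: 5s
//	attempt 2: 20s
//	attempt 3: 80s
//	attempt 4: 2m (capped)
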
type TaskRunner struct {
	// allocID, taskName, taskLeader, and taskResources are immutable so these
	// fields may be accessed without locks
	allocID       string
	taskName      string
	taskLeader    bool
	taskResources *structs.AllocatedTaskResources

	alloc     *structs.Allocation
	allocLock sync.Mutex

	clientConfig *config.Config

	// stateUpdater is used to emit updated task state
	stateUpdater interfaces.TaskStateHandler

	// state captures the state of the task for updating the allocation.
	// Must acquire stateLock to access.
	state *structs.TaskState

	// localState captures the node-local state of the task for when the
	// Nomad agent restarts.
	// Must acquire stateLock to access.
	localState *state.LocalState

	// stateLock must be acquired when accessing state or localState.
	stateLock sync.RWMutex

	// stateDB is for persisting localState and taskState
	stateDB cstate.StateDB

	// restartCh is used to signal that the task should restart.
	restartCh chan struct{}

	// shutdownCtx is used to exit the TaskRunner *without* affecting task state.
	shutdownCtx context.Context

	// shutdownCtxCancel causes the TaskRunner to exit immediately without
	// affecting task state. Useful for testing or graceful agent shutdown.
	shutdownCtxCancel context.CancelFunc

	// killCtx is the task runner's context representing the task's lifecycle.
	// The context is canceled when the task is killed.
	killCtx context.Context

	// killCtxCancel is called when killing a task.
	killCtxCancel context.CancelFunc

	// killErr is populated when killing a task. Access should be done using
	// the getter/setter.
	killErr     error
	killErrLock sync.Mutex

	// shutdownDelayCtx is a context from the alloc runner which will
	// tell us to exit early from shutdown_delay
	shutdownDelayCtx      context.Context
	shutdownDelayCancelFn context.CancelFunc

	// logger is the logger for the task runner.
	logger log.Logger

	// triggerUpdateCh is ticked whenever update hooks need to be run and
	// must be created with cap=1 to signal a pending update and prevent
	// callers from deadlocking if the receiver has exited.
	triggerUpdateCh chan struct{}

	// waitCh is closed when the task runner has transitioned to a terminal
	// state
	waitCh chan struct{}

	// driver is the driver for the task.
	driver drivers.DriverPlugin

	// driverCapabilities is the set of capabilities the driver supports
	driverCapabilities *drivers.Capabilities

	// taskSchema is the hcl spec for the task driver configuration
	taskSchema hcldec.Spec

	// handleLock guards access to handle and handleResult
	handleLock sync.Mutex

	// handle to the running driver
	handle *DriverHandle

	// task is the task being run
	task     *structs.Task
	taskLock sync.RWMutex

	// taskDir is the directory structure for this task.
	taskDir *allocdir.TaskDir

	// envBuilder is used to build the task's environment
	envBuilder *taskenv.Builder

	// restartTracker is used to decide if the task should be restarted.
	restartTracker *restarts.RestartTracker

	// runnerHooks are task runner lifecycle hooks that should be run on state
	// transitions.
	runnerHooks []interfaces.TaskHook

	// hookResources captures the resources provided by hooks
	hookResources *hookResources

	// consulServiceClient is the client used by the consul service hook for
	// registering services and checks
	consulServiceClient serviceregistration.Handler

	// consulProxiesClient is the client used by the envoy version hook for
	// asking consul what version of envoy nomad should inject into the connect
	// sidecar or gateway task.
	consulProxiesClient consul.SupportedProxiesAPI

	// siClient is the client used by the service identity hook for managing
	// service identity tokens
	siClient consul.ServiceIdentityAPI

	// vaultClient is the client to use to derive and renew Vault tokens
	vaultClient vaultclient.VaultClient

	// vaultToken is the current Vault token. It should be accessed with the
	// getter.
	vaultToken     string
	vaultTokenLock sync.Mutex

	// nomadToken is the current Nomad workload identity token. It
	// should be accessed with the getter.
	nomadToken     string
	nomadTokenLock sync.Mutex

	// baseLabels are used when emitting tagged metrics. All task runner metrics
	// will have these tags, and optionally more.
	baseLabels []metrics.Label

	// logmonHookConfig is used to get the paths to the stdout and stderr fifos
	// to be passed to the driver for task logging
	logmonHookConfig *logmonHookConfig

	// resourceUsage is written via UpdateStats and read via
	// LatestResourceUsage. May be nil at all times.
	resourceUsage     *cstructs.TaskResourceUsage
	resourceUsageLock sync.Mutex

	// deviceStatsReporter is used to lookup resource usage for alloc devices
	deviceStatsReporter cinterfaces.DeviceStatsReporter

	// csiManager is used to manage the mounting of CSI volumes into tasks
	csiManager csimanager.Manager

	// devicemanager is used to mount devices as well as lookup device
	// statistics
	devicemanager devicemanager.Manager

	// cpusetCgroupPathGetter is used to lookup the cgroup path if supported by the platform
	cpusetCgroupPathGetter cgutil.CgroupPathGetter

	// driverManager is used to dispense driver plugins and register event
	// handlers
	driverManager drivermanager.Manager

	// dynamicRegistry is where dynamic plugins should be registered.
	dynamicRegistry dynamicplugins.Registry

	// maxEvents is the capacity of the TaskEvents on the TaskState.
	// Defaults to defaultMaxEvents but overrideable for testing.
	maxEvents int

	// serversContactedCh is passed to TaskRunners so they can detect when
	// GetClientAllocs has been called in case of a failed restore.
	serversContactedCh <-chan struct{}

	// startConditionMetCh signals the TaskRunner when it should start the task
	startConditionMetCh <-chan struct{}

	// waitOnServers defaults to false but will be set true if a restore
	// fails and the Run method should wait until serversContactedCh is
	// closed.
	waitOnServers bool

	networkIsolationLock sync.Mutex
	networkIsolationSpec *drivers.NetworkIsolationSpec

	allocHookResources *cstructs.AllocHookResources

	// serviceRegWrapper is the handler wrapper that is used by service hooks
	// to perform service and check registration and deregistration.
	serviceRegWrapper *wrapper.HandlerWrapper

	// getter is an interface for retrieving artifacts.
	getter cinterfaces.ArtifactGetter
}

type Config struct {
	Alloc        *structs.Allocation
	ClientConfig *config.Config
	Task         *structs.Task
	TaskDir      *allocdir.TaskDir
	Logger       log.Logger

	// Consul is the client to use for managing Consul service registrations
	Consul serviceregistration.Handler

	// ConsulProxies is the client to use for looking up supported envoy versions
	// from Consul.
	ConsulProxies consul.SupportedProxiesAPI

	// ConsulSI is the client to use for managing Consul SI tokens
	ConsulSI consul.ServiceIdentityAPI

	// DynamicRegistry is where dynamic plugins should be registered.
	DynamicRegistry dynamicplugins.Registry

	// Vault is the client to use to derive and renew Vault tokens
	Vault vaultclient.VaultClient

	// StateDB is used to store and restore state.
	StateDB cstate.StateDB

	// StateUpdater is used to emit updated task state
	StateUpdater interfaces.TaskStateHandler

	// DeviceStatsReporter is used to lookup resource usage for alloc devices
	DeviceStatsReporter cinterfaces.DeviceStatsReporter

	// CSIManager is used to manage the mounting of CSI volumes into tasks
	CSIManager csimanager.Manager

	// CpusetCgroupPathGetter is used to lookup the cgroup path if supported by the platform
	CpusetCgroupPathGetter cgutil.CgroupPathGetter

	// DeviceManager is used to mount devices as well as lookup device
	// statistics
	DeviceManager devicemanager.Manager

	// DriverManager is used to dispense driver plugins and register event
	// handlers
	DriverManager drivermanager.Manager

	// ServersContactedCh is closed when the first GetClientAllocs call to
	// servers succeeds and allocs are synced.
	ServersContactedCh chan struct{}

	// StartConditionMetCh signals the TaskRunner when it should start the task
	StartConditionMetCh <-chan struct{}

	// ShutdownDelayCtx is a context from the alloc runner which will
	// tell us to exit early from shutdown_delay
	ShutdownDelayCtx context.Context

	// ShutdownDelayCancelFn should only be used in testing.
	ShutdownDelayCancelFn context.CancelFunc

	// ServiceRegWrapper is the handler wrapper that is used by service hooks
	// to perform service and check registration and deregistration.
	ServiceRegWrapper *wrapper.HandlerWrapper

	// Getter is an interface for retrieving artifacts.
	Getter cinterfaces.ArtifactGetter
}

func NewTaskRunner(config *Config) (*TaskRunner, error) {
	// Create a context for causing the runner to exit
	trCtx, trCancel := context.WithCancel(context.Background())

	// Create a context for killing the runner
	killCtx, killCancel := context.WithCancel(context.Background())

	// Initialize the environment builder
	envBuilder := taskenv.NewBuilder(
		config.ClientConfig.Node,
		config.Alloc,
		config.Task,
		config.ClientConfig.Region,
	)

	// Initialize state from alloc if it is set
	tstate := structs.NewTaskState()
	if ts := config.Alloc.TaskStates[config.Task.Name]; ts != nil {
		tstate = ts.Copy()
	}

	tr := &TaskRunner{
		alloc:                  config.Alloc,
		allocID:                config.Alloc.ID,
		clientConfig:           config.ClientConfig,
		task:                   config.Task,
		taskDir:                config.TaskDir,
		taskName:               config.Task.Name,
		taskLeader:             config.Task.Leader,
		envBuilder:             envBuilder,
		dynamicRegistry:        config.DynamicRegistry,
		consulServiceClient:    config.Consul,
		consulProxiesClient:    config.ConsulProxies,
		siClient:               config.ConsulSI,
		vaultClient:            config.Vault,
		state:                  tstate,
		localState:             state.NewLocalState(),
		stateDB:                config.StateDB,
		stateUpdater:           config.StateUpdater,
		deviceStatsReporter:    config.DeviceStatsReporter,
		killCtx:                killCtx,
		killCtxCancel:          killCancel,
		shutdownCtx:            trCtx,
		shutdownCtxCancel:      trCancel,
		triggerUpdateCh:        make(chan struct{}, triggerUpdateChCap),
		restartCh:              make(chan struct{}, restartChCap),
		waitCh:                 make(chan struct{}),
		csiManager:             config.CSIManager,
		cpusetCgroupPathGetter: config.CpusetCgroupPathGetter,
		devicemanager:          config.DeviceManager,
		driverManager:          config.DriverManager,
		maxEvents:              defaultMaxEvents,
		serversContactedCh:     config.ServersContactedCh,
		startConditionMetCh:    config.StartConditionMetCh,
		shutdownDelayCtx:       config.ShutdownDelayCtx,
		shutdownDelayCancelFn:  config.ShutdownDelayCancelFn,
		serviceRegWrapper:      config.ServiceRegWrapper,
		getter:                 config.Getter,
	}

	// Create the logger based on the allocation ID
	tr.logger = config.Logger.Named("task_runner").With("task", config.Task.Name)

	// Pull out the task's resources
	ares := tr.alloc.AllocatedResources
	if ares == nil {
		return nil, fmt.Errorf("no task resources found on allocation")
	}

	tres, ok := ares.Tasks[tr.taskName]
	if !ok {
		return nil, fmt.Errorf("no task resources found on allocation")
	}
	tr.taskResources = tres

	// Build the restart tracker.
	rp := config.Task.RestartPolicy
	if rp == nil {
		tg := tr.alloc.Job.LookupTaskGroup(tr.alloc.TaskGroup)
		if tg == nil {
			tr.logger.Error("alloc missing task group")
			return nil, fmt.Errorf("alloc missing task group")
		}
		rp = tg.RestartPolicy
	}
	tr.restartTracker = restarts.NewRestartTracker(rp, tr.alloc.Job.Type, config.Task.Lifecycle)

	// Get the driver
	if err := tr.initDriver(); err != nil {
		tr.logger.Error("failed to create driver", "error", err)
		return nil, err
	}

	// Use the client secret only as the initial value; the identity hook will
	// update this with a workload identity if one is available
	tr.setNomadToken(config.ClientConfig.Node.SecretID)

	// Initialize the runner's hooks. Must come after initDriver so hooks
	// can use tr.driverCapabilities
	tr.initHooks()

	// Initialize base labels
	tr.initLabels()

	// Initialize initial task received event
	tr.appendEvent(structs.NewTaskEvent(structs.TaskReceived))

	return tr, nil
}
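// NewTaskRunner is normally driven by the alloc runner rather than called
// directly. A minimal, illustrative sketch of the intended lifecycle
// (population of the *Config elided) looks like:
//
//	tr, err := NewTaskRunner(cfg)
//	if err != nil {
//		return err
//	}
//	go tr.Run()   // starts or reattaches to the task; closes WaitCh on exit
//	<-tr.WaitCh() // blocks until the run loop reaches a terminal state
//
// Shutting the agent down without touching task state goes through
// tr.Shutdown(), while killing the task itself flows through the kill path
// (killCtx / handleKill below).
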
func (tr *TaskRunner) initLabels() {
	alloc := tr.Alloc()
	tr.baseLabels = []metrics.Label{
		{
			Name:  "job",
			Value: alloc.Job.Name,
		},
		{
			Name:  "task_group",
			Value: alloc.TaskGroup,
		},
		{
			Name:  "alloc_id",
			Value: tr.allocID,
		},
		{
			Name:  "task",
			Value: tr.taskName,
		},
		{
			Name:  "namespace",
			Value: tr.alloc.Namespace,
		},
	}

	if tr.alloc.Job.ParentID != "" {
		tr.baseLabels = append(tr.baseLabels, metrics.Label{
			Name:  "parent_id",
			Value: tr.alloc.Job.ParentID,
		})
		if strings.Contains(tr.alloc.Job.Name, "/dispatch-") {
			tr.baseLabels = append(tr.baseLabels, metrics.Label{
				Name:  "dispatch_id",
				Value: strings.Split(tr.alloc.Job.Name, "/dispatch-")[1],
			})
		}
		if strings.Contains(tr.alloc.Job.Name, "/periodic-") {
			tr.baseLabels = append(tr.baseLabels, metrics.Label{
				Name:  "periodic_id",
				Value: strings.Split(tr.alloc.Job.Name, "/periodic-")[1],
			})
		}
	}
}

// MarkFailedDead marks a task as failed and not to run. Aimed to be invoked
// when alloc runner prestart hooks fail. Should never be called alongside
// Run().
func (tr *TaskRunner) MarkFailedDead(reason string) {
	defer close(tr.waitCh)

	tr.stateLock.Lock()
	if err := tr.stateDB.PutTaskRunnerLocalState(tr.allocID, tr.taskName, tr.localState); err != nil {
		//TODO Nomad will be unable to restore this task; try to kill
		// it now and fail? In general we prefer to leave running
		// tasks running even if the agent encounters an error.
		tr.logger.Warn("error persisting local failed task state; may be unable to restore after a Nomad restart",
			"error", err)
	}
	tr.stateLock.Unlock()

	event := structs.NewTaskEvent(structs.TaskSetupFailure).
		SetDisplayMessage(reason).
		SetFailsTask()
	tr.UpdateState(structs.TaskStateDead, event)

	// Run the stop hooks in case task was a restored task that failed prestart
	if err := tr.stop(); err != nil {
		tr.logger.Error("stop failed while marking task dead", "error", err)
	}
}

// Run the TaskRunner. Starts the user's task or reattaches to a restored task.
// Run closes WaitCh when it exits. Should be started in a goroutine.
func (tr *TaskRunner) Run() {
	defer close(tr.waitCh)
	var result *drivers.ExitResult

	tr.stateLock.RLock()
	dead := tr.state.State == structs.TaskStateDead
	runComplete := tr.localState.RunComplete
	tr.stateLock.RUnlock()

	// If restoring a dead task, ensure the task is cleared and, if the local
	// state indicates that the previous Run() call is complete, execute all
	// post stop hooks and exit early, otherwise proceed until the
	// ALLOC_RESTART loop skipping MAIN since the task is dead.
	if dead {
		// do cleanup functions without emitting any additional events/work
		// to handle cases where we restored a dead task where client terminated
		// after task finished before completing post-run actions.
		tr.clearDriverHandle()
		tr.stateUpdater.TaskStateUpdated()
		if runComplete {
			if err := tr.stop(); err != nil {
				tr.logger.Error("stop failed on terminal task", "error", err)
			}
			return
		}
	}

	// Updates are handled asynchronously with the other hooks but each
	// triggered update - whether due to alloc updates or a new vault token
	// - should be handled serially.
	go tr.handleUpdates()

	// If restore failed wait until servers are contacted before running.
	// #1795
	if tr.waitOnServers {
		tr.logger.Info("task failed to restore; waiting to contact server before restarting")
		select {
		case <-tr.killCtx.Done():
			tr.logger.Info("task killed while waiting for server contact")
		case <-tr.shutdownCtx.Done():
			return
		case <-tr.serversContactedCh:
			tr.logger.Info("server contacted; unblocking waiting task")
		}
	}

	// Set the initial task state.
	tr.stateUpdater.TaskStateUpdated()

	// start with a stopped timer; actual restart delay computed later
	timer, stop := helper.NewStoppedTimer()
	defer stop()

MAIN:
	for !tr.shouldShutdown() {
		if dead {
			break
		}

		select {
		case <-tr.killCtx.Done():
			break MAIN
		case <-tr.shutdownCtx.Done():
			// TaskRunner was told to exit immediately
			return
		case <-tr.startConditionMetCh:
			tr.logger.Debug("lifecycle start condition has been met, proceeding")
			// yay proceed
		}

		// Run the prestart hooks
		if err := tr.prestart(); err != nil {
			tr.logger.Error("prestart failed", "error", err)
			tr.restartTracker.SetStartError(err)
			goto RESTART
		}

		select {
		case <-tr.killCtx.Done():
			break MAIN
		case <-tr.shutdownCtx.Done():
			// TaskRunner was told to exit immediately
			return
		default:
		}

		// Run the task
		if err := tr.runDriver(); err != nil {
			tr.logger.Error("running driver failed", "error", err)
			tr.restartTracker.SetStartError(err)
			goto RESTART
		}

		// Run the poststart hooks
		if err := tr.poststart(); err != nil {
			tr.logger.Error("poststart failed", "error", err)
		}

		// Grab the result proxy and wait for task to exit
	WAIT:
		{
			handle := tr.getDriverHandle()
			result = nil

			// Do *not* use tr.killCtx here as it would cause
			// Wait() to unblock before the task exits when Kill()
			// is called.
			if resultCh, err := handle.WaitCh(context.Background()); err != nil {
				tr.logger.Error("wait task failed", "error", err)
			} else {
				select {
				case <-tr.killCtx.Done():
					// We can go through the normal should restart check since
					// the restart tracker knows it is killed
					result = tr.handleKill(resultCh)
				case <-tr.shutdownCtx.Done():
					// TaskRunner was told to exit immediately
					return
				case result = <-resultCh:
				}

				// WaitCh returned a result
				if retryWait := tr.handleTaskExitResult(result); retryWait {
					goto WAIT
				}
			}
		}

		// Clear the handle
		tr.clearDriverHandle()

		// Store the wait result on the restart tracker
		tr.restartTracker.SetExitResult(result)

		if err := tr.exited(); err != nil {
			tr.logger.Error("exited hooks failed", "error", err)
		}

	RESTART:
		restart, restartDelay := tr.shouldRestart()
		if !restart {
			break MAIN
		}

		timer.Reset(restartDelay)

		// Actually restart by sleeping and also watching for destroy events
		select {
		case <-timer.C:
		case <-tr.killCtx.Done():
			tr.logger.Trace("task killed between restarts", "delay", restartDelay)
			break MAIN
		case <-tr.shutdownCtx.Done():
			// TaskRunner was told to exit immediately
			tr.logger.Trace("gracefully shutting down during restart delay")
			return
		}
	}

	// Ensure handle is cleaned up. Restore could have recovered a task
	// that should be terminal, so if the handle still exists we should
	// kill it here.
	if tr.getDriverHandle() != nil {
		if result = tr.handleKill(nil); result != nil {
			tr.emitExitResultEvent(result)
		}

		tr.clearDriverHandle()

		if err := tr.exited(); err != nil {
			tr.logger.Error("exited hooks failed while cleaning up terminal task", "error", err)
		}
	}

	// Mark the task as dead
	tr.UpdateState(structs.TaskStateDead, nil)

	// Wait here in case the allocation is restarted. Poststop tasks will never
	// run again so skip them to avoid blocking forever.
	if !tr.Task().IsPoststop() {
	ALLOC_RESTART:
		// Run in a loop to handle cases where restartCh is triggered but the
		// task runner doesn't need to restart.
		for {
			select {
			case <-tr.killCtx.Done():
				break ALLOC_RESTART
			case <-tr.shutdownCtx.Done():
				return
			case <-tr.restartCh:
				// Restart without delay since the task is not running anymore.
				restart, _ := tr.shouldRestart()
				if restart {
					// Set runner as not dead to allow the MAIN loop to run.
					dead = false
					goto MAIN
				}
			}
		}
	}

	tr.stateLock.Lock()
	tr.localState.RunComplete = true
	err := tr.stateDB.PutTaskRunnerLocalState(tr.allocID, tr.taskName, tr.localState)
	if err != nil {
		tr.logger.Warn("error persisting task state on run loop exit", "error", err)
	}
	tr.stateLock.Unlock()

	// Run the stop hooks
	if err := tr.stop(); err != nil {
		tr.logger.Error("stop failed", "error", err)
	}

	tr.logger.Debug("task run loop exiting")
}

func (tr *TaskRunner) shouldShutdown() bool {
	alloc := tr.Alloc()
	if alloc.ClientTerminalStatus() {
		return true
	}

	if !tr.IsPoststopTask() && alloc.ServerTerminalStatus() {
		return true
	}

	return false
}

// handleTaskExitResult handles the results returned by the task exiting.
// If retryWait is true, the caller should attempt to wait on the task again
// since it has not actually finished running. This can happen if the driver
// plugin has exited.
func (tr *TaskRunner) handleTaskExitResult(result *drivers.ExitResult) (retryWait bool) {
	if result == nil {
		return false
	}

	if result.Err == bstructs.ErrPluginShutdown {
		dn := tr.Task().Driver
		tr.logger.Debug("driver plugin has shutdown; attempting to recover task", "driver", dn)

		// Initialize a new driver handle
		if err := tr.initDriver(); err != nil {
			tr.logger.Error("failed to initialize driver after it exited unexpectedly", "error", err, "driver", dn)
			return false
		}

		// Try to restore the handle
		tr.stateLock.RLock()
		h := tr.localState.TaskHandle
		net := tr.localState.DriverNetwork
		tr.stateLock.RUnlock()
		if !tr.restoreHandle(h, net) {
			tr.logger.Error("failed to restore handle on driver after it exited unexpectedly", "driver", dn)
			return false
		}

		tr.logger.Debug("task successfully recovered on driver", "driver", dn)
		return true
	}

	// Emit Terminated event
	tr.emitExitResultEvent(result)

	return false
}

// emitExitResultEvent emits a TaskTerminated event for an ExitResult.
func (tr *TaskRunner) emitExitResultEvent(result *drivers.ExitResult) {
	event := structs.NewTaskEvent(structs.TaskTerminated).
		SetExitCode(result.ExitCode).
		SetSignal(result.Signal).
		SetOOMKilled(result.OOMKilled).
		SetExitMessage(result.Err)

	tr.EmitEvent(event)

	if result.OOMKilled {
		metrics.IncrCounterWithLabels([]string{"client", "allocs", "oom_killed"}, 1, tr.baseLabels)
	}
}

// handleUpdates runs update hooks when triggerUpdateCh is ticked and exits
// when Run has returned. Should only be run in a goroutine from Run.
func (tr *TaskRunner) handleUpdates() {
	for {
		select {
		case <-tr.triggerUpdateCh:
		case <-tr.waitCh:
			return
		}

		// Non-terminal update; run hooks
		tr.updateHooks()
	}
}

// shouldRestart determines whether the task should be restarted and updates
// the task state unless the task is killed or terminated.
func (tr *TaskRunner) shouldRestart() (bool, time.Duration) {
	// Determine if we should restart
	state, when := tr.restartTracker.GetState()
	reason := tr.restartTracker.GetReason()
	switch state {
	case structs.TaskKilled:
		// Never restart an explicitly killed task. Kill method handles
		// updating the server.
		tr.EmitEvent(structs.NewTaskEvent(state))
		return false, 0
	case structs.TaskNotRestarting, structs.TaskTerminated:
		tr.logger.Info("not restarting task", "reason", reason)
		if state == structs.TaskNotRestarting {
			tr.UpdateState(structs.TaskStateDead, structs.NewTaskEvent(structs.TaskNotRestarting).SetRestartReason(reason).SetFailsTask())
		}
		return false, 0
	case structs.TaskRestarting:
		tr.logger.Info("restarting task", "reason", reason, "delay", when)
		tr.UpdateState(structs.TaskStatePending, structs.NewTaskEvent(structs.TaskRestarting).SetRestartDelay(when).SetRestartReason(reason))
		return true, when
	default:
		tr.logger.Error("restart tracker returned unknown state", "state", state)
		return true, when
	}
}

// runDriver runs the driver and waits for it to exit
// runDriver emits an appropriate task event on success/failure
func (tr *TaskRunner) runDriver() error {

	taskConfig := tr.buildTaskConfig()
	if tr.cpusetCgroupPathGetter != nil {
		tr.logger.Trace("waiting for cgroup to exist for", "allocID", tr.allocID, "task", tr.task)
		cpusetCgroupPath, err := tr.cpusetCgroupPathGetter(tr.killCtx)
		if err != nil {
			return err
		}
		taskConfig.Resources.LinuxResources.CpusetCgroupPath = cpusetCgroupPath
	}

	// Build hcl context variables
	vars, errs, err := tr.envBuilder.Build().AllValues()
	if err != nil {
		return fmt.Errorf("error building environment variables: %v", err)
	}

	// Handle per-key errors
	if len(errs) > 0 {
		keys := make([]string, 0, len(errs))
		for k, err := range errs {
			keys = append(keys, k)

			if tr.logger.IsTrace() {
				// Verbosely log every diagnostic for debugging
				tr.logger.Trace("error building environment variables", "key", k, "error", err)
			}
		}

		tr.logger.Warn("some environment variables not available for rendering", "keys", strings.Join(keys, ", "))
	}

	val, diag, diagErrs := hclutils.ParseHclInterface(tr.task.Config, tr.taskSchema, vars)
	if diag.HasErrors() {
		parseErr := multierror.Append(errors.New("failed to parse config: "), diagErrs...)
		tr.EmitEvent(structs.NewTaskEvent(structs.TaskFailedValidation).SetValidationError(parseErr))
		return parseErr
	}

	if err := taskConfig.EncodeDriverConfig(val); err != nil {
		encodeErr := fmt.Errorf("failed to encode driver config: %v", err)
		tr.EmitEvent(structs.NewTaskEvent(structs.TaskFailedValidation).SetValidationError(encodeErr))
		return encodeErr
	}

	// If there's already a task handle (eg from a Restore) there's nothing
	// to do except update state.
	if tr.getDriverHandle() != nil {
		// Ensure running state is persisted but do *not* append a new
		// task event as restoring is a client event and not relevant
		// to a task's lifecycle.
		if err := tr.updateStateImpl(structs.TaskStateRunning); err != nil {
			//TODO return error and destroy task to avoid an orphaned task?
			tr.logger.Warn("error persisting task state", "error", err)
		}
		return nil
	}

	// Start the job if there's no existing handle (or if RecoverTask failed)
	handle, net, err := tr.driver.StartTask(taskConfig)
	if err != nil {
		// The plugin has died, try relaunching it
		if err == bstructs.ErrPluginShutdown {
			tr.logger.Info("failed to start task because plugin shutdown unexpectedly; attempting to recover")
			if err := tr.initDriver(); err != nil {
				taskErr := fmt.Errorf("failed to initialize driver after it exited unexpectedly: %v", err)
				tr.EmitEvent(structs.NewTaskEvent(structs.TaskDriverFailure).SetDriverError(taskErr))
				return taskErr
			}

			handle, net, err = tr.driver.StartTask(taskConfig)
			if err != nil {
				taskErr := fmt.Errorf("failed to start task after driver exited unexpectedly: %v", err)
				tr.EmitEvent(structs.NewTaskEvent(structs.TaskDriverFailure).SetDriverError(taskErr))
				return taskErr
			}
		} else {
			// Do *NOT* wrap the error here without maintaining whether or not
			// it is Recoverable. You must emit a task event failure to be
			// considered Recoverable.
			tr.EmitEvent(structs.NewTaskEvent(structs.TaskDriverFailure).SetDriverError(err))
			return err
		}
	}

	tr.stateLock.Lock()
	tr.localState.TaskHandle = handle
	tr.localState.DriverNetwork = net
	if err := tr.stateDB.PutTaskRunnerLocalState(tr.allocID, tr.taskName, tr.localState); err != nil {
		//TODO Nomad will be unable to restore this task; try to kill
		// it now and fail? In general we prefer to leave running
		// tasks running even if the agent encounters an error.
		tr.logger.Warn("error persisting local task state; may be unable to restore after a Nomad restart",
			"error", err, "task_id", handle.Config.ID)
	}
	tr.stateLock.Unlock()

	tr.setDriverHandle(NewDriverHandle(tr.driver, taskConfig.ID, tr.Task(), tr.clientConfig.MaxKillTimeout, net))

	// Emit an event that we started
	tr.UpdateState(structs.TaskStateRunning, structs.NewTaskEvent(structs.TaskStarted))
	return nil
}

// initDriver retrieves the DriverPlugin from the plugin loader for this task
func (tr *TaskRunner) initDriver() error {
	driver, err := tr.driverManager.Dispense(tr.Task().Driver)
	if err != nil {
		return err
	}
	tr.driver = driver

	schema, err := tr.driver.TaskConfigSchema()
	if err != nil {
		return err
	}
	spec, diag := hclspecutils.Convert(schema)
	if diag.HasErrors() {
		return multierror.Append(errors.New("failed to convert task schema"), diag.Errs()...)
	}
	tr.taskSchema = spec

	caps, err := tr.driver.Capabilities()
	if err != nil {
		return err
	}
	tr.driverCapabilities = caps

	return nil
}
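// To make the flow above concrete: a jobspec task's config block, for example
// a docker task along the lines of
//
//	config {
//	  image   = "redis:7"
//	  command = "redis-server"
//	}
//
// arrives here as the raw tr.task.Config map. runDriver parses it against the
// driver-provided tr.taskSchema (built in initDriver) with the environment
// from envBuilder available for interpolation, then hands the decoded value to
// the driver via taskConfig.EncodeDriverConfig. The image and command values
// here are purely illustrative.
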
// handleKill is used to handle a request to kill a task. It will return the
// handle exit result if one is available and store any error in the task
// runner killErr value.
func (tr *TaskRunner) handleKill(resultCh <-chan *drivers.ExitResult) *drivers.ExitResult {
	// Run the pre killing hooks
	tr.preKill()

	// Wait for task ShutdownDelay after running prekill hooks
	// This allows for things like service de-registration to run
	// before waiting to kill task
	if delay := tr.Task().ShutdownDelay; delay != 0 {
		tr.logger.Debug("waiting before killing task", "shutdown_delay", delay)

		ev := structs.NewTaskEvent(structs.TaskWaitingShuttingDownDelay).
			SetDisplayMessage(fmt.Sprintf("Waiting for shutdown_delay of %s before killing the task.", delay))
		tr.UpdateState(structs.TaskStatePending, ev)

		select {
		case result := <-resultCh:
			return result
		case <-tr.shutdownDelayCtx.Done():
			break
		case <-time.After(delay):
		}
	}

	// Tell the restart tracker that the task has been killed so it doesn't
	// attempt to restart it.
	tr.restartTracker.SetKilled()

	// Check it is running
	select {
	case result := <-resultCh:
		return result
	default:
	}

	handle := tr.getDriverHandle()
	if handle == nil {
		return nil
	}

	// Kill the task using an exponential backoff in case of failures.
	result, killErr := tr.killTask(handle, resultCh)
	if killErr != nil {
		// We couldn't successfully destroy the resource created.
		tr.logger.Error("failed to kill task. Resources may have been leaked", "error", killErr)
		tr.setKillErr(killErr)
	}

	if result != nil {
		return result
	}

	// Block until task has exited.
	if resultCh == nil {
		var err error
		resultCh, err = handle.WaitCh(tr.shutdownCtx)

		// The error should be nil or TaskNotFound, if it's something else then a
		// failure in the driver or transport layer occurred
		if err != nil {
			if err == drivers.ErrTaskNotFound {
				return nil
			}
			tr.logger.Error("failed to wait on task. Resources may have been leaked", "error", err)
			tr.setKillErr(err)
			return nil
		}
	}

	select {
	case result := <-resultCh:
		return result
	case <-tr.shutdownCtx.Done():
		return nil
	}
}

// killTask kills the task handle. In the case that killing fails,
// killTask will retry with an exponential backoff and will give up at a
// given limit. Returns an error if the task could not be killed.
func (tr *TaskRunner) killTask(handle *DriverHandle, resultCh <-chan *drivers.ExitResult) (*drivers.ExitResult, error) {
	// Cap the number of times we attempt to kill the task.
	var err error
	for i := 0; i < killFailureLimit; i++ {
		if err = handle.Kill(); err != nil {
			if err == drivers.ErrTaskNotFound {
				tr.logger.Warn("couldn't find task to kill", "task_id", handle.ID())
				return nil, nil
			}
			// Calculate the new backoff
			backoff := (1 << (2 * uint64(i))) * killBackoffBaseline
			if backoff > killBackoffLimit {
				backoff = killBackoffLimit
			}

			tr.logger.Error("failed to kill task", "backoff", backoff, "error", err)
			select {
			case result := <-resultCh:
				return result, nil
			case <-time.After(backoff):
			}
		} else {
			// Kill was successful
			return nil, nil
		}
	}
	return nil, err
}

// persistLocalState persists local state to disk synchronously.
func (tr *TaskRunner) persistLocalState() error {
	tr.stateLock.RLock()
	defer tr.stateLock.RUnlock()

	return tr.stateDB.PutTaskRunnerLocalState(tr.allocID, tr.taskName, tr.localState)
}

// buildTaskConfig builds a drivers.TaskConfig with a unique ID for the task.
// The ID is unique for every invocation, it is built from the alloc ID, task
// name and 8 random characters.
func (tr *TaskRunner) buildTaskConfig() *drivers.TaskConfig {
	task := tr.Task()
	alloc := tr.Alloc()
	invocationid := uuid.Generate()[:8]
	taskResources := tr.taskResources
	ports := tr.Alloc().AllocatedResources.Shared.Ports
	env := tr.envBuilder.Build()
	tr.networkIsolationLock.Lock()
	defer tr.networkIsolationLock.Unlock()

	var dns *drivers.DNSConfig
	if alloc.AllocatedResources != nil && len(alloc.AllocatedResources.Shared.Networks) > 0 {
		allocDNS := alloc.AllocatedResources.Shared.Networks[0].DNS
		if allocDNS != nil {
			interpolatedNetworks := taskenv.InterpolateNetworks(env, alloc.AllocatedResources.Shared.Networks)
			dns = &drivers.DNSConfig{
				Servers:  interpolatedNetworks[0].DNS.Servers,
				Searches: interpolatedNetworks[0].DNS.Searches,
				Options:  interpolatedNetworks[0].DNS.Options,
			}
		}
	}

	memoryLimit := taskResources.Memory.MemoryMB
	if max := taskResources.Memory.MemoryMaxMB; max > memoryLimit {
		memoryLimit = max
	}

	cpusetCpus := make([]string, len(taskResources.Cpu.ReservedCores))
	for i, v := range taskResources.Cpu.ReservedCores {
		cpusetCpus[i] = fmt.Sprintf("%d", v)
	}

	return &drivers.TaskConfig{
		ID:            fmt.Sprintf("%s/%s/%s", alloc.ID, task.Name, invocationid),
		Name:          task.Name,
		JobName:       alloc.Job.Name,
		JobID:         alloc.Job.ID,
		TaskGroupName: alloc.TaskGroup,
		Namespace:     alloc.Namespace,
		NodeName:      alloc.NodeName,
		NodeID:        alloc.NodeID,
		Resources: &drivers.Resources{
			NomadResources: taskResources,
			LinuxResources: &drivers.LinuxResources{
				MemoryLimitBytes: memoryLimit * 1024 * 1024,
				CPUShares:        taskResources.Cpu.CpuShares,
				CpusetCpus:       strings.Join(cpusetCpus, ","),
				PercentTicks:     float64(taskResources.Cpu.CpuShares) / float64(tr.clientConfig.Node.NodeResources.Cpu.CpuShares),
			},
			Ports: &ports,
		},
		Devices:          tr.hookResources.getDevices(),
		Mounts:           tr.hookResources.getMounts(),
		Env:              env.Map(),
		DeviceEnv:        env.DeviceEnv(),
		User:             task.User,
		AllocDir:         tr.taskDir.AllocDir,
		StdoutPath:       tr.logmonHookConfig.stdoutFifo,
		StderrPath:       tr.logmonHookConfig.stderrFifo,
		AllocID:          tr.allocID,
		NetworkIsolation: tr.networkIsolationSpec,
		DNS:              dns,
	}
}
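// As a concrete illustration of the sizing logic above: a task reserving
// MemoryMB=256 with MemoryMaxMB=512 and reserved cores 0 and 3 would produce
// (the numbers themselves are made up; the derivation follows the code):
//
//	MemoryLimitBytes: 512 * 1024 * 1024
//	CpusetCpus:       "0,3"
//
// i.e. the driver receives the max memory limit whenever one is set, and the
// reserved core IDs joined into a cpuset string.
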
// Restore task runner state. Called by AllocRunner.Restore after NewTaskRunner
// but before Run so no locks need to be acquired.
func (tr *TaskRunner) Restore() error {
	ls, ts, err := tr.stateDB.GetTaskRunnerState(tr.allocID, tr.taskName)
	if err != nil {
		return err
	}

	if ls != nil {
		ls.Canonicalize()
		tr.localState = ls
	}

	if ts != nil {
		ts.Canonicalize()
		tr.state = ts
	}

	// If a TaskHandle was persisted, ensure it is valid or destroy it.
	if taskHandle := tr.localState.TaskHandle; taskHandle != nil {
		//TODO if RecoverTask returned the DriverNetwork we wouldn't
		// have to persist it at all!
		restored := tr.restoreHandle(taskHandle, tr.localState.DriverNetwork)

		// If the handle could not be restored, the alloc is
		// non-terminal, and the task isn't a system job: wait until
		// servers have been contacted before running. #1795
		if restored {
			return nil
		}

		alloc := tr.Alloc()
		if tr.state.State == structs.TaskStateDead || alloc.TerminalStatus() || alloc.Job.Type == structs.JobTypeSystem {
			return nil
		}

		tr.logger.Trace("failed to reattach to task; will not run until server is contacted")
		tr.waitOnServers = true

		ev := structs.NewTaskEvent(structs.TaskRestoreFailed).
			SetDisplayMessage("failed to restore task; will not run until server is contacted")
		tr.UpdateState(structs.TaskStatePending, ev)
	}

	return nil
}

// restoreHandle ensures a TaskHandle is valid by calling Driver.RecoverTask
// and sets the driver handle. If the TaskHandle is not valid, DestroyTask is
// called.
func (tr *TaskRunner) restoreHandle(taskHandle *drivers.TaskHandle, net *drivers.DriverNetwork) (success bool) {
	// Ensure handle is well-formed
	if taskHandle.Config == nil {
		return true
	}

	if err := tr.driver.RecoverTask(taskHandle); err != nil {
		if tr.TaskState().State != structs.TaskStateRunning {
			// RecoverTask should fail if the Task wasn't running
			return true
		}

		tr.logger.Error("error recovering task; cleaning up",
			"error", err, "task_id", taskHandle.Config.ID)

		// Try to cleanup any existing task state in the plugin before restarting
		if err := tr.driver.DestroyTask(taskHandle.Config.ID, true); err != nil {
			// Ignore ErrTaskNotFound errors as ideally
			// this task has already been stopped and
			// therefore doesn't exist.
			if err != drivers.ErrTaskNotFound {
				tr.logger.Warn("error destroying unrecoverable task",
					"error", err, "task_id", taskHandle.Config.ID)
			}
		}

		return false
	}

	// Update driver handle on task runner
	tr.setDriverHandle(NewDriverHandle(tr.driver, taskHandle.Config.ID, tr.Task(), tr.clientConfig.MaxKillTimeout, net))
	return true
}

// UpdateState sets the task runner's allocation state and triggers a server
// update.
func (tr *TaskRunner) UpdateState(state string, event *structs.TaskEvent) {
	tr.stateLock.Lock()
	defer tr.stateLock.Unlock()

	tr.logger.Trace("setting task state", "state", state)

	if event != nil {
		tr.logger.Trace("appending task event", "state", state, "event", event.Type)

		// Append the event
		tr.appendEvent(event)
	}

	// Update the state
	if err := tr.updateStateImpl(state); err != nil {
		// Only log the error as persistence errors should not
		// affect task state.
		tr.logger.Error("error persisting task state", "error", err, "event", event, "state", state)
	}

	// Store task handle for remote tasks
	if tr.driverCapabilities != nil && tr.driverCapabilities.RemoteTasks {
		tr.logger.Trace("storing remote task handle state")
		tr.localState.TaskHandle.Store(tr.state)
	}

	// Notify the alloc runner of the transition
	tr.stateUpdater.TaskStateUpdated()
}

// updateStateImpl updates the in-memory task state and persists to disk.
func (tr *TaskRunner) updateStateImpl(state string) error {

	// Update the task state
	oldState := tr.state.State
	taskState := tr.state
	taskState.State = state

	// Handle the state transition.
	switch state {
	case structs.TaskStateRunning:
		// Capture the start time if it is just starting
		if oldState != structs.TaskStateRunning {
			taskState.StartedAt = time.Now().UTC()
			metrics.IncrCounterWithLabels([]string{"client", "allocs", "running"}, 1, tr.baseLabels)
		}
	case structs.TaskStateDead:
		// Capture the finished time if not already set
		if taskState.FinishedAt.IsZero() {
			taskState.FinishedAt = time.Now().UTC()
		}

		// Emitting metrics to indicate task complete and failures
		if taskState.Failed {
			metrics.IncrCounterWithLabels([]string{"client", "allocs", "failed"}, 1, tr.baseLabels)
		} else {
			metrics.IncrCounterWithLabels([]string{"client", "allocs", "complete"}, 1, tr.baseLabels)
		}
	}

	// Persist the state and event
	return tr.stateDB.PutTaskState(tr.allocID, tr.taskName, taskState)
}

// EmitEvent appends a new TaskEvent to this task's TaskState. The actual
// TaskState.State (pending, running, dead) is not changed. Use UpdateState to
// transition states.
// Events are persisted locally and sent to the server, but errors are simply
// logged. Use AppendEvent to simply add a new event.
func (tr *TaskRunner) EmitEvent(event *structs.TaskEvent) {
	tr.stateLock.Lock()
	defer tr.stateLock.Unlock()

	tr.appendEvent(event)

	if err := tr.stateDB.PutTaskState(tr.allocID, tr.taskName, tr.state); err != nil {
		// Only a warning because the next event/state-transition will
		// try to persist it again.
		tr.logger.Warn("error persisting event", "error", err, "event", event)
	}

	// Notify the alloc runner of the event
	tr.stateUpdater.TaskStateUpdated()
}

// AppendEvent appends a new TaskEvent to this task's TaskState. The actual
// TaskState.State (pending, running, dead) is not changed. Use UpdateState to
// transition states.
// Events are persisted locally and errors are simply logged. Use EmitEvent to
// also update the AllocRunner.
func (tr *TaskRunner) AppendEvent(event *structs.TaskEvent) {
	tr.stateLock.Lock()
	defer tr.stateLock.Unlock()

	tr.appendEvent(event)

	if err := tr.stateDB.PutTaskState(tr.allocID, tr.taskName, tr.state); err != nil {
		// Only a warning because the next event/state-transition will
		// try to persist it again.
		tr.logger.Warn("error persisting event", "error", err, "event", event)
	}
}

// appendEvent to task's event slice. Caller must acquire stateLock.
func (tr *TaskRunner) appendEvent(event *structs.TaskEvent) error {
	// Ensure the event is populated with human readable strings
	event.PopulateEventDisplayMessage()

	// Propagate failure from event to task state
	if event.FailsTask {
		tr.state.Failed = true
	}

	// XXX This seems like a super awkward spot for this? Why not shouldRestart?
	// Update restart metrics
	if event.Type == structs.TaskRestarting {
		metrics.IncrCounterWithLabels([]string{"client", "allocs", "restart"}, 1, tr.baseLabels)
		tr.state.Restarts++
		tr.state.LastRestart = time.Unix(0, event.Time)
	}

	// Append event to slice
	appendTaskEvent(tr.state, event, tr.maxEvents)

	return nil
}
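// Taken together, the three entry points above differ only in how far a change
// propagates: UpdateState transitions TaskState.State, persists, and notifies
// the alloc runner; EmitEvent keeps the current state but persists the event
// and notifies the alloc runner; AppendEvent only records and persists the
// event. For example (mirroring the restore-failure path above):
//
//	ev := structs.NewTaskEvent(structs.TaskRestoreFailed).
//		SetDisplayMessage("failed to restore task; will not run until server is contacted")
//	tr.UpdateState(structs.TaskStatePending, ev)
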
// WaitCh is closed when TaskRunner.Run exits.
func (tr *TaskRunner) WaitCh() <-chan struct{} {
	return tr.waitCh
}

// Update the running allocation with a new version received from the server.
// Calls Update hooks asynchronously with Run.
//
// This method is safe for calling concurrently with Run and does not modify
// the passed in allocation.
func (tr *TaskRunner) Update(update *structs.Allocation) {
	task := update.LookupTask(tr.taskName)
	if task == nil {
		// This should not happen and likely indicates a bug in the
		// server or client.
		tr.logger.Error("allocation update is missing task; killing",
			"group", update.TaskGroup)
		te := structs.NewTaskEvent(structs.TaskKilled).
			SetKillReason("update missing task").
			SetFailsTask()
		tr.Kill(context.Background(), te)
		return
	}

	// Update tr.alloc
	tr.setAlloc(update, task)

	// Trigger update hooks if not terminal
	if !update.TerminalStatus() {
		tr.triggerUpdateHooks()
	}
}

// SetNetworkIsolation is called by the PreRun allocation hook after configuring
// the network isolation for the allocation
func (tr *TaskRunner) SetNetworkIsolation(n *drivers.NetworkIsolationSpec) {
	tr.networkIsolationLock.Lock()
	tr.networkIsolationSpec = n
	tr.networkIsolationLock.Unlock()
}

// triggerUpdateHooks queues an update if there isn't already one pending.
// Should be called instead of calling updateHooks directly to serialize runs
// of update hooks. TaskRunner state should be updated prior to triggering
// update hooks.
//
// Does not block.
func (tr *TaskRunner) triggerUpdateHooks() {
	select {
	case tr.triggerUpdateCh <- struct{}{}:
	default:
		// already an update hook pending
	}
}

// Shutdown TaskRunner gracefully without affecting the state of the task.
// Shutdown blocks until the main Run loop exits.
func (tr *TaskRunner) Shutdown() {
	tr.logger.Trace("shutting down")
	tr.shutdownCtxCancel()

	<-tr.WaitCh()

	// Run shutdown hooks to cleanup
	tr.shutdownHooks()

	// Persist once more
	tr.persistLocalState()
}

// LatestResourceUsage returns the last resource utilization datapoint
// collected. May return nil if the task is not running or no resource
// utilization has been collected yet.
func (tr *TaskRunner) LatestResourceUsage() *cstructs.TaskResourceUsage {
	tr.resourceUsageLock.Lock()
	ru := tr.resourceUsage
	tr.resourceUsageLock.Unlock()

	// Look up device statistics lazily when fetched, as currently we do not emit any stats for them yet
	if ru != nil && tr.deviceStatsReporter != nil {
		deviceResources := tr.taskResources.Devices
		ru.ResourceUsage.DeviceStats = tr.deviceStatsReporter.LatestDeviceResourceStats(deviceResources)
	}
	return ru
}

// UpdateStats updates and emits the latest stats from the driver.
func (tr *TaskRunner) UpdateStats(ru *cstructs.TaskResourceUsage) {
	tr.resourceUsageLock.Lock()
	tr.resourceUsage = ru
	tr.resourceUsageLock.Unlock()
	if ru != nil {
		tr.emitStats(ru)
	}
}
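// The gauges emitted by the helpers below are only published when
// PublishAllocationMetrics is enabled in the client config, under keys along
// the lines of client.allocs.memory.rss, client.allocs.memory.allocated,
// client.allocs.cpu.total_percent, and client.allocs.cpu.allocated, each
// tagged with tr.baseLabels (job, task_group, alloc_id, task, namespace, and
// optionally the parent/dispatch/periodic IDs set in initLabels).
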
// TODO Remove Backwardscompat or use tr.Alloc()?
func (tr *TaskRunner) setGaugeForMemory(ru *cstructs.TaskResourceUsage) {
	alloc := tr.Alloc()
	var allocatedMem float32
	if taskRes := alloc.AllocatedResources.Tasks[tr.taskName]; taskRes != nil {
		// Convert to bytes to match other memory metrics
		allocatedMem = float32(taskRes.Memory.MemoryMB) * 1024 * 1024
	}

	ms := ru.ResourceUsage.MemoryStats

	publishMetric := func(v uint64, reported, measured string) {
		if v != 0 || slices.Contains(ms.Measured, measured) {
			metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", reported},
				float32(v), tr.baseLabels)
		}
	}

	publishMetric(ms.RSS, "rss", "RSS")
	publishMetric(ms.Cache, "cache", "Cache")
	publishMetric(ms.Swap, "swap", "Swap")
	publishMetric(ms.MappedFile, "mapped_file", "Mapped File")
	publishMetric(ms.Usage, "usage", "Usage")
	publishMetric(ms.MaxUsage, "max_usage", "Max Usage")
	publishMetric(ms.KernelUsage, "kernel_usage", "Kernel Usage")
	publishMetric(ms.KernelMaxUsage, "kernel_max_usage", "Kernel Max Usage")
	if allocatedMem > 0 {
		metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "allocated"},
			allocatedMem, tr.baseLabels)
	}
}

// TODO Remove Backwardscompat or use tr.Alloc()?
func (tr *TaskRunner) setGaugeForCPU(ru *cstructs.TaskResourceUsage) {
	alloc := tr.Alloc()
	var allocatedCPU float32
	if taskRes := alloc.AllocatedResources.Tasks[tr.taskName]; taskRes != nil {
		allocatedCPU = float32(taskRes.Cpu.CpuShares)
	}

	metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "total_percent"},
		float32(ru.ResourceUsage.CpuStats.Percent), tr.baseLabels)
	metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "system"},
		float32(ru.ResourceUsage.CpuStats.SystemMode), tr.baseLabels)
	metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "user"},
		float32(ru.ResourceUsage.CpuStats.UserMode), tr.baseLabels)
	metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "throttled_time"},
		float32(ru.ResourceUsage.CpuStats.ThrottledTime), tr.baseLabels)
	metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "throttled_periods"},
		float32(ru.ResourceUsage.CpuStats.ThrottledPeriods), tr.baseLabels)
	metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "total_ticks"},
		float32(ru.ResourceUsage.CpuStats.TotalTicks), tr.baseLabels)
	if allocatedCPU > 0 {
		metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "allocated"},
			allocatedCPU, tr.baseLabels)
	}
}

// emitStats emits resource usage stats of tasks to remote metrics collector
// sinks
func (tr *TaskRunner) emitStats(ru *cstructs.TaskResourceUsage) {
	if !tr.clientConfig.PublishAllocationMetrics {
		return
	}

	if ru.ResourceUsage.MemoryStats != nil {
		tr.setGaugeForMemory(ru)
	} else {
		tr.logger.Debug("Skipping memory stats for allocation", "reason", "MemoryStats is nil")
	}

	if ru.ResourceUsage.CpuStats != nil {
		tr.setGaugeForCPU(ru)
	} else {
		tr.logger.Debug("Skipping cpu stats for allocation", "reason", "CpuStats is nil")
	}
}

// appendTaskEvent updates the task status by appending the new event.
func appendTaskEvent(state *structs.TaskState, event *structs.TaskEvent, capacity int) {
	if state.Events == nil {
		state.Events = make([]*structs.TaskEvent, 1, capacity)
		state.Events[0] = event
		return
	}

	// If we hit capacity, then shift it.
	if len(state.Events) == capacity {
		old := state.Events
		state.Events = make([]*structs.TaskEvent, 0, capacity)
		state.Events = append(state.Events, old[1:]...)
	}

	state.Events = append(state.Events, event)
}

func (tr *TaskRunner) TaskExecHandler() drivermanager.TaskExecHandler {
	// Check it is running
	handle := tr.getDriverHandle()
	if handle == nil {
		return nil
	}
	return handle.ExecStreaming
}

func (tr *TaskRunner) DriverCapabilities() (*drivers.Capabilities, error) {
	return tr.driver.Capabilities()
}

func (tr *TaskRunner) SetAllocHookResources(res *cstructs.AllocHookResources) {
	tr.allocHookResources = res
}

// shutdownDelayCancel is used for testing only and cancels the
// shutdownDelayCtx
func (tr *TaskRunner) shutdownDelayCancel() {
	tr.shutdownDelayCancelFn()
}