github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/allocrunner/alloc_runner.go

package allocrunner

import (
	"context"
	"fmt"
	"sync"
	"time"

	log "github.com/hashicorp/go-hclog"
	multierror "github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/client/allocdir"
	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
	"github.com/hashicorp/nomad/client/allocrunner/state"
	"github.com/hashicorp/nomad/client/allocrunner/tasklifecycle"
	"github.com/hashicorp/nomad/client/allocrunner/taskrunner"
	"github.com/hashicorp/nomad/client/allocwatcher"
	"github.com/hashicorp/nomad/client/config"
	"github.com/hashicorp/nomad/client/consul"
	"github.com/hashicorp/nomad/client/devicemanager"
	"github.com/hashicorp/nomad/client/dynamicplugins"
	cinterfaces "github.com/hashicorp/nomad/client/interfaces"
	"github.com/hashicorp/nomad/client/lib/cgutil"
	"github.com/hashicorp/nomad/client/pluginmanager/csimanager"
	"github.com/hashicorp/nomad/client/pluginmanager/drivermanager"
	"github.com/hashicorp/nomad/client/serviceregistration"
	"github.com/hashicorp/nomad/client/serviceregistration/checks/checkstore"
	"github.com/hashicorp/nomad/client/serviceregistration/wrapper"
	cstate "github.com/hashicorp/nomad/client/state"
	cstructs "github.com/hashicorp/nomad/client/structs"
	"github.com/hashicorp/nomad/client/vaultclient"
	"github.com/hashicorp/nomad/helper/pointer"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/plugins/device"
	"github.com/hashicorp/nomad/plugins/drivers"
)

// allocRunner is used to run all the tasks in a given allocation
type allocRunner struct {
	// id is the ID of the allocation. Can be accessed without a lock
	id string

	// Logger is the logger for the alloc runner.
	logger log.Logger

	// clientConfig is the client configuration block.
	clientConfig *config.Config

	// stateUpdater is used to emit updated alloc state
	stateUpdater cinterfaces.AllocStateHandler

	// taskStateUpdatedCh is ticked whenever task state has changed. Must
	// have len==1 to allow nonblocking notification of state updates while
	// the goroutine is already processing a previous update.
	taskStateUpdatedCh chan struct{}

	// taskStateUpdateHandlerCh is closed when the task state handling
	// goroutine exits. It is unsafe to destroy the local allocation state
	// before this goroutine exits.
	taskStateUpdateHandlerCh chan struct{}

	// allocUpdatedCh is a channel that is used to stream allocation updates into
	// the allocUpdate handler. Must have len==1 to allow nonblocking notification
	// of new allocation updates while the goroutine is processing a previous
	// update.
	allocUpdatedCh chan *structs.Allocation

	// consulClient is the client used by the consul service hook for
	// registering services and checks
	consulClient serviceregistration.Handler

	// consulProxiesClient is the client used by the envoy version hook for
	// looking up supported envoy versions of the consul agent.
	consulProxiesClient consul.SupportedProxiesAPI

	// sidsClient is the client used by the service identity hook for
	// managing SI tokens
	sidsClient consul.ServiceIdentityAPI

	// vaultClient is used to manage Vault tokens
	vaultClient vaultclient.VaultClient

	// waitCh is closed when the Run loop has exited
	waitCh chan struct{}

	// destroyed is true when the Run loop has exited, postrun hooks have
	// run, and alloc runner has been destroyed. Must acquire destroyedLock
	// to access.
	destroyed bool

	// destroyCh is closed when the Run loop has exited, postrun hooks have
	// run, and alloc runner has been destroyed.
	destroyCh chan struct{}

	// shutdown is true when the Run loop has exited, and shutdown hooks have
	// run. Must acquire destroyedLock to access.
	shutdown bool

	// shutdownCh is closed when the Run loop has exited, and shutdown hooks
	// have run.
	shutdownCh chan struct{}

	// destroyLaunched is true if Destroy has been called. Must acquire
	// destroyedLock to access.
	destroyLaunched bool

	// shutdownLaunched is true if Shutdown has been called. Must acquire
	// destroyedLock to access.
	shutdownLaunched bool

	// destroyedLock guards destroyed, destroyLaunched, shutdownLaunched,
	// and serializes Shutdown/Destroy calls.
	destroyedLock sync.Mutex

	// Alloc captures the allocation being run.
	alloc     *structs.Allocation
	allocLock sync.RWMutex

	// state is the alloc runner's state
	state     *state.State
	stateLock sync.RWMutex

	stateDB cstate.StateDB

	// allocDir is used to build the allocations directory structure.
	allocDir *allocdir.AllocDir

	// runnerHooks are alloc runner lifecycle hooks that should be run on state
	// transitions.
	runnerHooks []interfaces.RunnerHook

	// hookState is the output of allocrunner hooks
	hookState   *cstructs.AllocHookResources
	hookStateMu sync.RWMutex

	// tasks are the set of task runners
	tasks map[string]*taskrunner.TaskRunner

	// deviceStatsReporter is used to lookup resource usage for alloc devices
	deviceStatsReporter cinterfaces.DeviceStatsReporter

	// allocBroadcaster sends client allocation updates to all listeners
	allocBroadcaster *cstructs.AllocBroadcaster

	// prevAllocWatcher allows waiting for any previous or preempted allocations
	// to exit
	prevAllocWatcher allocwatcher.PrevAllocWatcher

	// prevAllocMigrator allows the migration of a previous allocation's alloc dir.
	prevAllocMigrator allocwatcher.PrevAllocMigrator

	// dynamicRegistry contains all locally registered dynamic plugins (e.g. CSI
	// plugins).
	dynamicRegistry dynamicplugins.Registry

	// csiManager is used to wait for CSI Volumes to be attached, and by the task
	// runner to manage their mounting
	csiManager csimanager.Manager

	// cpusetManager is responsible for configuring task cgroups if supported by the platform
	cpusetManager cgutil.CpusetManager

	// devicemanager is used to mount devices as well as lookup device
	// statistics
	devicemanager devicemanager.Manager

	// driverManager is responsible for dispensing driver plugins and registering
	// event handlers
	driverManager drivermanager.Manager

	// serversContactedCh is passed to TaskRunners so they can detect when
	// servers have been contacted for the first time in case of a failed
	// restore.
	serversContactedCh chan struct{}

	// taskCoordinator is used to control when tasks are allowed to run
	// depending on their lifecycle configuration.
	taskCoordinator *tasklifecycle.Coordinator

	shutdownDelayCtx      context.Context
	shutdownDelayCancelFn context.CancelFunc

	// rpcClient is the RPC Client that should be used by the allocrunner and its
	// hooks to communicate with Nomad Servers.
	rpcClient RPCer

	// serviceRegWrapper is the handler wrapper that is used by service hooks
	// to perform service and check registration and deregistration.
	serviceRegWrapper *wrapper.HandlerWrapper

	// checkStore contains check status information
	checkStore checkstore.Shim

	// getter is an interface for retrieving artifacts.
	getter cinterfaces.ArtifactGetter
}

// RPCer is the interface needed by hooks to make RPC calls.
type RPCer interface {
	RPC(method string, args interface{}, reply interface{}) error
}

// NewAllocRunner returns a new allocation runner.
func NewAllocRunner(config *Config) (*allocRunner, error) {
	alloc := config.Alloc
	tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup)
	if tg == nil {
		return nil, fmt.Errorf("failed to lookup task group %q", alloc.TaskGroup)
	}

	ar := &allocRunner{
		id:                       alloc.ID,
		alloc:                    alloc,
		clientConfig:             config.ClientConfig,
		consulClient:             config.Consul,
		consulProxiesClient:      config.ConsulProxies,
		sidsClient:               config.ConsulSI,
		vaultClient:              config.Vault,
		tasks:                    make(map[string]*taskrunner.TaskRunner, len(tg.Tasks)),
		waitCh:                   make(chan struct{}),
		destroyCh:                make(chan struct{}),
		shutdownCh:               make(chan struct{}),
		state:                    &state.State{},
		stateDB:                  config.StateDB,
		stateUpdater:             config.StateUpdater,
		taskStateUpdatedCh:       make(chan struct{}, 1),
		taskStateUpdateHandlerCh: make(chan struct{}),
		allocUpdatedCh:           make(chan *structs.Allocation, 1),
		deviceStatsReporter:      config.DeviceStatsReporter,
		prevAllocWatcher:         config.PrevAllocWatcher,
		prevAllocMigrator:        config.PrevAllocMigrator,
		dynamicRegistry:          config.DynamicRegistry,
		csiManager:               config.CSIManager,
		cpusetManager:            config.CpusetManager,
		devicemanager:            config.DeviceManager,
		driverManager:            config.DriverManager,
		serversContactedCh:       config.ServersContactedCh,
		rpcClient:                config.RPCClient,
		serviceRegWrapper:        config.ServiceRegWrapper,
		checkStore:               config.CheckStore,
		getter:                   config.Getter,
	}

	// Create the logger based on the allocation ID
	ar.logger = config.Logger.Named("alloc_runner").With("alloc_id", alloc.ID)

	// Create alloc broadcaster
	ar.allocBroadcaster = cstructs.NewAllocBroadcaster(ar.logger)

	// Create alloc dir
	ar.allocDir = allocdir.NewAllocDir(ar.logger, config.ClientConfig.AllocDir, alloc.ID)

	ar.taskCoordinator = tasklifecycle.NewCoordinator(ar.logger, tg.Tasks, ar.waitCh)

	shutdownDelayCtx, shutdownDelayCancel := context.WithCancel(context.Background())
	ar.shutdownDelayCtx = shutdownDelayCtx
	ar.shutdownDelayCancelFn = shutdownDelayCancel

	// Initialize the runners hooks.
	if err := ar.initRunnerHooks(config.ClientConfig); err != nil {
		return nil, err
	}

	// Create the TaskRunners
	if err := ar.initTaskRunners(tg.Tasks); err != nil {
		return nil, err
	}

	return ar, nil
}

// initTaskRunners creates task runners but does *not* run them.
func (ar *allocRunner) initTaskRunners(tasks []*structs.Task) error {
	for _, task := range tasks {
		trConfig := &taskrunner.Config{
			Alloc:               ar.alloc,
			ClientConfig:        ar.clientConfig,
			Task:                task,
			TaskDir:             ar.allocDir.NewTaskDir(task.Name),
			Logger:              ar.logger,
			StateDB:             ar.stateDB,
			StateUpdater:        ar,
			DynamicRegistry:     ar.dynamicRegistry,
			Consul:              ar.consulClient,
			ConsulProxies:       ar.consulProxiesClient,
			ConsulSI:            ar.sidsClient,
			Vault:               ar.vaultClient,
			DeviceStatsReporter: ar.deviceStatsReporter,
			CSIManager:          ar.csiManager,
			DeviceManager:       ar.devicemanager,
			DriverManager:       ar.driverManager,
			ServersContactedCh:  ar.serversContactedCh,
			StartConditionMetCh: ar.taskCoordinator.StartConditionForTask(task),
			ShutdownDelayCtx:    ar.shutdownDelayCtx,
			ServiceRegWrapper:   ar.serviceRegWrapper,
			Getter:              ar.getter,
		}

		if ar.cpusetManager != nil {
			trConfig.CpusetCgroupPathGetter = ar.cpusetManager.CgroupPathFor(ar.id, task.Name)
		}

		// Create, but do not Run, the task runner
		tr, err := taskrunner.NewTaskRunner(trConfig)
		if err != nil {
			return fmt.Errorf("failed creating runner for task %q: %v", task.Name, err)
		}

		ar.tasks[task.Name] = tr
	}
	return nil
}

func (ar *allocRunner) WaitCh() <-chan struct{} {
	return ar.waitCh
}

// Run the AllocRunner. Starts tasks if the alloc is non-terminal and closes
// WaitCh when it exits. Should be started in a goroutine.
func (ar *allocRunner) Run() {
	// Close the wait channel on return
	defer close(ar.waitCh)

	// Start the task state update handler
	go ar.handleTaskStateUpdates()

	// Start the alloc update handler
	go ar.handleAllocUpdates()

	// If task update chan has been closed, that means we've been shutdown.
	select {
	case <-ar.taskStateUpdateHandlerCh:
		return
	default:
	}

	// When handling a (potentially restored) terminal alloc, make sure tasks
	// and post-run hooks still run to perform any cleanup that may not have
	// been done before the earlier termination.

	// Run the prestart hooks if non-terminal
	if ar.shouldRun() {
		if err := ar.prerun(); err != nil {
			ar.logger.Error("prerun failed", "error", err)

			for _, tr := range ar.tasks {
				tr.MarkFailedDead(fmt.Sprintf("failed to setup alloc: %v", err))
			}

			goto POST
		}
	}

	// Run the runners (blocks until they exit)
	ar.runTasks()

POST:
	if ar.isShuttingDown() {
		return
	}

	// Run the postrun hooks
	if err := ar.postrun(); err != nil {
		ar.logger.Error("postrun failed", "error", err)
	}
}

// shouldRun returns true if the alloc is in a state where the alloc runner
// should run it.
func (ar *allocRunner) shouldRun() bool {
	// Do not run allocs that are terminal
	if ar.Alloc().TerminalStatus() {
		ar.logger.Trace("alloc terminal; not running",
			"desired_status", ar.Alloc().DesiredStatus,
			"client_status", ar.Alloc().ClientStatus,
		)
		return false
	}

	// It's possible that the alloc local state was marked terminal before
	// the server copy of the alloc (checked above) was marked as terminal,
	// so check the local state as well.
	switch clientStatus := ar.AllocState().ClientStatus; clientStatus {
	case structs.AllocClientStatusComplete, structs.AllocClientStatusFailed, structs.AllocClientStatusLost:
		ar.logger.Trace("alloc terminal; updating server and not running", "status", clientStatus)
		return false
	}

	return true
}

// runTasks is used to run the task runners and block until they exit.
func (ar *allocRunner) runTasks() {
	// Start and wait for all tasks.
	for _, task := range ar.tasks {
		go task.Run()
	}
	for _, task := range ar.tasks {
		<-task.WaitCh()
	}
}

// Alloc returns the current allocation being run by this runner as sent by the
// server. This view of the allocation does not have updated task states.
func (ar *allocRunner) Alloc() *structs.Allocation {
	ar.allocLock.RLock()
	defer ar.allocLock.RUnlock()
	return ar.alloc
}

func (ar *allocRunner) setAlloc(updated *structs.Allocation) {
	ar.allocLock.Lock()
	ar.alloc = updated
	ar.allocLock.Unlock()
}

// GetAllocDir returns the alloc dir which is safe for concurrent use.
func (ar *allocRunner) GetAllocDir() *allocdir.AllocDir {
	return ar.allocDir
}

// Restore state from database. Must be called after NewAllocRunner but before
// Run.
func (ar *allocRunner) Restore() error {
	// Retrieve deployment status to avoid resetting it across agent
	// restarts. Once a deployment status is set Nomad no longer monitors
	// alloc health, so we must persist deployment state across restarts.
	ds, err := ar.stateDB.GetDeploymentStatus(ar.id)
	if err != nil {
		return err
	}

	ns, err := ar.stateDB.GetNetworkStatus(ar.id)
	if err != nil {
		return err
	}

	ar.stateLock.Lock()
	ar.state.DeploymentStatus = ds
	ar.state.NetworkStatus = ns
	ar.stateLock.Unlock()

	states := make(map[string]*structs.TaskState)

	// Restore task runners
	for _, tr := range ar.tasks {
		if err := tr.Restore(); err != nil {
			return err
		}
		states[tr.Task().Name] = tr.TaskState()
	}

	ar.taskCoordinator.Restore(states)

	return nil
}

// persistDeploymentStatus stores AllocDeploymentStatus.
func (ar *allocRunner) persistDeploymentStatus(ds *structs.AllocDeploymentStatus) {
	if err := ar.stateDB.PutDeploymentStatus(ar.id, ds); err != nil {
		// While any persistence errors are very bad, the worst case
		// scenario for failing to persist deployment status is that if
		// the agent is restarted it will monitor the deployment status
		// again. This could cause a deployment's status to change when
		// that shouldn't happen. However, allowing that seems better
		// than failing the entire allocation.
		ar.logger.Error("error storing deployment status", "error", err)
	}
}

// TaskStateUpdated is called by TaskRunner when a task's state has been
// updated. It does not process the update synchronously but instead notifies a
// goroutine that the state has changed. Since processing the state change may
// cause the task to be killed (thus changing its state again) it cannot be done
// synchronously as that would cause a deadlock due to reentrancy.
//
// The goroutine is used to compute changes to the alloc's ClientStatus and to
// update the server with the new state.
func (ar *allocRunner) TaskStateUpdated() {
	select {
	case ar.taskStateUpdatedCh <- struct{}{}:
	default:
		// already pending updates
	}
}

// handleTaskStateUpdates must be run in a goroutine as it monitors
// taskStateUpdatedCh for task state update notifications and processes task
// states.
//
// Processing task state updates must be done in a goroutine as it may have to
// kill tasks which causes further task state updates.
func (ar *allocRunner) handleTaskStateUpdates() {
	defer close(ar.taskStateUpdateHandlerCh)

	hasSidecars := hasSidecarTasks(ar.tasks)

	for done := false; !done; {
		select {
		case <-ar.taskStateUpdatedCh:
		case <-ar.waitCh:
			// Run has exited, sync once more to ensure final
			// states are collected.
			done = true
		}

		ar.logger.Trace("handling task state update", "done", done)

		// Set with the appropriate event if task runners should be
		// killed.
		var killEvent *structs.TaskEvent

		// If task runners should be killed, this is set to the task
		// name whose fault it is.
		killTask := ""

		// Task state has been updated; gather the state of the other tasks
		trNum := len(ar.tasks)
		liveRunners := make([]*taskrunner.TaskRunner, 0, trNum)
		states := make(map[string]*structs.TaskState, trNum)

		for name, tr := range ar.tasks {
			taskState := tr.TaskState()
			states[name] = taskState

			if tr.IsPoststopTask() {
				continue
			}

			// Capture live task runners in case we need to kill them
			if taskState.State != structs.TaskStateDead {
				liveRunners = append(liveRunners, tr)
				continue
			}

			// Task is dead, determine if other tasks should be killed
			if taskState.Failed {
				// Only set failed event if no event has been
				// set yet to give dead leaders priority.
				if killEvent == nil {
					killTask = name
					killEvent = structs.NewTaskEvent(structs.TaskSiblingFailed).
						SetFailedSibling(name)
				}
			} else if tr.IsLeader() {
				killEvent = structs.NewTaskEvent(structs.TaskLeaderDead)
			}
		}

		if len(liveRunners) > 0 {
			// if all live runners are sidecars - kill alloc
			onlySidecarsRemaining := hasSidecars && !hasNonSidecarTasks(liveRunners)
			if killEvent == nil && onlySidecarsRemaining {
				killEvent = structs.NewTaskEvent(structs.TaskMainDead)
			}

			// If there's a kill event set and live runners, kill them
			if killEvent != nil {

				// Log kill reason
				switch killEvent.Type {
				case structs.TaskLeaderDead:
					ar.logger.Debug("leader task dead, destroying all tasks", "leader_task", killTask)
				case structs.TaskMainDead:
					ar.logger.Debug("main tasks dead, destroying all sidecar tasks")
				default:
					ar.logger.Debug("task failure, destroying all tasks", "failed_task", killTask)
				}

				// Emit kill event for live runners
				for _, tr := range liveRunners {
					tr.EmitEvent(killEvent)
				}

				// Kill 'em all
				states = ar.killTasks()

				// Wait for TaskRunners to exit before continuing. This will
				// prevent looping before TaskRunners have transitioned to
				// Dead.
				for _, tr := range liveRunners {
					ar.logger.Info("waiting for task to exit", "task", tr.Task().Name)
					select {
					case <-tr.WaitCh():
					case <-ar.waitCh:
					}
				}
			}
		} else {
			// If there are no live runners left kill all non-poststop task
			// runners to unblock them from the alloc restart loop.
			for _, tr := range ar.tasks {
				if tr.IsPoststopTask() {
					continue
				}

				select {
				case <-tr.WaitCh():
				case <-ar.waitCh:
				default:
					// Kill task runner without setting an event because the
					// task is already dead, it's just waiting in the alloc
					// restart loop.
					err := tr.Kill(context.TODO(), nil)
					if err != nil {
						ar.logger.Warn("failed to kill task", "task", tr.Task().Name, "error", err)
					}
				}
			}
		}

		ar.taskCoordinator.TaskStateUpdated(states)

		// Get the client allocation
		calloc := ar.clientAlloc(states)

		// Update the server
		ar.stateUpdater.AllocStateUpdated(calloc)

		// Broadcast client alloc to listeners
		ar.allocBroadcaster.Send(calloc)
	}
}

// hasNonSidecarTasks returns false if all the passed tasks are sidecar tasks
func hasNonSidecarTasks(tasks []*taskrunner.TaskRunner) bool {
	for _, tr := range tasks {
		if !tr.IsSidecarTask() {
			return true
		}
	}

	return false
}

// hasSidecarTasks returns true if any of the passed tasks are sidecar tasks
func hasSidecarTasks(tasks map[string]*taskrunner.TaskRunner) bool {
	for _, tr := range tasks {
		if tr.IsSidecarTask() {
			return true
		}
	}

	return false
}

// killTasks kills all task runners, leader (if there is one) first. Errors are
// logged except taskrunner.ErrTaskNotRunning which is ignored. Task states
// after Kill has been called are returned.
func (ar *allocRunner) killTasks() map[string]*structs.TaskState {
	var mu sync.Mutex
	states := make(map[string]*structs.TaskState, len(ar.tasks))

	// run alloc prekill hooks
	ar.preKillHooks()

	// Kill leader first, synchronously
	for name, tr := range ar.tasks {
		if !tr.IsLeader() {
			continue
		}

		taskEvent := structs.NewTaskEvent(structs.TaskKilling)
		taskEvent.SetKillTimeout(tr.Task().KillTimeout, ar.clientConfig.MaxKillTimeout)
		err := tr.Kill(context.TODO(), taskEvent)
		if err != nil && err != taskrunner.ErrTaskNotRunning {
			ar.logger.Warn("error stopping leader task", "error", err, "task_name", name)
		}

		taskState := tr.TaskState()
		states[name] = taskState
		break
	}

	// Kill the remaining non-sidecar and non-poststop tasks concurrently
	wg := sync.WaitGroup{}
	for name, tr := range ar.tasks {
		// Filter out poststop and sidecar tasks so that they stop after all the other tasks are killed
		if tr.IsLeader() || tr.IsPoststopTask() || tr.IsSidecarTask() {
			continue
		}

		wg.Add(1)
		go func(name string, tr *taskrunner.TaskRunner) {
			defer wg.Done()
			taskEvent := structs.NewTaskEvent(structs.TaskKilling)
			taskEvent.SetKillTimeout(tr.Task().KillTimeout, ar.clientConfig.MaxKillTimeout)
			err := tr.Kill(context.TODO(), taskEvent)
			if err != nil && err != taskrunner.ErrTaskNotRunning {
				ar.logger.Warn("error stopping task", "error", err, "task_name", name)
			}

			taskState := tr.TaskState()
			mu.Lock()
			states[name] = taskState
			mu.Unlock()
		}(name, tr)
	}
	wg.Wait()

	// Kill the sidecar tasks last.
	for name, tr := range ar.tasks {
		if !tr.IsSidecarTask() || tr.IsLeader() || tr.IsPoststopTask() {
			continue
		}

		wg.Add(1)
		go func(name string, tr *taskrunner.TaskRunner) {
			defer wg.Done()
			taskEvent := structs.NewTaskEvent(structs.TaskKilling)
			taskEvent.SetKillTimeout(tr.Task().KillTimeout, ar.clientConfig.MaxKillTimeout)
			err := tr.Kill(context.TODO(), taskEvent)
			if err != nil && err != taskrunner.ErrTaskNotRunning {
				ar.logger.Warn("error stopping sidecar task", "error", err, "task_name", name)
			}

			taskState := tr.TaskState()
			mu.Lock()
			states[name] = taskState
			mu.Unlock()
		}(name, tr)
	}
	wg.Wait()

	return states
}

// clientAlloc takes in the task states and returns an Allocation populated
// with Client specific fields
func (ar *allocRunner) clientAlloc(taskStates map[string]*structs.TaskState) *structs.Allocation {
	ar.stateLock.Lock()
	defer ar.stateLock.Unlock()

	// store task states for AllocState to expose
	ar.state.TaskStates = taskStates

	a := &structs.Allocation{
		ID:         ar.id,
		TaskStates: taskStates,
	}

	if d := ar.state.DeploymentStatus; d != nil {
		a.DeploymentStatus = d.Copy()
	}

	// Compute the ClientStatus
	if ar.state.ClientStatus != "" {
		// The client status is being forced
		a.ClientStatus, a.ClientDescription = ar.state.ClientStatus, ar.state.ClientDescription
	} else {
		a.ClientStatus, a.ClientDescription = getClientStatus(taskStates)
	}

	// If the allocation is terminal, make sure all required fields are properly
	// set.
	if a.ClientTerminalStatus() {
		alloc := ar.Alloc()

		// If we are part of a deployment and the alloc has failed, mark the
		// alloc as unhealthy. This guards against the watcher not being
		// started. If the health status is already set then terminal
		// allocations should not change it.
		if a.ClientStatus == structs.AllocClientStatusFailed &&
			alloc.DeploymentID != "" && !a.DeploymentStatus.HasHealth() {
			a.DeploymentStatus = &structs.AllocDeploymentStatus{
				Healthy: pointer.Of(false),
			}
		}

		// Make sure we have marked the finished at for every task. This is used
		// to calculate the reschedule time for failed allocations.
		now := time.Now()
		for taskName := range ar.tasks {
			ts, ok := a.TaskStates[taskName]
			if !ok {
				ts = &structs.TaskState{}
				a.TaskStates[taskName] = ts
			}
			if ts.FinishedAt.IsZero() {
				ts.FinishedAt = now
			}
		}
	}

	// Set the NetworkStatus and default DNSConfig if one is not returned from the client
	netStatus := ar.state.NetworkStatus
	if netStatus != nil {
		a.NetworkStatus = netStatus
	} else {
		a.NetworkStatus = new(structs.AllocNetworkStatus)
	}

	if a.NetworkStatus.DNS == nil {
		alloc := ar.Alloc()
		nws := alloc.Job.LookupTaskGroup(alloc.TaskGroup).Networks
		if len(nws) > 0 {
			a.NetworkStatus.DNS = nws[0].DNS.Copy()
		}
	}

	return a
}

// getClientStatus takes in the task states for a given allocation and computes
// the client status and description
func getClientStatus(taskStates map[string]*structs.TaskState) (status, description string) {
	var pending, running, dead, failed bool
	for _, state := range taskStates {
		switch state.State {
		case structs.TaskStateRunning:
			running = true
		case structs.TaskStatePending:
			pending = true
		case structs.TaskStateDead:
			if state.Failed {
				failed = true
			} else {
				dead = true
			}
		}
	}

	// Determine the alloc status
	if failed {
		return structs.AllocClientStatusFailed, "Failed tasks"
	} else if running {
		return structs.AllocClientStatusRunning, "Tasks are running"
	} else if pending {
		return structs.AllocClientStatusPending, "No tasks have started"
	} else if dead {
		return structs.AllocClientStatusComplete, "All tasks have completed"
	}

	return "", ""
}

// SetClientStatus is a helper for forcing a specific client
// status on the alloc runner. This is used during restore errors
// when the task state can't be restored.
func (ar *allocRunner) SetClientStatus(clientStatus string) {
	ar.stateLock.Lock()
	defer ar.stateLock.Unlock()
	ar.state.ClientStatus = clientStatus
}

func (ar *allocRunner) SetNetworkStatus(s *structs.AllocNetworkStatus) {
	ar.stateLock.Lock()
	defer ar.stateLock.Unlock()
	ar.state.NetworkStatus = s.Copy()
}

func (ar *allocRunner) NetworkStatus() *structs.AllocNetworkStatus {
	ar.stateLock.Lock()
	defer ar.stateLock.Unlock()
	return ar.state.NetworkStatus.Copy()
}

// setIndexes is a helper for forcing alloc state on the alloc runner. This is
// used during reconnect when the task has been marked unknown by the server.
func (ar *allocRunner) setIndexes(update *structs.Allocation) {
	ar.allocLock.Lock()
	defer ar.allocLock.Unlock()
	ar.alloc.AllocModifyIndex = update.AllocModifyIndex
	ar.alloc.ModifyIndex = update.ModifyIndex
	ar.alloc.ModifyTime = update.ModifyTime
}

// AllocState returns a copy of allocation state including a snapshot of task
// states.
func (ar *allocRunner) AllocState() *state.State {
	ar.stateLock.RLock()
	state := ar.state.Copy()
	ar.stateLock.RUnlock()

	// If TaskStateUpdated has not been called yet, ar.state.TaskStates
	// won't be set as it is not the canonical source of TaskStates.
	if len(state.TaskStates) == 0 {
		state.TaskStates = make(map[string]*structs.TaskState, len(ar.tasks))
		for k, tr := range ar.tasks {
			state.TaskStates[k] = tr.TaskState()
		}
	}

	// Generate alloc to get other state fields
	alloc := ar.clientAlloc(state.TaskStates)
	state.ClientStatus = alloc.ClientStatus
	state.ClientDescription = alloc.ClientDescription
	state.DeploymentStatus = alloc.DeploymentStatus

	return state
}

// Update asynchronously updates the running allocation with a new version
// received from the server.
// When processing a new update, we will first attempt to drain stale updates
// from the queue, before appending the new one.
func (ar *allocRunner) Update(update *structs.Allocation) {
	select {
	// Drain queued update from the channel if possible, and check the modify
	// index
	case oldUpdate := <-ar.allocUpdatedCh:
		// If the old update is newer than the replacement, then skip the new one
		// and return. This case shouldn't happen, but may in the case of a bug
		// elsewhere inside the system.
		if oldUpdate.AllocModifyIndex > update.AllocModifyIndex {
			ar.logger.Debug("Discarding allocation update due to newer alloc revision in queue",
				"old_modify_index", oldUpdate.AllocModifyIndex,
				"new_modify_index", update.AllocModifyIndex)
			ar.allocUpdatedCh <- oldUpdate
			return
		} else {
			ar.logger.Debug("Discarding allocation update",
				"skipped_modify_index", oldUpdate.AllocModifyIndex,
				"new_modify_index", update.AllocModifyIndex)
		}
	case <-ar.waitCh:
		ar.logger.Trace("AllocRunner has terminated, skipping alloc update",
			"modify_index", update.AllocModifyIndex)
		return
	default:
	}

	if update.DesiredTransition.ShouldIgnoreShutdownDelay() {
		ar.shutdownDelayCancelFn()
	}

	// Queue the new update
	ar.allocUpdatedCh <- update
}

func (ar *allocRunner) handleAllocUpdates() {
	for {
		select {
		case update := <-ar.allocUpdatedCh:
			ar.handleAllocUpdate(update)
		case <-ar.waitCh:
			return
		}
	}
}

// handleAllocUpdate processes a single updated alloc serially on behalf of the
// handleAllocUpdates loop. If there was already a pending update it has been
// discarded and replaced by the latest update.
func (ar *allocRunner) handleAllocUpdate(update *structs.Allocation) {
	// Detect Stop updates
	stopping := !ar.Alloc().TerminalStatus() && update.TerminalStatus()

	// Update ar.alloc
	ar.setAlloc(update)

	// Run update hooks if not stopping or dead
	if !update.TerminalStatus() {
		if err := ar.update(update); err != nil {
			ar.logger.Error("error running update hooks", "error", err)
		}
	}

	// Update task runners
	for _, tr := range ar.tasks {
		tr.Update(update)
	}

	// If alloc is being terminated, kill all tasks, leader first
	if stopping {
		ar.killTasks()
	}
}

func (ar *allocRunner) Listener() *cstructs.AllocListener {
	return ar.allocBroadcaster.Listen()
}

func (ar *allocRunner) destroyImpl() {
	// Stop any running tasks and persist states in case the client is
	// shutdown before Destroy finishes.
	states := ar.killTasks()
	calloc := ar.clientAlloc(states)
	ar.stateUpdater.AllocStateUpdated(calloc)

	// Wait for tasks to exit and postrun hooks to finish
	<-ar.waitCh

	// Run destroy hooks
	if err := ar.destroy(); err != nil {
		ar.logger.Warn("error running destroy hooks", "error", err)
	}

	// Wait for task state update handler to exit before removing local
	// state if Run() ran at all.
	<-ar.taskStateUpdateHandlerCh

	// Mark alloc as destroyed
	ar.destroyedLock.Lock()

	// Cleanup the state db while holding the lock to avoid a race with a
	// periodic PersistState that may resurrect the alloc.
	if err := ar.stateDB.DeleteAllocationBucket(ar.id); err != nil {
		ar.logger.Warn("failed to delete allocation state", "error", err)
	}

	if !ar.shutdown {
		ar.shutdown = true
		close(ar.shutdownCh)
	}

	ar.destroyed = true
	close(ar.destroyCh)

	ar.destroyedLock.Unlock()
}

func (ar *allocRunner) PersistState() error {
	ar.destroyedLock.Lock()
	defer ar.destroyedLock.Unlock()

	if ar.destroyed {
		err := ar.stateDB.DeleteAllocationBucket(ar.id, cstate.WithBatchMode())
		if err != nil {
			ar.logger.Warn("failed to delete allocation bucket", "error", err)
		}
		return nil
	}

	// persist network status, wrapping in a func to release state lock as early as possible
	err := func() error {
		ar.stateLock.Lock()
		defer ar.stateLock.Unlock()
		if ar.state.NetworkStatus != nil {
			err := ar.stateDB.PutNetworkStatus(ar.id, ar.state.NetworkStatus, cstate.WithBatchMode())
			if err != nil {
				return err
			}
		}
		return nil
	}()
	if err != nil {
		return err
	}

	// TODO: consider persisting deployment state along with task status.
	// While we study why only the alloc is persisted, I opted to maintain current
	// behavior and not risk adding yet more IO calls unnecessarily.
	return ar.stateDB.PutAllocation(ar.Alloc(), cstate.WithBatchMode())
}

// Destroy the alloc runner by stopping it if it is still running and cleaning
// up all of its resources.
//
// This method is safe for calling concurrently with Run() and will cause it to
// exit (thus closing WaitCh).
// When the destroy action is completed, it will close DestroyCh().
func (ar *allocRunner) Destroy() {
	ar.destroyedLock.Lock()
	defer ar.destroyedLock.Unlock()

	if ar.destroyed {
		// Only destroy once
		return
	}

	if ar.destroyLaunched {
		// Only dispatch a destroy once
		return
	}

	ar.destroyLaunched = true

	// Synchronize calls to shutdown/destroy
	if ar.shutdownLaunched {
		go func() {
			ar.logger.Debug("Waiting for shutdown before destroying runner")
			<-ar.shutdownCh
			ar.destroyImpl()
		}()

		return
	}

	go ar.destroyImpl()
}

// IsDestroyed returns true if the alloc runner has been destroyed (stopped and
// garbage collected).
//
// This method is safe for calling concurrently with Run(). Callers must
// receive on WaitCh() to block until alloc runner has stopped and been
// destroyed.
func (ar *allocRunner) IsDestroyed() bool {
	ar.destroyedLock.Lock()
	defer ar.destroyedLock.Unlock()
	return ar.destroyed
}

// IsWaiting returns true if the alloc runner is waiting for its previous
// allocation to terminate.
//
// This method is safe for calling concurrently with Run().
func (ar *allocRunner) IsWaiting() bool {
	return ar.prevAllocWatcher.IsWaiting()
}

// isShuttingDown returns true if the alloc runner is in a shutdown state
// due to a call to Shutdown() or Destroy()
func (ar *allocRunner) isShuttingDown() bool {
	ar.destroyedLock.Lock()
	defer ar.destroyedLock.Unlock()
	return ar.shutdownLaunched
}

// DestroyCh is a channel that is closed when an allocrunner is closed due to
// an explicit call to Destroy().
func (ar *allocRunner) DestroyCh() <-chan struct{} {
	return ar.destroyCh
}

// ShutdownCh is a channel that is closed when an allocrunner is closed due to
// either an explicit call to Shutdown(), or Destroy().
func (ar *allocRunner) ShutdownCh() <-chan struct{} {
	return ar.shutdownCh
}

// Shutdown AllocRunner gracefully. Asynchronously shuts down all TaskRunners.
// Tasks are unaffected and may be restored.
// When the shutdown action is completed, it will close ShutdownCh().
func (ar *allocRunner) Shutdown() {
	ar.destroyedLock.Lock()
	defer ar.destroyedLock.Unlock()

	// Destroy is a superset of Shutdown so there's nothing to do if this
	// has already been destroyed.
	if ar.destroyed {
		return
	}

	// Destroy is a superset of Shutdown so if it's been marked for destruction,
	// don't try and shutdown in parallel. If shutdown has been launched, don't
	// try again.
	if ar.destroyLaunched || ar.shutdownLaunched {
		return
	}

	ar.shutdownLaunched = true

	go func() {
		ar.logger.Trace("shutting down")

		// Shutdown tasks gracefully if they were run
		wg := sync.WaitGroup{}
		for _, tr := range ar.tasks {
			wg.Add(1)
			go func(tr *taskrunner.TaskRunner) {
				tr.Shutdown()
				wg.Done()
			}(tr)
		}
		wg.Wait()

		// Wait for Run to exit
		<-ar.waitCh

		// Run shutdown hooks
		ar.shutdownHooks()

		// Wait for updater to finish its final run
		<-ar.taskStateUpdateHandlerCh

		ar.destroyedLock.Lock()
		ar.shutdown = true
		close(ar.shutdownCh)
		ar.destroyedLock.Unlock()
	}()
}

// IsMigrating returns true if the alloc runner is migrating data from its
// previous allocation.
//
// This method is safe for calling concurrently with Run().
func (ar *allocRunner) IsMigrating() bool {
	return ar.prevAllocMigrator.IsMigrating()
}

func (ar *allocRunner) StatsReporter() interfaces.AllocStatsReporter {
	return ar
}

// LatestAllocStats returns the latest stats for an allocation. If taskFilter
// is set, only stats for that task -- if it exists -- are returned.
func (ar *allocRunner) LatestAllocStats(taskFilter string) (*cstructs.AllocResourceUsage, error) {
	astat := &cstructs.AllocResourceUsage{
		Tasks: make(map[string]*cstructs.TaskResourceUsage, len(ar.tasks)),
		ResourceUsage: &cstructs.ResourceUsage{
			MemoryStats: &cstructs.MemoryStats{},
			CpuStats:    &cstructs.CpuStats{},
			DeviceStats: []*device.DeviceGroupStats{},
		},
	}

	for name, tr := range ar.tasks {
		if taskFilter != "" && taskFilter != name {
			// Getting stats for a particular task and it's not this one!
			continue
		}

		if usage := tr.LatestResourceUsage(); usage != nil {
			astat.Tasks[name] = usage
			astat.ResourceUsage.Add(usage.ResourceUsage)
			if usage.Timestamp > astat.Timestamp {
				astat.Timestamp = usage.Timestamp
			}
		}
	}

	return astat, nil
}

func (ar *allocRunner) GetTaskEventHandler(taskName string) drivermanager.EventHandler {
	if tr, ok := ar.tasks[taskName]; ok {
		return func(ev *drivers.TaskEvent) {
			tr.EmitEvent(&structs.TaskEvent{
				Type:          structs.TaskDriverMessage,
				Time:          ev.Timestamp.UnixNano(),
				Details:       ev.Annotations,
				DriverMessage: ev.Message,
			})
		}
	}
	return nil
}

// Restart satisfies the WorkloadRestarter interface and restarts all tasks
// that are currently running.
func (ar *allocRunner) Restart(ctx context.Context, event *structs.TaskEvent, failure bool) error {
	return ar.restartTasks(ctx, event, failure, false)
}

// RestartTask restarts the provided task.
func (ar *allocRunner) RestartTask(taskName string, event *structs.TaskEvent) error {
	tr, ok := ar.tasks[taskName]
	if !ok {
		return fmt.Errorf("Could not find task runner for task: %s", taskName)
	}

	return tr.Restart(context.TODO(), event, false)
}

// RestartRunning restarts all tasks that are currently running.
func (ar *allocRunner) RestartRunning(event *structs.TaskEvent) error {
	return ar.restartTasks(context.TODO(), event, false, false)
}

// RestartAll restarts all tasks in the allocation, including dead ones. They
// will restart following their lifecycle order.
func (ar *allocRunner) RestartAll(event *structs.TaskEvent) error {
	// Restart the taskCoordinator to allow dead tasks to run again.
	ar.taskCoordinator.Restart()
	return ar.restartTasks(context.TODO(), event, false, true)
}

// restartTasks restarts all task runners concurrently.
func (ar *allocRunner) restartTasks(ctx context.Context, event *structs.TaskEvent, failure bool, force bool) error {
	waitCh := make(chan struct{})
	var err *multierror.Error
	var errMutex sync.Mutex

	// run alloc task restart hooks
	ar.taskRestartHooks()

	go func() {
		var wg sync.WaitGroup
		defer close(waitCh)
		for tn, tr := range ar.tasks {
			wg.Add(1)
			go func(taskName string, taskRunner *taskrunner.TaskRunner) {
				defer wg.Done()

				var e error
				if force {
					e = taskRunner.ForceRestart(ctx, event.Copy(), failure)
				} else {
					e = taskRunner.Restart(ctx, event.Copy(), failure)
				}

				// Ignore ErrTaskNotRunning errors since tasks that are not
				// running are expected to not be restarted.
				if e != nil && e != taskrunner.ErrTaskNotRunning {
					errMutex.Lock()
					defer errMutex.Unlock()
					err = multierror.Append(err, fmt.Errorf("failed to restart task %s: %v", taskName, e))
				}
			}(tn, tr)
		}
		wg.Wait()
	}()

	select {
	case <-waitCh:
	case <-ctx.Done():
	}

	return err.ErrorOrNil()
}

// Signal sends a signal request to task runners inside an allocation. If the
// taskName is empty, then it is sent to all tasks.
func (ar *allocRunner) Signal(taskName, signal string) error {
	event := structs.NewTaskEvent(structs.TaskSignaling).SetSignalText(signal)

	if taskName != "" {
		tr, ok := ar.tasks[taskName]
		if !ok {
			return fmt.Errorf("Task not found")
		}

		return tr.Signal(event, signal)
	}

	var err *multierror.Error

	for tn, tr := range ar.tasks {
		rerr := tr.Signal(event.Copy(), signal)
		if rerr != nil {
			err = multierror.Append(err, fmt.Errorf("Failed to signal task: %s, err: %v", tn, rerr))
		}
	}

	return err.ErrorOrNil()
}

// Reconnect logs a reconnect event for each task in the allocation and syncs
// the current alloc state with the server.
func (ar *allocRunner) Reconnect(update *structs.Allocation) (err error) {
	event := structs.NewTaskEvent(structs.TaskClientReconnected)
	event.Time = time.Now().UnixNano()
	for _, tr := range ar.tasks {
		tr.AppendEvent(event)
	}

	// Update the client alloc with the server side indexes.
	ar.setIndexes(update)

	// Calculate alloc state to get the final state with the new events.
	// Cannot rely on AllocStates as it won't recompute TaskStates once they are set.
	states := make(map[string]*structs.TaskState, len(ar.tasks))
	for name, tr := range ar.tasks {
		states[name] = tr.TaskState()
	}

	// Build the client allocation
	alloc := ar.clientAlloc(states)

	// Update the client state store.
	err = ar.stateUpdater.PutAllocation(alloc)
	if err != nil {
		return
	}

	// Update the server.
	ar.stateUpdater.AllocStateUpdated(alloc)

	// Broadcast client alloc to listeners.
	err = ar.allocBroadcaster.Send(alloc)

	return
}

func (ar *allocRunner) GetTaskExecHandler(taskName string) drivermanager.TaskExecHandler {
	tr, ok := ar.tasks[taskName]
	if !ok {
		return nil
	}

	return tr.TaskExecHandler()
}

func (ar *allocRunner) GetTaskDriverCapabilities(taskName string) (*drivers.Capabilities, error) {
	tr, ok := ar.tasks[taskName]
	if !ok {
		return nil, fmt.Errorf("task not found")
	}

	return tr.DriverCapabilities()
}
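
// Editor's note: the comment block below is an illustrative sketch, not part of
// the upstream source. It shows the lifecycle a Nomad client typically drives
// against an allocRunner using only the exported methods defined above. The
// `conf` and `updatedAlloc` values are hypothetical placeholders for a fully
// populated *Config and a newer alloc version from the server; error handling
// is abbreviated.
//
//	ar, err := NewAllocRunner(conf)
//	if err != nil {
//		return err
//	}
//	if err := ar.Restore(); err != nil { // restore prior local state, before Run
//		return err
//	}
//	go ar.Run() // closes ar.WaitCh() when the run loop exits
//
//	// ... when the server sends a new version of the allocation:
//	ar.Update(updatedAlloc)
//
//	// ... when the allocation is garbage collected:
//	ar.Destroy()
//	<-ar.DestroyCh() // wait until local state has been cleaned up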