github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/allocrunner/taskrunner/remotetask_hook.go (about) 1 package taskrunner 2 3 import ( 4 "context" 5 6 hclog "github.com/hashicorp/go-hclog" 7 "github.com/hashicorp/nomad/client/allocrunner/interfaces" 8 "github.com/hashicorp/nomad/nomad/structs" 9 "github.com/hashicorp/nomad/plugins/drivers" 10 ) 11 12 var _ interfaces.TaskPrestartHook = (*remoteTaskHook)(nil) 13 var _ interfaces.TaskPreKillHook = (*remoteTaskHook)(nil) 14 15 // remoteTaskHook reattaches to remotely executing tasks. 16 type remoteTaskHook struct { 17 tr *TaskRunner 18 19 logger hclog.Logger 20 } 21 22 func newRemoteTaskHook(tr *TaskRunner, logger hclog.Logger) interfaces.TaskHook { 23 h := &remoteTaskHook{ 24 tr: tr, 25 } 26 h.logger = logger.Named(h.Name()) 27 return h 28 } 29 30 func (h *remoteTaskHook) Name() string { 31 return "remote_task" 32 } 33 34 // Prestart performs 2 remote task driver related tasks: 35 // 1. If there is no local handle, see if there is a handle propagated from a 36 // previous alloc to be restored. 37 // 2. If the alloc is lost make sure the task signal is set to detach instead 38 // of kill. 39 func (h *remoteTaskHook) Prestart(ctx context.Context, req *interfaces.TaskPrestartRequest, resp *interfaces.TaskPrestartResponse) error { 40 if h.tr.getDriverHandle() != nil { 41 // Driver handle already exists so don't try to load remote 42 // task handle 43 return nil 44 } 45 46 h.tr.stateLock.Lock() 47 th := drivers.NewTaskHandleFromState(h.tr.state) 48 h.tr.stateLock.Unlock() 49 50 // Task handle will be nil if there was no previous allocation or if 51 // this is a destructive update 52 if th == nil { 53 resp.Done = true 54 return nil 55 } 56 57 // The task config is unique per invocation so recreate it here 58 th.Config = h.tr.buildTaskConfig() 59 60 if err := h.tr.driver.RecoverTask(th); err != nil { 61 // Soft error here to let a new instance get started instead of 62 // failing the task since retrying is unlikely to help. 63 h.logger.Error("error recovering task state", "error", err) 64 return nil 65 } 66 67 taskInfo, err := h.tr.driver.InspectTask(th.Config.ID) 68 if err != nil { 69 // Soft error here to let a new instance get started instead of 70 // failing the task since retrying is unlikely to help. 71 h.logger.Error("error inspecting recovered task state", "error", err) 72 return nil 73 } 74 75 h.tr.setDriverHandle(NewDriverHandle(h.tr.driver, th.Config.ID, h.tr.Task(), h.tr.clientConfig.MaxKillTimeout, taskInfo.NetworkOverride)) 76 77 h.tr.stateLock.Lock() 78 h.tr.localState.TaskHandle = th 79 h.tr.localState.DriverNetwork = taskInfo.NetworkOverride 80 h.tr.stateLock.Unlock() 81 82 // Ensure the signal is set according to the allocation's state 83 h.setSignal(h.tr.Alloc()) 84 85 // Emit TaskStarted manually since the normal task runner logic will 86 // treat this task like a restored task and skip emitting started. 87 h.tr.UpdateState(structs.TaskStateRunning, structs.NewTaskEvent(structs.TaskStarted)) 88 89 return nil 90 } 91 92 // PreKilling tells the remote task driver to detach a remote task instead of 93 // stopping it. 94 func (h *remoteTaskHook) PreKilling(ctx context.Context, req *interfaces.TaskPreKillRequest, resp *interfaces.TaskPreKillResponse) error { 95 alloc := h.tr.Alloc() 96 h.setSignal(alloc) 97 return nil 98 } 99 100 // setSignal to detach if the allocation is lost or draining. Safe to call 101 // multiple times as it only transitions to using detach -- never back to kill. 102 func (h *remoteTaskHook) setSignal(alloc *structs.Allocation) { 103 driverHandle := h.tr.getDriverHandle() 104 if driverHandle == nil { 105 // Nothing to do exit early 106 return 107 } 108 109 switch { 110 case alloc.ClientStatus == structs.AllocClientStatusLost: 111 // Continue on; lost allocs should just detach 112 h.logger.Debug("detaching from remote task since alloc was lost") 113 case alloc.DesiredTransition.ShouldMigrate(): 114 // Continue on; migrating allocs should just detach 115 h.logger.Debug("detaching from remote task since alloc was drained") 116 default: 117 // Nothing to do exit early 118 return 119 } 120 121 // Set DetachSignal to indicate to the remote task driver that it 122 // should detach this remote task and ignore it. 123 driverHandle.SetKillSignal(drivers.DetachSignal) 124 }