github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/allocrunner/taskrunner/lifecycle.go (about) 1 package taskrunner 2 3 import ( 4 "context" 5 6 "github.com/hashicorp/nomad/nomad/structs" 7 ) 8 9 // Restart restarts a task that is already running. Returns an error if the 10 // task is not running. Blocks until existing task exits or passed-in context 11 // is canceled. 12 func (tr *TaskRunner) Restart(ctx context.Context, event *structs.TaskEvent, failure bool) error { 13 tr.logger.Trace("Restart requested", "failure", failure, "event", event.GoString()) 14 15 taskState := tr.TaskState() 16 if taskState == nil { 17 return ErrTaskNotRunning 18 } 19 20 switch taskState.State { 21 case structs.TaskStatePending, structs.TaskStateDead: 22 return ErrTaskNotRunning 23 } 24 25 return tr.restartImpl(ctx, event, failure) 26 } 27 28 // ForceRestart restarts a task that is already running or reruns it if dead. 29 // Returns an error if the task is not able to rerun. Blocks until existing 30 // task exits or passed-in context is canceled. 31 // 32 // Callers must restart the AllocRuner taskCoordinator beforehand to make sure 33 // the task will be able to run again. 34 func (tr *TaskRunner) ForceRestart(ctx context.Context, event *structs.TaskEvent, failure bool) error { 35 tr.logger.Trace("Force restart requested", "failure", failure, "event", event.GoString()) 36 37 taskState := tr.TaskState() 38 if taskState == nil { 39 return ErrTaskNotRunning 40 } 41 42 tr.stateLock.Lock() 43 localState := tr.localState.Copy() 44 tr.stateLock.Unlock() 45 46 if localState == nil { 47 return ErrTaskNotRunning 48 } 49 50 switch taskState.State { 51 case structs.TaskStatePending: 52 return ErrTaskNotRunning 53 54 case structs.TaskStateDead: 55 // Tasks that are in the "dead" state are only allowed to restart if 56 // their Run() method is still active. 57 if localState.RunComplete { 58 return ErrTaskNotRunning 59 } 60 } 61 62 return tr.restartImpl(ctx, event, failure) 63 } 64 65 // restartImpl implements to task restart process. 66 // 67 // It should never be called directly as it doesn't verify if the task state 68 // allows for a restart. 69 func (tr *TaskRunner) restartImpl(ctx context.Context, event *structs.TaskEvent, failure bool) error { 70 71 // Check if the task is able to restart based on its state and the type of 72 // restart event that was triggered. 73 taskState := tr.TaskState() 74 if taskState == nil { 75 return ErrTaskNotRunning 76 } 77 78 // Emit the event since it may take a long time to kill 79 tr.EmitEvent(event) 80 81 // Tell the restart tracker that a restart triggered the exit 82 tr.restartTracker.SetRestartTriggered(failure) 83 84 // Signal a restart to unblock tasks that are in the "dead" state, but 85 // don't block since the channel is buffered. Only one signal is enough to 86 // notify the tr.Run() loop. 87 // The channel must be signaled after SetRestartTriggered is called so the 88 // tr.Run() loop runs again. 89 if taskState.State == structs.TaskStateDead { 90 select { 91 case tr.restartCh <- struct{}{}: 92 default: 93 } 94 } 95 96 // Grab the handle to see if the task is still running and needs to be 97 // killed. 98 handle := tr.getDriverHandle() 99 if handle == nil { 100 return nil 101 } 102 103 // Run the pre-kill hooks prior to restarting the task 104 tr.preKill() 105 106 // Grab a handle to the wait channel that will timeout with context cancelation 107 // _before_ killing the task. 108 waitCh, err := handle.WaitCh(ctx) 109 if err != nil { 110 return err 111 } 112 113 // Kill the task using an exponential backoff in-case of failures. 114 if _, err := tr.killTask(handle, waitCh); err != nil { 115 // We couldn't successfully destroy the resource created. 116 tr.logger.Error("failed to kill task. Resources may have been leaked", "error", err) 117 } 118 119 select { 120 case <-waitCh: 121 case <-ctx.Done(): 122 } 123 return nil 124 } 125 126 func (tr *TaskRunner) Signal(event *structs.TaskEvent, s string) error { 127 tr.logger.Trace("Signal requested", "signal", s) 128 129 // Grab the handle 130 handle := tr.getDriverHandle() 131 132 // Check it is running 133 if handle == nil { 134 return ErrTaskNotRunning 135 } 136 137 // Emit the event 138 tr.EmitEvent(event) 139 140 // Send the signal 141 return handle.Signal(s) 142 } 143 144 // Kill a task. Blocks until task exits or context is canceled. State is set to 145 // dead. 146 func (tr *TaskRunner) Kill(ctx context.Context, event *structs.TaskEvent) error { 147 tr.logger.Trace("Kill requested") 148 149 // Cancel the task runner to break out of restart delay or the main run 150 // loop. 151 tr.killCtxCancel() 152 153 // Emit kill event 154 if event != nil { 155 tr.logger.Trace("Kill event", "event_type", event.Type, "event_reason", event.KillReason) 156 tr.EmitEvent(event) 157 } 158 159 select { 160 case <-tr.WaitCh(): 161 case <-ctx.Done(): 162 return ctx.Err() 163 } 164 165 return tr.getKillErr() 166 } 167 168 func (tr *TaskRunner) IsRunning() bool { 169 return tr.getDriverHandle() != nil 170 }