github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/allocrunner/taskrunner/lifecycle.go (about)

     1  package taskrunner
     2  
     3  import (
     4  	"context"
     5  
     6  	"github.com/hashicorp/nomad/nomad/structs"
     7  )
     8  
     9  // Restart restarts a task that is already running. Returns an error if the
    10  // task is not running. Blocks until existing task exits or passed-in context
    11  // is canceled.
    12  func (tr *TaskRunner) Restart(ctx context.Context, event *structs.TaskEvent, failure bool) error {
    13  	tr.logger.Trace("Restart requested", "failure", failure, "event", event.GoString())
    14  
    15  	taskState := tr.TaskState()
    16  	if taskState == nil {
    17  		return ErrTaskNotRunning
    18  	}
    19  
    20  	switch taskState.State {
    21  	case structs.TaskStatePending, structs.TaskStateDead:
    22  		return ErrTaskNotRunning
    23  	}
    24  
    25  	return tr.restartImpl(ctx, event, failure)
    26  }
    27  
    28  // ForceRestart restarts a task that is already running or reruns it if dead.
    29  // Returns an error if the task is not able to rerun. Blocks until existing
    30  // task exits or passed-in context is canceled.
    31  //
    32  // Callers must restart the AllocRuner taskCoordinator beforehand to make sure
    33  // the task will be able to run again.
    34  func (tr *TaskRunner) ForceRestart(ctx context.Context, event *structs.TaskEvent, failure bool) error {
    35  	tr.logger.Trace("Force restart requested", "failure", failure, "event", event.GoString())
    36  
    37  	taskState := tr.TaskState()
    38  	if taskState == nil {
    39  		return ErrTaskNotRunning
    40  	}
    41  
    42  	tr.stateLock.Lock()
    43  	localState := tr.localState.Copy()
    44  	tr.stateLock.Unlock()
    45  
    46  	if localState == nil {
    47  		return ErrTaskNotRunning
    48  	}
    49  
    50  	switch taskState.State {
    51  	case structs.TaskStatePending:
    52  		return ErrTaskNotRunning
    53  
    54  	case structs.TaskStateDead:
    55  		// Tasks that are in the "dead" state are only allowed to restart if
    56  		// their Run() method is still active.
    57  		if localState.RunComplete {
    58  			return ErrTaskNotRunning
    59  		}
    60  	}
    61  
    62  	return tr.restartImpl(ctx, event, failure)
    63  }
    64  
    65  // restartImpl implements to task restart process.
    66  //
    67  // It should never be called directly as it doesn't verify if the task state
    68  // allows for a restart.
    69  func (tr *TaskRunner) restartImpl(ctx context.Context, event *structs.TaskEvent, failure bool) error {
    70  
    71  	// Check if the task is able to restart based on its state and the type of
    72  	// restart event that was triggered.
    73  	taskState := tr.TaskState()
    74  	if taskState == nil {
    75  		return ErrTaskNotRunning
    76  	}
    77  
    78  	// Emit the event since it may take a long time to kill
    79  	tr.EmitEvent(event)
    80  
    81  	// Tell the restart tracker that a restart triggered the exit
    82  	tr.restartTracker.SetRestartTriggered(failure)
    83  
    84  	// Signal a restart to unblock tasks that are in the "dead" state, but
    85  	// don't block since the channel is buffered. Only one signal is enough to
    86  	// notify the tr.Run() loop.
    87  	// The channel must be signaled after SetRestartTriggered is called so the
    88  	// tr.Run() loop runs again.
    89  	if taskState.State == structs.TaskStateDead {
    90  		select {
    91  		case tr.restartCh <- struct{}{}:
    92  		default:
    93  		}
    94  	}
    95  
    96  	// Grab the handle to see if the task is still running and needs to be
    97  	// killed.
    98  	handle := tr.getDriverHandle()
    99  	if handle == nil {
   100  		return nil
   101  	}
   102  
   103  	// Run the pre-kill hooks prior to restarting the task
   104  	tr.preKill()
   105  
   106  	// Grab a handle to the wait channel that will timeout with context cancelation
   107  	// _before_ killing the task.
   108  	waitCh, err := handle.WaitCh(ctx)
   109  	if err != nil {
   110  		return err
   111  	}
   112  
   113  	// Kill the task using an exponential backoff in-case of failures.
   114  	if _, err := tr.killTask(handle, waitCh); err != nil {
   115  		// We couldn't successfully destroy the resource created.
   116  		tr.logger.Error("failed to kill task. Resources may have been leaked", "error", err)
   117  	}
   118  
   119  	select {
   120  	case <-waitCh:
   121  	case <-ctx.Done():
   122  	}
   123  	return nil
   124  }
   125  
   126  func (tr *TaskRunner) Signal(event *structs.TaskEvent, s string) error {
   127  	tr.logger.Trace("Signal requested", "signal", s)
   128  
   129  	// Grab the handle
   130  	handle := tr.getDriverHandle()
   131  
   132  	// Check it is running
   133  	if handle == nil {
   134  		return ErrTaskNotRunning
   135  	}
   136  
   137  	// Emit the event
   138  	tr.EmitEvent(event)
   139  
   140  	// Send the signal
   141  	return handle.Signal(s)
   142  }
   143  
   144  // Kill a task. Blocks until task exits or context is canceled. State is set to
   145  // dead.
   146  func (tr *TaskRunner) Kill(ctx context.Context, event *structs.TaskEvent) error {
   147  	tr.logger.Trace("Kill requested")
   148  
   149  	// Cancel the task runner to break out of restart delay or the main run
   150  	// loop.
   151  	tr.killCtxCancel()
   152  
   153  	// Emit kill event
   154  	if event != nil {
   155  		tr.logger.Trace("Kill event", "event_type", event.Type, "event_reason", event.KillReason)
   156  		tr.EmitEvent(event)
   157  	}
   158  
   159  	select {
   160  	case <-tr.WaitCh():
   161  	case <-ctx.Done():
   162  		return ctx.Err()
   163  	}
   164  
   165  	return tr.getKillErr()
   166  }
   167  
   168  func (tr *TaskRunner) IsRunning() bool {
   169  	return tr.getDriverHandle() != nil
   170  }