github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/allocrunner/taskrunner/remotetask_hook.go (about)

     1  package taskrunner
     2  
     3  import (
     4  	"context"
     5  
     6  	hclog "github.com/hashicorp/go-hclog"
     7  	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
     8  	"github.com/hashicorp/nomad/nomad/structs"
     9  	"github.com/hashicorp/nomad/plugins/drivers"
    10  )
    11  
    12  var _ interfaces.TaskPrestartHook = (*remoteTaskHook)(nil)
    13  var _ interfaces.TaskPreKillHook = (*remoteTaskHook)(nil)
    14  
    15  // remoteTaskHook reattaches to remotely executing tasks.
    16  type remoteTaskHook struct {
    17  	tr *TaskRunner
    18  
    19  	logger hclog.Logger
    20  }
    21  
    22  func newRemoteTaskHook(tr *TaskRunner, logger hclog.Logger) interfaces.TaskHook {
    23  	h := &remoteTaskHook{
    24  		tr: tr,
    25  	}
    26  	h.logger = logger.Named(h.Name())
    27  	return h
    28  }
    29  
    30  func (h *remoteTaskHook) Name() string {
    31  	return "remote_task"
    32  }
    33  
    34  // Prestart performs 2 remote task driver related tasks:
    35  //  1. If there is no local handle, see if there is a handle propagated from a
    36  //     previous alloc to be restored.
    37  //  2. If the alloc is lost make sure the task signal is set to detach instead
    38  //     of kill.
    39  func (h *remoteTaskHook) Prestart(ctx context.Context, req *interfaces.TaskPrestartRequest, resp *interfaces.TaskPrestartResponse) error {
    40  	if h.tr.getDriverHandle() != nil {
    41  		// Driver handle already exists so don't try to load remote
    42  		// task handle
    43  		return nil
    44  	}
    45  
    46  	h.tr.stateLock.Lock()
    47  	th := drivers.NewTaskHandleFromState(h.tr.state)
    48  	h.tr.stateLock.Unlock()
    49  
    50  	// Task handle will be nil if there was no previous allocation or if
    51  	// this is a destructive update
    52  	if th == nil {
    53  		resp.Done = true
    54  		return nil
    55  	}
    56  
    57  	// The task config is unique per invocation so recreate it here
    58  	th.Config = h.tr.buildTaskConfig()
    59  
    60  	if err := h.tr.driver.RecoverTask(th); err != nil {
    61  		// Soft error here to let a new instance get started instead of
    62  		// failing the task since retrying is unlikely to help.
    63  		h.logger.Error("error recovering task state", "error", err)
    64  		return nil
    65  	}
    66  
    67  	taskInfo, err := h.tr.driver.InspectTask(th.Config.ID)
    68  	if err != nil {
    69  		// Soft error here to let a new instance get started instead of
    70  		// failing the task since retrying is unlikely to help.
    71  		h.logger.Error("error inspecting recovered task state", "error", err)
    72  		return nil
    73  	}
    74  
    75  	h.tr.setDriverHandle(NewDriverHandle(h.tr.driver, th.Config.ID, h.tr.Task(), h.tr.clientConfig.MaxKillTimeout, taskInfo.NetworkOverride))
    76  
    77  	h.tr.stateLock.Lock()
    78  	h.tr.localState.TaskHandle = th
    79  	h.tr.localState.DriverNetwork = taskInfo.NetworkOverride
    80  	h.tr.stateLock.Unlock()
    81  
    82  	// Ensure the signal is set according to the allocation's state
    83  	h.setSignal(h.tr.Alloc())
    84  
    85  	// Emit TaskStarted manually since the normal task runner logic will
    86  	// treat this task like a restored task and skip emitting started.
    87  	h.tr.UpdateState(structs.TaskStateRunning, structs.NewTaskEvent(structs.TaskStarted))
    88  
    89  	return nil
    90  }
    91  
    92  // PreKilling tells the remote task driver to detach a remote task instead of
    93  // stopping it.
    94  func (h *remoteTaskHook) PreKilling(ctx context.Context, req *interfaces.TaskPreKillRequest, resp *interfaces.TaskPreKillResponse) error {
    95  	alloc := h.tr.Alloc()
    96  	h.setSignal(alloc)
    97  	return nil
    98  }
    99  
   100  // setSignal to detach if the allocation is lost or draining. Safe to call
   101  // multiple times as it only transitions to using detach -- never back to kill.
   102  func (h *remoteTaskHook) setSignal(alloc *structs.Allocation) {
   103  	driverHandle := h.tr.getDriverHandle()
   104  	if driverHandle == nil {
   105  		// Nothing to do exit early
   106  		return
   107  	}
   108  
   109  	switch {
   110  	case alloc.ClientStatus == structs.AllocClientStatusLost:
   111  		// Continue on; lost allocs should just detach
   112  		h.logger.Debug("detaching from remote task since alloc was lost")
   113  	case alloc.DesiredTransition.ShouldMigrate():
   114  		// Continue on; migrating allocs should just detach
   115  		h.logger.Debug("detaching from remote task since alloc was drained")
   116  	default:
   117  		// Nothing to do exit early
   118  		return
   119  	}
   120  
   121  	// Set DetachSignal to indicate to the remote task driver that it
   122  	// should detach this remote task and ignore it.
   123  	driverHandle.SetKillSignal(drivers.DetachSignal)
   124  }