github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/allocrunner/taskrunner/task_runner.go

     1  package taskrunner
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"strings"
     8  	"sync"
     9  	"time"
    10  
    11  	"github.com/hashicorp/nomad/client/lib/cgutil"
    12  	"golang.org/x/exp/slices"
    13  
    14  	metrics "github.com/armon/go-metrics"
    15  	log "github.com/hashicorp/go-hclog"
    16  	multierror "github.com/hashicorp/go-multierror"
    17  	"github.com/hashicorp/hcl/v2/hcldec"
    18  	"github.com/hashicorp/nomad/client/allocdir"
    19  	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
    20  	"github.com/hashicorp/nomad/client/allocrunner/taskrunner/restarts"
    21  	"github.com/hashicorp/nomad/client/allocrunner/taskrunner/state"
    22  	"github.com/hashicorp/nomad/client/config"
    23  	"github.com/hashicorp/nomad/client/consul"
    24  	"github.com/hashicorp/nomad/client/devicemanager"
    25  	"github.com/hashicorp/nomad/client/dynamicplugins"
    26  	cinterfaces "github.com/hashicorp/nomad/client/interfaces"
    27  	"github.com/hashicorp/nomad/client/pluginmanager/csimanager"
    28  	"github.com/hashicorp/nomad/client/pluginmanager/drivermanager"
    29  	"github.com/hashicorp/nomad/client/serviceregistration"
    30  	"github.com/hashicorp/nomad/client/serviceregistration/wrapper"
    31  	cstate "github.com/hashicorp/nomad/client/state"
    32  	cstructs "github.com/hashicorp/nomad/client/structs"
    33  	"github.com/hashicorp/nomad/client/taskenv"
    34  	"github.com/hashicorp/nomad/client/vaultclient"
    35  	"github.com/hashicorp/nomad/helper"
    36  	"github.com/hashicorp/nomad/helper/pluginutils/hclspecutils"
    37  	"github.com/hashicorp/nomad/helper/pluginutils/hclutils"
    38  	"github.com/hashicorp/nomad/helper/uuid"
    39  	"github.com/hashicorp/nomad/nomad/structs"
    40  	bstructs "github.com/hashicorp/nomad/plugins/base/structs"
    41  	"github.com/hashicorp/nomad/plugins/drivers"
    42  )
    43  
    44  const (
    45  	// defaultMaxEvents is the default max capacity for task events on the
    46  	// task state. Overrideable for testing.
    47  	defaultMaxEvents = 10
    48  
    49  	// killBackoffBaseline is the baseline time for exponential backoff while
    50  	// killing a task.
    51  	killBackoffBaseline = 5 * time.Second
    52  
    53  	// killBackoffLimit is the limit of the exponential backoff for killing
    54  	// the task.
    55  	killBackoffLimit = 2 * time.Minute
    56  
    57  	// killFailureLimit is how many times we will attempt to kill a task before
    58  	// giving up and potentially leaking resources.
    59  	killFailureLimit = 5
    60  
    61  	// triggerUpdateChCap is the capacity for the triggerUpdateCh used for
    62  	// triggering updates. It should be exactly 1 as even if multiple
    63  	// updates have come in since the last one was handled, we only need to
    64  	// handle the last one.
    65  	triggerUpdateChCap = 1
    66  
    67  	// restartChCap is the capacity for the restartCh used for triggering task
    68  	// restarts. It should be exactly 1 as even if multiple restarts have come
    69  	// in, we only need to handle the last one.
    70  	restartChCap = 1
    71  )
    72  
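        // TaskRunner manages the lifecycle of a single task within an allocation. It
        // runs the task's lifecycle hooks, starts the task through its driver plugin,
        // tracks restarts, persists local state, and reports state changes back to the
        // alloc runner. A minimal usage sketch (the alloc runner normally wires this
        // up; error handling elided):
        //
        //	tr, err := NewTaskRunner(cfg)
        //	if err != nil {
        //		return err
        //	}
        //	go tr.Run()
        //	<-tr.WaitCh()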
    73  type TaskRunner struct {
    74  	// allocID, taskName, taskLeader, and taskResources are immutable so these fields may
    75  	// be accessed without locks
    76  	allocID       string
    77  	taskName      string
    78  	taskLeader    bool
    79  	taskResources *structs.AllocatedTaskResources
    80  
    81  	alloc     *structs.Allocation
    82  	allocLock sync.Mutex
    83  
    84  	clientConfig *config.Config
    85  
    86  	// stateUpdater is used to emit updated task state
    87  	stateUpdater interfaces.TaskStateHandler
    88  
    89  	// state captures the state of the task for updating the allocation
    90  	// Must acquire stateLock to access.
    91  	state *structs.TaskState
    92  
    93  	// localState captures the node-local state of the task for when the
    94  	// Nomad agent restarts.
    95  	// Must acquire stateLock to access.
    96  	localState *state.LocalState
    97  
    98  	// stateLock must be acquired when accessing state or localState.
    99  	stateLock sync.RWMutex
   100  
   101  	// stateDB is for persisting localState and taskState
   102  	stateDB cstate.StateDB
   103  
   104  	// restartCh is used to signal that the task should restart.
   105  	restartCh chan struct{}
   106  
   107  	// shutdownCtx is used to exit the TaskRunner *without* affecting task state.
   108  	shutdownCtx context.Context
   109  
   110  	// shutdownCtxCancel causes the TaskRunner to exit immediately without
   111  	// affecting task state. Useful for testing or graceful agent shutdown.
   112  	shutdownCtxCancel context.CancelFunc
   113  
    114  	// killCtx is the task runner's context representing the task's lifecycle.
   115  	// The context is canceled when the task is killed.
   116  	killCtx context.Context
   117  
   118  	// killCtxCancel is called when killing a task.
   119  	killCtxCancel context.CancelFunc
   120  
    121  	// killErr is populated when killing a task. Access should be done using the
   122  	// getter/setter
   123  	killErr     error
   124  	killErrLock sync.Mutex
   125  
   126  	// shutdownDelayCtx is a context from the alloc runner which will
   127  	// tell us to exit early from shutdown_delay
   128  	shutdownDelayCtx      context.Context
   129  	shutdownDelayCancelFn context.CancelFunc
   130  
   131  	// Logger is the logger for the task runner.
   132  	logger log.Logger
   133  
   134  	// triggerUpdateCh is ticked whenever update hooks need to be run and
   135  	// must be created with cap=1 to signal a pending update and prevent
   136  	// callers from deadlocking if the receiver has exited.
   137  	triggerUpdateCh chan struct{}
   138  
   139  	// waitCh is closed when the task runner has transitioned to a terminal
   140  	// state
   141  	waitCh chan struct{}
   142  
   143  	// driver is the driver for the task.
   144  	driver drivers.DriverPlugin
   145  
    146  	// driverCapabilities is the set of capabilities the driver supports
   147  	driverCapabilities *drivers.Capabilities
   148  
   149  	// taskSchema is the hcl spec for the task driver configuration
   150  	taskSchema hcldec.Spec
   151  
   152  	// handleLock guards access to handle and handleResult
   153  	handleLock sync.Mutex
   154  
   155  	// handle to the running driver
   156  	handle *DriverHandle
   157  
   158  	// task is the task being run
   159  	task     *structs.Task
   160  	taskLock sync.RWMutex
   161  
   162  	// taskDir is the directory structure for this task.
   163  	taskDir *allocdir.TaskDir
   164  
   165  	// envBuilder is used to build the task's environment
   166  	envBuilder *taskenv.Builder
   167  
   168  	// restartTracker is used to decide if the task should be restarted.
   169  	restartTracker *restarts.RestartTracker
   170  
   171  	// runnerHooks are task runner lifecycle hooks that should be run on state
    172  	// transitions.
   173  	runnerHooks []interfaces.TaskHook
   174  
   175  	// hookResources captures the resources provided by hooks
   176  	hookResources *hookResources
   177  
    178  	// consulServiceClient is the client used by the consul service hook for
   179  	// registering services and checks
   180  	consulServiceClient serviceregistration.Handler
   181  
   182  	// consulProxiesClient is the client used by the envoy version hook for
   183  	// asking consul what version of envoy nomad should inject into the connect
   184  	// sidecar or gateway task.
   185  	consulProxiesClient consul.SupportedProxiesAPI
   186  
    187  	// siClient is the client used by the service identity hook for managing
   188  	// service identity tokens
   189  	siClient consul.ServiceIdentityAPI
   190  
   191  	// vaultClient is the client to use to derive and renew Vault tokens
   192  	vaultClient vaultclient.VaultClient
   193  
   194  	// vaultToken is the current Vault token. It should be accessed with the
   195  	// getter.
   196  	vaultToken     string
   197  	vaultTokenLock sync.Mutex
   198  
   199  	// nomadToken is the current Nomad workload identity token. It
   200  	// should be accessed with the getter.
   201  	nomadToken     string
   202  	nomadTokenLock sync.Mutex
   203  
   204  	// baseLabels are used when emitting tagged metrics. All task runner metrics
   205  	// will have these tags, and optionally more.
   206  	baseLabels []metrics.Label
   207  
   208  	// logmonHookConfig is used to get the paths to the stdout and stderr fifos
   209  	// to be passed to the driver for task logging
   210  	logmonHookConfig *logmonHookConfig
   211  
   212  	// resourceUsage is written via UpdateStats and read via
    213  	// LatestResourceUsage. May be nil at any time.
   214  	resourceUsage     *cstructs.TaskResourceUsage
   215  	resourceUsageLock sync.Mutex
   216  
   217  	// deviceStatsReporter is used to lookup resource usage for alloc devices
   218  	deviceStatsReporter cinterfaces.DeviceStatsReporter
   219  
   220  	// csiManager is used to manage the mounting of CSI volumes into tasks
   221  	csiManager csimanager.Manager
   222  
   223  	// devicemanager is used to mount devices as well as lookup device
   224  	// statistics
   225  	devicemanager devicemanager.Manager
   226  
   227  	// cpusetCgroupPathGetter is used to lookup the cgroup path if supported by the platform
   228  	cpusetCgroupPathGetter cgutil.CgroupPathGetter
   229  
   230  	// driverManager is used to dispense driver plugins and register event
   231  	// handlers
   232  	driverManager drivermanager.Manager
   233  
   234  	// dynamicRegistry is where dynamic plugins should be registered.
   235  	dynamicRegistry dynamicplugins.Registry
   236  
   237  	// maxEvents is the capacity of the TaskEvents on the TaskState.
   238  	// Defaults to defaultMaxEvents but overrideable for testing.
   239  	maxEvents int
   240  
   241  	// serversContactedCh is passed to TaskRunners so they can detect when
   242  	// GetClientAllocs has been called in case of a failed restore.
   243  	serversContactedCh <-chan struct{}
   244  
   245  	// startConditionMetCh signals the TaskRunner when it should start the task
   246  	startConditionMetCh <-chan struct{}
   247  
   248  	// waitOnServers defaults to false but will be set true if a restore
   249  	// fails and the Run method should wait until serversContactedCh is
   250  	// closed.
   251  	waitOnServers bool
   252  
   253  	networkIsolationLock sync.Mutex
   254  	networkIsolationSpec *drivers.NetworkIsolationSpec
   255  
   256  	allocHookResources *cstructs.AllocHookResources
   257  
   258  	// serviceRegWrapper is the handler wrapper that is used by service hooks
   259  	// to perform service and check registration and deregistration.
   260  	serviceRegWrapper *wrapper.HandlerWrapper
   261  
   262  	// getter is an interface for retrieving artifacts.
   263  	getter cinterfaces.ArtifactGetter
   264  }
   265  
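        // Config holds the dependencies and initial values used to construct a
        // TaskRunner via NewTaskRunner.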
   266  type Config struct {
   267  	Alloc        *structs.Allocation
   268  	ClientConfig *config.Config
   269  	Task         *structs.Task
   270  	TaskDir      *allocdir.TaskDir
   271  	Logger       log.Logger
   272  
   273  	// Consul is the client to use for managing Consul service registrations
   274  	Consul serviceregistration.Handler
   275  
   276  	// ConsulProxies is the client to use for looking up supported envoy versions
   277  	// from Consul.
   278  	ConsulProxies consul.SupportedProxiesAPI
   279  
   280  	// ConsulSI is the client to use for managing Consul SI tokens
   281  	ConsulSI consul.ServiceIdentityAPI
   282  
   283  	// DynamicRegistry is where dynamic plugins should be registered.
   284  	DynamicRegistry dynamicplugins.Registry
   285  
   286  	// Vault is the client to use to derive and renew Vault tokens
   287  	Vault vaultclient.VaultClient
   288  
   289  	// StateDB is used to store and restore state.
   290  	StateDB cstate.StateDB
   291  
   292  	// StateUpdater is used to emit updated task state
   293  	StateUpdater interfaces.TaskStateHandler
   294  
    295  	// DeviceStatsReporter is used to look up resource usage for alloc devices
   296  	DeviceStatsReporter cinterfaces.DeviceStatsReporter
   297  
   298  	// CSIManager is used to manage the mounting of CSI volumes into tasks
   299  	CSIManager csimanager.Manager
   300  
   301  	// CpusetCgroupPathGetter is used to lookup the cgroup path if supported by the platform
   302  	CpusetCgroupPathGetter cgutil.CgroupPathGetter
   303  
   304  	// DeviceManager is used to mount devices as well as lookup device
   305  	// statistics
   306  	DeviceManager devicemanager.Manager
   307  
   308  	// DriverManager is used to dispense driver plugins and register event
   309  	// handlers
   310  	DriverManager drivermanager.Manager
   311  
   312  	// ServersContactedCh is closed when the first GetClientAllocs call to
   313  	// servers succeeds and allocs are synced.
   314  	ServersContactedCh chan struct{}
   315  
   316  	// StartConditionMetCh signals the TaskRunner when it should start the task
   317  	StartConditionMetCh <-chan struct{}
   318  
   319  	// ShutdownDelayCtx is a context from the alloc runner which will
   320  	// tell us to exit early from shutdown_delay
   321  	ShutdownDelayCtx context.Context
   322  
   323  	// ShutdownDelayCancelFn should only be used in testing.
   324  	ShutdownDelayCancelFn context.CancelFunc
   325  
   326  	// ServiceRegWrapper is the handler wrapper that is used by service hooks
   327  	// to perform service and check registration and deregistration.
   328  	ServiceRegWrapper *wrapper.HandlerWrapper
   329  
   330  	// Getter is an interface for retrieving artifacts.
   331  	Getter cinterfaces.ArtifactGetter
   332  }
   333  
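        // NewTaskRunner builds a TaskRunner from the given Config, setting up its
        // environment builder, restart tracker, driver plugin, hooks, and base metric
        // labels. It does not start the task; call Run for that.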
   334  func NewTaskRunner(config *Config) (*TaskRunner, error) {
   335  	// Create a context for causing the runner to exit
   336  	trCtx, trCancel := context.WithCancel(context.Background())
   337  
   338  	// Create a context for killing the runner
   339  	killCtx, killCancel := context.WithCancel(context.Background())
   340  
   341  	// Initialize the environment builder
   342  	envBuilder := taskenv.NewBuilder(
   343  		config.ClientConfig.Node,
   344  		config.Alloc,
   345  		config.Task,
   346  		config.ClientConfig.Region,
   347  	)
   348  
   349  	// Initialize state from alloc if it is set
   350  	tstate := structs.NewTaskState()
   351  	if ts := config.Alloc.TaskStates[config.Task.Name]; ts != nil {
   352  		tstate = ts.Copy()
   353  	}
   354  
   355  	tr := &TaskRunner{
   356  		alloc:                  config.Alloc,
   357  		allocID:                config.Alloc.ID,
   358  		clientConfig:           config.ClientConfig,
   359  		task:                   config.Task,
   360  		taskDir:                config.TaskDir,
   361  		taskName:               config.Task.Name,
   362  		taskLeader:             config.Task.Leader,
   363  		envBuilder:             envBuilder,
   364  		dynamicRegistry:        config.DynamicRegistry,
   365  		consulServiceClient:    config.Consul,
   366  		consulProxiesClient:    config.ConsulProxies,
   367  		siClient:               config.ConsulSI,
   368  		vaultClient:            config.Vault,
   369  		state:                  tstate,
   370  		localState:             state.NewLocalState(),
   371  		stateDB:                config.StateDB,
   372  		stateUpdater:           config.StateUpdater,
   373  		deviceStatsReporter:    config.DeviceStatsReporter,
   374  		killCtx:                killCtx,
   375  		killCtxCancel:          killCancel,
   376  		shutdownCtx:            trCtx,
   377  		shutdownCtxCancel:      trCancel,
   378  		triggerUpdateCh:        make(chan struct{}, triggerUpdateChCap),
   379  		restartCh:              make(chan struct{}, restartChCap),
   380  		waitCh:                 make(chan struct{}),
   381  		csiManager:             config.CSIManager,
   382  		cpusetCgroupPathGetter: config.CpusetCgroupPathGetter,
   383  		devicemanager:          config.DeviceManager,
   384  		driverManager:          config.DriverManager,
   385  		maxEvents:              defaultMaxEvents,
   386  		serversContactedCh:     config.ServersContactedCh,
   387  		startConditionMetCh:    config.StartConditionMetCh,
   388  		shutdownDelayCtx:       config.ShutdownDelayCtx,
   389  		shutdownDelayCancelFn:  config.ShutdownDelayCancelFn,
   390  		serviceRegWrapper:      config.ServiceRegWrapper,
   391  		getter:                 config.Getter,
   392  	}
   393  
    394  	// Create the task runner's logger, scoped to the task name
   395  	tr.logger = config.Logger.Named("task_runner").With("task", config.Task.Name)
   396  
   397  	// Pull out the task's resources
   398  	ares := tr.alloc.AllocatedResources
   399  	if ares == nil {
   400  		return nil, fmt.Errorf("no task resources found on allocation")
   401  	}
   402  
   403  	tres, ok := ares.Tasks[tr.taskName]
   404  	if !ok {
   405  		return nil, fmt.Errorf("no task resources found on allocation")
   406  	}
   407  	tr.taskResources = tres
   408  
   409  	// Build the restart tracker.
   410  	rp := config.Task.RestartPolicy
   411  	if rp == nil {
   412  		tg := tr.alloc.Job.LookupTaskGroup(tr.alloc.TaskGroup)
   413  		if tg == nil {
   414  			tr.logger.Error("alloc missing task group")
   415  			return nil, fmt.Errorf("alloc missing task group")
   416  		}
   417  		rp = tg.RestartPolicy
   418  	}
   419  	tr.restartTracker = restarts.NewRestartTracker(rp, tr.alloc.Job.Type, config.Task.Lifecycle)
   420  
   421  	// Get the driver
   422  	if err := tr.initDriver(); err != nil {
   423  		tr.logger.Error("failed to create driver", "error", err)
   424  		return nil, err
   425  	}
   426  
   427  	// Use the client secret only as the initial value; the identity hook will
   428  	// update this with a workload identity if one is available
   429  	tr.setNomadToken(config.ClientConfig.Node.SecretID)
   430  
    431  	// Initialize the runner's hooks. Must come after initDriver so hooks
   432  	// can use tr.driverCapabilities
   433  	tr.initHooks()
   434  
   435  	// Initialize base labels
   436  	tr.initLabels()
   437  
    438  	// Append the initial TaskReceived event
   439  	tr.appendEvent(structs.NewTaskEvent(structs.TaskReceived))
   440  
   441  	return tr, nil
   442  }
   443  
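        // initLabels builds the base metric labels (job, task_group, alloc_id, task,
        // namespace, plus parent/dispatch/periodic IDs where applicable) attached to
        // every metric emitted by this task runner.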
   444  func (tr *TaskRunner) initLabels() {
   445  	alloc := tr.Alloc()
   446  	tr.baseLabels = []metrics.Label{
   447  		{
   448  			Name:  "job",
   449  			Value: alloc.Job.Name,
   450  		},
   451  		{
   452  			Name:  "task_group",
   453  			Value: alloc.TaskGroup,
   454  		},
   455  		{
   456  			Name:  "alloc_id",
   457  			Value: tr.allocID,
   458  		},
   459  		{
   460  			Name:  "task",
   461  			Value: tr.taskName,
   462  		},
   463  		{
   464  			Name:  "namespace",
   465  			Value: tr.alloc.Namespace,
   466  		},
   467  	}
   468  
   469  	if tr.alloc.Job.ParentID != "" {
   470  		tr.baseLabels = append(tr.baseLabels, metrics.Label{
   471  			Name:  "parent_id",
   472  			Value: tr.alloc.Job.ParentID,
   473  		})
   474  		if strings.Contains(tr.alloc.Job.Name, "/dispatch-") {
   475  			tr.baseLabels = append(tr.baseLabels, metrics.Label{
   476  				Name:  "dispatch_id",
   477  				Value: strings.Split(tr.alloc.Job.Name, "/dispatch-")[1],
   478  			})
   479  		}
   480  		if strings.Contains(tr.alloc.Job.Name, "/periodic-") {
   481  			tr.baseLabels = append(tr.baseLabels, metrics.Label{
   482  				Name:  "periodic_id",
   483  				Value: strings.Split(tr.alloc.Job.Name, "/periodic-")[1],
   484  			})
   485  		}
   486  	}
   487  }
   488  
    489  // MarkFailedDead marks a task as failed and not to be run. It is intended to be
    490  // invoked when alloc runner prestart hooks fail. It must never be used together with Run().
   491  func (tr *TaskRunner) MarkFailedDead(reason string) {
   492  	defer close(tr.waitCh)
   493  
   494  	tr.stateLock.Lock()
   495  	if err := tr.stateDB.PutTaskRunnerLocalState(tr.allocID, tr.taskName, tr.localState); err != nil {
   496  		//TODO Nomad will be unable to restore this task; try to kill
   497  		//     it now and fail? In general we prefer to leave running
   498  		//     tasks running even if the agent encounters an error.
   499  		tr.logger.Warn("error persisting local failed task state; may be unable to restore after a Nomad restart",
   500  			"error", err)
   501  	}
   502  	tr.stateLock.Unlock()
   503  
   504  	event := structs.NewTaskEvent(structs.TaskSetupFailure).
   505  		SetDisplayMessage(reason).
   506  		SetFailsTask()
   507  	tr.UpdateState(structs.TaskStateDead, event)
   508  
   509  	// Run the stop hooks in case task was a restored task that failed prestart
   510  	if err := tr.stop(); err != nil {
   511  		tr.logger.Error("stop failed while marking task dead", "error", err)
   512  	}
   513  }
   514  
   515  // Run the TaskRunner. Starts the user's task or reattaches to a restored task.
   516  // Run closes WaitCh when it exits. Should be started in a goroutine.
   517  func (tr *TaskRunner) Run() {
   518  	defer close(tr.waitCh)
   519  	var result *drivers.ExitResult
   520  
   521  	tr.stateLock.RLock()
   522  	dead := tr.state.State == structs.TaskStateDead
   523  	runComplete := tr.localState.RunComplete
   524  	tr.stateLock.RUnlock()
   525  
    526  	// If restoring a dead task, ensure the task is cleared. If the local
    527  	// state indicates that the previous Run() call completed, execute all
    528  	// post-stop hooks and exit early; otherwise proceed to the
    529  	// ALLOC_RESTART loop, skipping MAIN since the task is dead.
   530  	if dead {
    531  		// Run cleanup functions without emitting any additional events to
    532  		// handle the case where a dead task was restored because the client
    533  		// terminated after the task finished but before post-run actions completed.
   534  		tr.clearDriverHandle()
   535  		tr.stateUpdater.TaskStateUpdated()
   536  		if runComplete {
   537  			if err := tr.stop(); err != nil {
   538  				tr.logger.Error("stop failed on terminal task", "error", err)
   539  			}
   540  			return
   541  		}
   542  	}
   543  
   544  	// Updates are handled asynchronously with the other hooks but each
   545  	// triggered update - whether due to alloc updates or a new vault token
   546  	// - should be handled serially.
   547  	go tr.handleUpdates()
   548  
    549  	// If restore failed, wait until servers are contacted before running.
   550  	// #1795
   551  	if tr.waitOnServers {
   552  		tr.logger.Info("task failed to restore; waiting to contact server before restarting")
   553  		select {
   554  		case <-tr.killCtx.Done():
   555  			tr.logger.Info("task killed while waiting for server contact")
   556  		case <-tr.shutdownCtx.Done():
   557  			return
   558  		case <-tr.serversContactedCh:
   559  			tr.logger.Info("server contacted; unblocking waiting task")
   560  		}
   561  	}
   562  
   563  	// Set the initial task state.
   564  	tr.stateUpdater.TaskStateUpdated()
   565  
   566  	// start with a stopped timer; actual restart delay computed later
   567  	timer, stop := helper.NewStoppedTimer()
   568  	defer stop()
   569  
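        // MAIN runs the prestart hooks, starts the task, and waits for it to exit
        // (WAIT); RESTART then decides whether to loop again. Once the task is dead,
        // ALLOC_RESTART below waits in case the allocation itself is restarted.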
   570  MAIN:
   571  	for !tr.shouldShutdown() {
   572  		if dead {
   573  			break
   574  		}
   575  
   576  		select {
   577  		case <-tr.killCtx.Done():
   578  			break MAIN
   579  		case <-tr.shutdownCtx.Done():
   580  			// TaskRunner was told to exit immediately
   581  			return
   582  		case <-tr.startConditionMetCh:
   583  			tr.logger.Debug("lifecycle start condition has been met, proceeding")
   584  			// yay proceed
   585  		}
   586  
   587  		// Run the prestart hooks
   588  		if err := tr.prestart(); err != nil {
   589  			tr.logger.Error("prestart failed", "error", err)
   590  			tr.restartTracker.SetStartError(err)
   591  			goto RESTART
   592  		}
   593  
   594  		select {
   595  		case <-tr.killCtx.Done():
   596  			break MAIN
   597  		case <-tr.shutdownCtx.Done():
   598  			// TaskRunner was told to exit immediately
   599  			return
   600  		default:
   601  		}
   602  
   603  		// Run the task
   604  		if err := tr.runDriver(); err != nil {
   605  			tr.logger.Error("running driver failed", "error", err)
   606  			tr.restartTracker.SetStartError(err)
   607  			goto RESTART
   608  		}
   609  
   610  		// Run the poststart hooks
   611  		if err := tr.poststart(); err != nil {
   612  			tr.logger.Error("poststart failed", "error", err)
   613  		}
   614  
   615  		// Grab the result proxy and wait for task to exit
   616  	WAIT:
   617  		{
   618  			handle := tr.getDriverHandle()
   619  			result = nil
   620  
   621  			// Do *not* use tr.killCtx here as it would cause
   622  			// Wait() to unblock before the task exits when Kill()
   623  			// is called.
   624  			if resultCh, err := handle.WaitCh(context.Background()); err != nil {
   625  				tr.logger.Error("wait task failed", "error", err)
   626  			} else {
   627  				select {
   628  				case <-tr.killCtx.Done():
    629  					// We can go through the normal should-restart check since
    630  					// the restart tracker knows it is killed
   631  					result = tr.handleKill(resultCh)
   632  				case <-tr.shutdownCtx.Done():
   633  					// TaskRunner was told to exit immediately
   634  					return
   635  				case result = <-resultCh:
   636  				}
   637  
   638  				// WaitCh returned a result
   639  				if retryWait := tr.handleTaskExitResult(result); retryWait {
   640  					goto WAIT
   641  				}
   642  			}
   643  		}
   644  
   645  		// Clear the handle
   646  		tr.clearDriverHandle()
   647  
   648  		// Store the wait result on the restart tracker
   649  		tr.restartTracker.SetExitResult(result)
   650  
   651  		if err := tr.exited(); err != nil {
   652  			tr.logger.Error("exited hooks failed", "error", err)
   653  		}
   654  
   655  	RESTART:
   656  		restart, restartDelay := tr.shouldRestart()
   657  		if !restart {
   658  			break MAIN
   659  		}
   660  
   661  		timer.Reset(restartDelay)
   662  
   663  		// Actually restart by sleeping and also watching for destroy events
   664  		select {
   665  		case <-timer.C:
   666  		case <-tr.killCtx.Done():
   667  			tr.logger.Trace("task killed between restarts", "delay", restartDelay)
   668  			break MAIN
   669  		case <-tr.shutdownCtx.Done():
   670  			// TaskRunner was told to exit immediately
   671  			tr.logger.Trace("gracefully shutting down during restart delay")
   672  			return
   673  		}
   674  	}
   675  
   676  	// Ensure handle is cleaned up. Restore could have recovered a task
   677  	// that should be terminal, so if the handle still exists we should
   678  	// kill it here.
   679  	if tr.getDriverHandle() != nil {
   680  		if result = tr.handleKill(nil); result != nil {
   681  			tr.emitExitResultEvent(result)
   682  		}
   683  
   684  		tr.clearDriverHandle()
   685  
   686  		if err := tr.exited(); err != nil {
   687  			tr.logger.Error("exited hooks failed while cleaning up terminal task", "error", err)
   688  		}
   689  	}
   690  
   691  	// Mark the task as dead
   692  	tr.UpdateState(structs.TaskStateDead, nil)
   693  
   694  	// Wait here in case the allocation is restarted. Poststop tasks will never
   695  	// run again so skip them to avoid blocking forever.
   696  	if !tr.Task().IsPoststop() {
   697  	ALLOC_RESTART:
   698  		// Run in a loop to handle cases where restartCh is triggered but the
   699  		// task runner doesn't need to restart.
   700  		for {
   701  			select {
   702  			case <-tr.killCtx.Done():
   703  				break ALLOC_RESTART
   704  			case <-tr.shutdownCtx.Done():
   705  				return
   706  			case <-tr.restartCh:
   707  				// Restart without delay since the task is not running anymore.
   708  				restart, _ := tr.shouldRestart()
   709  				if restart {
   710  					// Set runner as not dead to allow the MAIN loop to run.
   711  					dead = false
   712  					goto MAIN
   713  				}
   714  			}
   715  		}
   716  	}
   717  
   718  	tr.stateLock.Lock()
   719  	tr.localState.RunComplete = true
   720  	err := tr.stateDB.PutTaskRunnerLocalState(tr.allocID, tr.taskName, tr.localState)
   721  	if err != nil {
   722  		tr.logger.Warn("error persisting task state on run loop exit", "error", err)
   723  	}
   724  	tr.stateLock.Unlock()
   725  
   726  	// Run the stop hooks
   727  	if err := tr.stop(); err != nil {
   728  		tr.logger.Error("stop failed", "error", err)
   729  	}
   730  
   731  	tr.logger.Debug("task run loop exiting")
   732  }
   733  
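        // shouldShutdown returns true when the allocation is terminal on the client,
        // or terminal on the server for any task other than a poststop task.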
   734  func (tr *TaskRunner) shouldShutdown() bool {
   735  	alloc := tr.Alloc()
   736  	if alloc.ClientTerminalStatus() {
   737  		return true
   738  	}
   739  
   740  	if !tr.IsPoststopTask() && alloc.ServerTerminalStatus() {
   741  		return true
   742  	}
   743  
   744  	return false
   745  }
   746  
   747  // handleTaskExitResult handles the results returned by the task exiting. If
   748  // retryWait is true, the caller should attempt to wait on the task again since
   749  // it has not actually finished running. This can happen if the driver plugin
   750  // has exited.
   751  func (tr *TaskRunner) handleTaskExitResult(result *drivers.ExitResult) (retryWait bool) {
   752  	if result == nil {
   753  		return false
   754  	}
   755  
   756  	if result.Err == bstructs.ErrPluginShutdown {
   757  		dn := tr.Task().Driver
   758  		tr.logger.Debug("driver plugin has shutdown; attempting to recover task", "driver", dn)
   759  
   760  		// Initialize a new driver handle
   761  		if err := tr.initDriver(); err != nil {
   762  			tr.logger.Error("failed to initialize driver after it exited unexpectedly", "error", err, "driver", dn)
   763  			return false
   764  		}
   765  
   766  		// Try to restore the handle
   767  		tr.stateLock.RLock()
   768  		h := tr.localState.TaskHandle
   769  		net := tr.localState.DriverNetwork
   770  		tr.stateLock.RUnlock()
   771  		if !tr.restoreHandle(h, net) {
   772  			tr.logger.Error("failed to restore handle on driver after it exited unexpectedly", "driver", dn)
   773  			return false
   774  		}
   775  
   776  		tr.logger.Debug("task successfully recovered on driver", "driver", dn)
   777  		return true
   778  	}
   779  
   780  	// Emit Terminated event
   781  	tr.emitExitResultEvent(result)
   782  
   783  	return false
   784  }
   785  
   786  // emitExitResultEvent emits a TaskTerminated event for an ExitResult.
   787  func (tr *TaskRunner) emitExitResultEvent(result *drivers.ExitResult) {
   788  	event := structs.NewTaskEvent(structs.TaskTerminated).
   789  		SetExitCode(result.ExitCode).
   790  		SetSignal(result.Signal).
   791  		SetOOMKilled(result.OOMKilled).
   792  		SetExitMessage(result.Err)
   793  
   794  	tr.EmitEvent(event)
   795  
   796  	if result.OOMKilled {
   797  		metrics.IncrCounterWithLabels([]string{"client", "allocs", "oom_killed"}, 1, tr.baseLabels)
   798  	}
   799  }
   800  
   801  // handleUpdates runs update hooks when triggerUpdateCh is ticked and exits
   802  // when Run has returned. Should only be run in a goroutine from Run.
   803  func (tr *TaskRunner) handleUpdates() {
   804  	for {
   805  		select {
   806  		case <-tr.triggerUpdateCh:
   807  		case <-tr.waitCh:
   808  			return
   809  		}
   810  
   811  		// Non-terminal update; run hooks
   812  		tr.updateHooks()
   813  	}
   814  }
   815  
   816  // shouldRestart determines whether the task should be restarted and updates
   817  // the task state unless the task is killed or terminated.
   818  func (tr *TaskRunner) shouldRestart() (bool, time.Duration) {
   819  	// Determine if we should restart
   820  	state, when := tr.restartTracker.GetState()
   821  	reason := tr.restartTracker.GetReason()
   822  	switch state {
   823  	case structs.TaskKilled:
   824  		// Never restart an explicitly killed task. Kill method handles
   825  		// updating the server.
   826  		tr.EmitEvent(structs.NewTaskEvent(state))
   827  		return false, 0
   828  	case structs.TaskNotRestarting, structs.TaskTerminated:
   829  		tr.logger.Info("not restarting task", "reason", reason)
   830  		if state == structs.TaskNotRestarting {
   831  			tr.UpdateState(structs.TaskStateDead, structs.NewTaskEvent(structs.TaskNotRestarting).SetRestartReason(reason).SetFailsTask())
   832  		}
   833  		return false, 0
   834  	case structs.TaskRestarting:
   835  		tr.logger.Info("restarting task", "reason", reason, "delay", when)
   836  		tr.UpdateState(structs.TaskStatePending, structs.NewTaskEvent(structs.TaskRestarting).SetRestartDelay(when).SetRestartReason(reason))
   837  		return true, when
   838  	default:
   839  		tr.logger.Error("restart tracker returned unknown state", "state", state)
   840  		return true, when
   841  	}
   842  }
   843  
    844  // runDriver runs the driver and waits for it to exit.
    845  // It emits an appropriate task event on success/failure.
   846  func (tr *TaskRunner) runDriver() error {
   847  
   848  	taskConfig := tr.buildTaskConfig()
   849  	if tr.cpusetCgroupPathGetter != nil {
    850  		tr.logger.Trace("waiting for cgroup to exist", "allocID", tr.allocID, "task", tr.taskName)
   851  		cpusetCgroupPath, err := tr.cpusetCgroupPathGetter(tr.killCtx)
   852  		if err != nil {
   853  			return err
   854  		}
   855  		taskConfig.Resources.LinuxResources.CpusetCgroupPath = cpusetCgroupPath
   856  	}
   857  
   858  	// Build hcl context variables
   859  	vars, errs, err := tr.envBuilder.Build().AllValues()
   860  	if err != nil {
   861  		return fmt.Errorf("error building environment variables: %v", err)
   862  	}
   863  
   864  	// Handle per-key errors
   865  	if len(errs) > 0 {
   866  		keys := make([]string, 0, len(errs))
   867  		for k, err := range errs {
   868  			keys = append(keys, k)
   869  
   870  			if tr.logger.IsTrace() {
   871  				// Verbosely log every diagnostic for debugging
   872  				tr.logger.Trace("error building environment variables", "key", k, "error", err)
   873  			}
   874  		}
   875  
   876  		tr.logger.Warn("some environment variables not available for rendering", "keys", strings.Join(keys, ", "))
   877  	}
   878  
   879  	val, diag, diagErrs := hclutils.ParseHclInterface(tr.task.Config, tr.taskSchema, vars)
   880  	if diag.HasErrors() {
   881  		parseErr := multierror.Append(errors.New("failed to parse config: "), diagErrs...)
   882  		tr.EmitEvent(structs.NewTaskEvent(structs.TaskFailedValidation).SetValidationError(parseErr))
   883  		return parseErr
   884  	}
   885  
   886  	if err := taskConfig.EncodeDriverConfig(val); err != nil {
   887  		encodeErr := fmt.Errorf("failed to encode driver config: %v", err)
   888  		tr.EmitEvent(structs.NewTaskEvent(structs.TaskFailedValidation).SetValidationError(encodeErr))
   889  		return encodeErr
   890  	}
   891  
   892  	// If there's already a task handle (eg from a Restore) there's nothing
   893  	// to do except update state.
   894  	if tr.getDriverHandle() != nil {
   895  		// Ensure running state is persisted but do *not* append a new
   896  		// task event as restoring is a client event and not relevant
   897  		// to a task's lifecycle.
   898  		if err := tr.updateStateImpl(structs.TaskStateRunning); err != nil {
   899  			//TODO return error and destroy task to avoid an orphaned task?
   900  			tr.logger.Warn("error persisting task state", "error", err)
   901  		}
   902  		return nil
   903  	}
   904  
    905  	// Start the task if there's no existing handle (or if RecoverTask failed)
   906  	handle, net, err := tr.driver.StartTask(taskConfig)
   907  	if err != nil {
   908  		// The plugin has died, try relaunching it
   909  		if err == bstructs.ErrPluginShutdown {
   910  			tr.logger.Info("failed to start task because plugin shutdown unexpectedly; attempting to recover")
   911  			if err := tr.initDriver(); err != nil {
   912  				taskErr := fmt.Errorf("failed to initialize driver after it exited unexpectedly: %v", err)
   913  				tr.EmitEvent(structs.NewTaskEvent(structs.TaskDriverFailure).SetDriverError(taskErr))
   914  				return taskErr
   915  			}
   916  
   917  			handle, net, err = tr.driver.StartTask(taskConfig)
   918  			if err != nil {
   919  				taskErr := fmt.Errorf("failed to start task after driver exited unexpectedly: %v", err)
   920  				tr.EmitEvent(structs.NewTaskEvent(structs.TaskDriverFailure).SetDriverError(taskErr))
   921  				return taskErr
   922  			}
   923  		} else {
    924  			// Do *NOT* wrap the error here without maintaining whether or not it is Recoverable.
    925  			// You must emit a task failure event for the error to be considered Recoverable.
   926  			tr.EmitEvent(structs.NewTaskEvent(structs.TaskDriverFailure).SetDriverError(err))
   927  			return err
   928  		}
   929  	}
   930  
   931  	tr.stateLock.Lock()
   932  	tr.localState.TaskHandle = handle
   933  	tr.localState.DriverNetwork = net
   934  	if err := tr.stateDB.PutTaskRunnerLocalState(tr.allocID, tr.taskName, tr.localState); err != nil {
   935  		//TODO Nomad will be unable to restore this task; try to kill
   936  		//     it now and fail? In general we prefer to leave running
   937  		//     tasks running even if the agent encounters an error.
   938  		tr.logger.Warn("error persisting local task state; may be unable to restore after a Nomad restart",
   939  			"error", err, "task_id", handle.Config.ID)
   940  	}
   941  	tr.stateLock.Unlock()
   942  
   943  	tr.setDriverHandle(NewDriverHandle(tr.driver, taskConfig.ID, tr.Task(), tr.clientConfig.MaxKillTimeout, net))
   944  
   945  	// Emit an event that we started
   946  	tr.UpdateState(structs.TaskStateRunning, structs.NewTaskEvent(structs.TaskStarted))
   947  	return nil
   948  }
   949  
    950  // initDriver retrieves the DriverPlugin from the plugin loader for this task
   951  func (tr *TaskRunner) initDriver() error {
   952  	driver, err := tr.driverManager.Dispense(tr.Task().Driver)
   953  	if err != nil {
   954  		return err
   955  	}
   956  	tr.driver = driver
   957  
   958  	schema, err := tr.driver.TaskConfigSchema()
   959  	if err != nil {
   960  		return err
   961  	}
   962  	spec, diag := hclspecutils.Convert(schema)
   963  	if diag.HasErrors() {
   964  		return multierror.Append(errors.New("failed to convert task schema"), diag.Errs()...)
   965  	}
   966  	tr.taskSchema = spec
   967  
   968  	caps, err := tr.driver.Capabilities()
   969  	if err != nil {
   970  		return err
   971  	}
   972  	tr.driverCapabilities = caps
   973  
   974  	return nil
   975  }
   976  
    977  // handleKill is used to handle a request to kill a task. It will return
   978  // the handle exit result if one is available and store any error in the task
   979  // runner killErr value.
   980  func (tr *TaskRunner) handleKill(resultCh <-chan *drivers.ExitResult) *drivers.ExitResult {
   981  	// Run the pre killing hooks
   982  	tr.preKill()
   983  
    984  	// Wait for the task's ShutdownDelay after running prekill hooks.
    985  	// This allows things like service de-registration to run
    986  	// before waiting to kill the task.
   987  	if delay := tr.Task().ShutdownDelay; delay != 0 {
   988  		tr.logger.Debug("waiting before killing task", "shutdown_delay", delay)
   989  
   990  		ev := structs.NewTaskEvent(structs.TaskWaitingShuttingDownDelay).
   991  			SetDisplayMessage(fmt.Sprintf("Waiting for shutdown_delay of %s before killing the task.", delay))
   992  		tr.UpdateState(structs.TaskStatePending, ev)
   993  
   994  		select {
   995  		case result := <-resultCh:
   996  			return result
   997  		case <-tr.shutdownDelayCtx.Done():
   998  			break
   999  		case <-time.After(delay):
  1000  		}
  1001  	}
  1002  
  1003  	// Tell the restart tracker that the task has been killed so it doesn't
  1004  	// attempt to restart it.
  1005  	tr.restartTracker.SetKilled()
  1006  
  1007  	// Check whether the task has already exited.
  1008  	select {
  1009  	case result := <-resultCh:
  1010  		return result
  1011  	default:
  1012  	}
  1013  
  1014  	handle := tr.getDriverHandle()
  1015  	if handle == nil {
  1016  		return nil
  1017  	}
  1018  
  1019  	// Kill the task using an exponential backoff in case of failures.
  1020  	result, killErr := tr.killTask(handle, resultCh)
  1021  	if killErr != nil {
  1022  		// We couldn't successfully destroy the resource created.
  1023  		tr.logger.Error("failed to kill task. Resources may have been leaked", "error", killErr)
  1024  		tr.setKillErr(killErr)
  1025  	}
  1026  
  1027  	if result != nil {
  1028  		return result
  1029  	}
  1030  
  1031  	// Block until task has exited.
  1032  	if resultCh == nil {
  1033  		var err error
  1034  		resultCh, err = handle.WaitCh(tr.shutdownCtx)
  1035  
  1036  		// The error should be nil or TaskNotFound; if it's something else then a
  1037  		// failure in the driver or transport layer occurred.
  1038  		if err != nil {
  1039  			if err == drivers.ErrTaskNotFound {
  1040  				return nil
  1041  			}
  1042  			tr.logger.Error("failed to wait on task. Resources may have been leaked", "error", err)
  1043  			tr.setKillErr(err)
  1044  			return nil
  1045  		}
  1046  	}
  1047  
  1048  	select {
  1049  	case result := <-resultCh:
  1050  		return result
  1051  	case <-tr.shutdownCtx.Done():
  1052  		return nil
  1053  	}
  1054  }
  1055  
  1056  // killTask kills the task handle. In the case that killing fails,
  1057  // killTask will retry with an exponential backoff and will give up at a
  1058  // given limit. Returns an error if the task could not be killed.
  1059  func (tr *TaskRunner) killTask(handle *DriverHandle, resultCh <-chan *drivers.ExitResult) (*drivers.ExitResult, error) {
  1060  	// Cap the number of times we attempt to kill the task.
  1061  	var err error
  1062  	for i := 0; i < killFailureLimit; i++ {
  1063  		if err = handle.Kill(); err != nil {
  1064  			if err == drivers.ErrTaskNotFound {
  1065  				tr.logger.Warn("couldn't find task to kill", "task_id", handle.ID())
  1066  				return nil, nil
  1067  			}
  1068  			// Calculate the new backoff
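        			// With the current constants this grows as 4^i * 5s (5s, 20s, 80s, ...),
        			// capped at the 2 minute killBackoffLimit.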
  1069  			backoff := (1 << (2 * uint64(i))) * killBackoffBaseline
  1070  			if backoff > killBackoffLimit {
  1071  				backoff = killBackoffLimit
  1072  			}
  1073  
  1074  			tr.logger.Error("failed to kill task", "backoff", backoff, "error", err)
  1075  			select {
  1076  			case result := <-resultCh:
  1077  				return result, nil
  1078  			case <-time.After(backoff):
  1079  			}
  1080  		} else {
  1081  			// Kill was successful
  1082  			return nil, nil
  1083  		}
  1084  	}
  1085  	return nil, err
  1086  }
  1087  
  1088  // persistLocalState persists local state to disk synchronously.
  1089  func (tr *TaskRunner) persistLocalState() error {
  1090  	tr.stateLock.RLock()
  1091  	defer tr.stateLock.RUnlock()
  1092  
  1093  	return tr.stateDB.PutTaskRunnerLocalState(tr.allocID, tr.taskName, tr.localState)
  1094  }
  1095  
  1096  // buildTaskConfig builds a drivers.TaskConfig with a unique ID for the task.
  1097  // The ID is unique for every invocation; it is built from the alloc ID, task
  1098  // name, and 8 random characters.
  1099  func (tr *TaskRunner) buildTaskConfig() *drivers.TaskConfig {
  1100  	task := tr.Task()
  1101  	alloc := tr.Alloc()
  1102  	invocationid := uuid.Generate()[:8]
  1103  	taskResources := tr.taskResources
  1104  	ports := tr.Alloc().AllocatedResources.Shared.Ports
  1105  	env := tr.envBuilder.Build()
  1106  	tr.networkIsolationLock.Lock()
  1107  	defer tr.networkIsolationLock.Unlock()
  1108  
  1109  	var dns *drivers.DNSConfig
  1110  	if alloc.AllocatedResources != nil && len(alloc.AllocatedResources.Shared.Networks) > 0 {
  1111  		allocDNS := alloc.AllocatedResources.Shared.Networks[0].DNS
  1112  		if allocDNS != nil {
  1113  			interpolatedNetworks := taskenv.InterpolateNetworks(env, alloc.AllocatedResources.Shared.Networks)
  1114  			dns = &drivers.DNSConfig{
  1115  				Servers:  interpolatedNetworks[0].DNS.Servers,
  1116  				Searches: interpolatedNetworks[0].DNS.Searches,
  1117  				Options:  interpolatedNetworks[0].DNS.Options,
  1118  			}
  1119  		}
  1120  	}
  1121  
  1122  	memoryLimit := taskResources.Memory.MemoryMB
  1123  	if max := taskResources.Memory.MemoryMaxMB; max > memoryLimit {
  1124  		memoryLimit = max
  1125  	}
  1126  
  1127  	cpusetCpus := make([]string, len(taskResources.Cpu.ReservedCores))
  1128  	for i, v := range taskResources.Cpu.ReservedCores {
  1129  		cpusetCpus[i] = fmt.Sprintf("%d", v)
  1130  	}
  1131  
  1132  	return &drivers.TaskConfig{
  1133  		ID:            fmt.Sprintf("%s/%s/%s", alloc.ID, task.Name, invocationid),
  1134  		Name:          task.Name,
  1135  		JobName:       alloc.Job.Name,
  1136  		JobID:         alloc.Job.ID,
  1137  		TaskGroupName: alloc.TaskGroup,
  1138  		Namespace:     alloc.Namespace,
  1139  		NodeName:      alloc.NodeName,
  1140  		NodeID:        alloc.NodeID,
  1141  		Resources: &drivers.Resources{
  1142  			NomadResources: taskResources,
  1143  			LinuxResources: &drivers.LinuxResources{
  1144  				MemoryLimitBytes: memoryLimit * 1024 * 1024,
  1145  				CPUShares:        taskResources.Cpu.CpuShares,
  1146  				CpusetCpus:       strings.Join(cpusetCpus, ","),
  1147  				PercentTicks:     float64(taskResources.Cpu.CpuShares) / float64(tr.clientConfig.Node.NodeResources.Cpu.CpuShares),
  1148  			},
  1149  			Ports: &ports,
  1150  		},
  1151  		Devices:          tr.hookResources.getDevices(),
  1152  		Mounts:           tr.hookResources.getMounts(),
  1153  		Env:              env.Map(),
  1154  		DeviceEnv:        env.DeviceEnv(),
  1155  		User:             task.User,
  1156  		AllocDir:         tr.taskDir.AllocDir,
  1157  		StdoutPath:       tr.logmonHookConfig.stdoutFifo,
  1158  		StderrPath:       tr.logmonHookConfig.stderrFifo,
  1159  		AllocID:          tr.allocID,
  1160  		NetworkIsolation: tr.networkIsolationSpec,
  1161  		DNS:              dns,
  1162  	}
  1163  }
  1164  
  1165  // Restore task runner state. Called by AllocRunner.Restore after NewTaskRunner
  1166  // but before Run so no locks need to be acquired.
  1167  func (tr *TaskRunner) Restore() error {
  1168  	ls, ts, err := tr.stateDB.GetTaskRunnerState(tr.allocID, tr.taskName)
  1169  	if err != nil {
  1170  		return err
  1171  	}
  1172  
  1173  	if ls != nil {
  1174  		ls.Canonicalize()
  1175  		tr.localState = ls
  1176  	}
  1177  
  1178  	if ts != nil {
  1179  		ts.Canonicalize()
  1180  		tr.state = ts
  1181  	}
  1182  
  1183  	// If a TaskHandle was persisted, ensure it is valid or destroy it.
  1184  	if taskHandle := tr.localState.TaskHandle; taskHandle != nil {
  1185  		//TODO if RecoverTask returned the DriverNetwork we wouldn't
  1186  		//     have to persist it at all!
  1187  		restored := tr.restoreHandle(taskHandle, tr.localState.DriverNetwork)
  1188  
  1189  		// If the handle could not be restored, the alloc is
  1190  		// non-terminal, and the task isn't a system job: wait until
  1191  		// servers have been contacted before running. #1795
  1192  		if restored {
  1193  			return nil
  1194  		}
  1195  
  1196  		alloc := tr.Alloc()
  1197  		if tr.state.State == structs.TaskStateDead || alloc.TerminalStatus() || alloc.Job.Type == structs.JobTypeSystem {
  1198  			return nil
  1199  		}
  1200  
  1201  		tr.logger.Trace("failed to reattach to task; will not run until server is contacted")
  1202  		tr.waitOnServers = true
  1203  
  1204  		ev := structs.NewTaskEvent(structs.TaskRestoreFailed).
  1205  			SetDisplayMessage("failed to restore task; will not run until server is contacted")
  1206  		tr.UpdateState(structs.TaskStatePending, ev)
  1207  	}
  1208  
  1209  	return nil
  1210  }
  1211  
  1212  // restoreHandle ensures a TaskHandle is valid by calling Driver.RecoverTask
  1213  // and sets the driver handle. If the TaskHandle is not valid, DestroyTask is
  1214  // called.
  1215  func (tr *TaskRunner) restoreHandle(taskHandle *drivers.TaskHandle, net *drivers.DriverNetwork) (success bool) {
  1216  	// Ensure handle is well-formed
  1217  	if taskHandle.Config == nil {
  1218  		return true
  1219  	}
  1220  
  1221  	if err := tr.driver.RecoverTask(taskHandle); err != nil {
  1222  		if tr.TaskState().State != structs.TaskStateRunning {
  1223  			// RecoverTask should fail if the Task wasn't running
  1224  			return true
  1225  		}
  1226  
  1227  		tr.logger.Error("error recovering task; cleaning up",
  1228  			"error", err, "task_id", taskHandle.Config.ID)
  1229  
  1230  		// Try to cleanup any existing task state in the plugin before restarting
  1231  		if err := tr.driver.DestroyTask(taskHandle.Config.ID, true); err != nil {
  1232  			// Ignore ErrTaskNotFound errors as ideally
  1233  			// this task has already been stopped and
  1234  			// therefore doesn't exist.
  1235  			if err != drivers.ErrTaskNotFound {
  1236  				tr.logger.Warn("error destroying unrecoverable task",
  1237  					"error", err, "task_id", taskHandle.Config.ID)
  1238  			}
  1239  
  1240  		}
  1241  
  1242  		return false
  1243  	}
  1244  
  1245  	// Update driver handle on task runner
  1246  	tr.setDriverHandle(NewDriverHandle(tr.driver, taskHandle.Config.ID, tr.Task(), tr.clientConfig.MaxKillTimeout, net))
  1247  	return true
  1248  }
  1249  
  1250  // UpdateState sets the task runner's allocation state and triggers a server
  1251  // update.
  1252  func (tr *TaskRunner) UpdateState(state string, event *structs.TaskEvent) {
  1253  	tr.stateLock.Lock()
  1254  	defer tr.stateLock.Unlock()
  1255  
  1256  	tr.logger.Trace("setting task state", "state", state)
  1257  
  1258  	if event != nil {
  1259  		tr.logger.Trace("appending task event", "state", state, "event", event.Type)
  1260  
  1261  		// Append the event
  1262  		tr.appendEvent(event)
  1263  	}
  1264  
  1265  	// Update the state
  1266  	if err := tr.updateStateImpl(state); err != nil {
  1267  		// Only log the error as persistence errors should not
  1268  		// affect task state.
  1269  		tr.logger.Error("error persisting task state", "error", err, "event", event, "state", state)
  1270  	}
  1271  
  1272  	// Store task handle for remote tasks
  1273  	if tr.driverCapabilities != nil && tr.driverCapabilities.RemoteTasks {
  1274  		tr.logger.Trace("storing remote task handle state")
  1275  		tr.localState.TaskHandle.Store(tr.state)
  1276  	}
  1277  
  1278  	// Notify the alloc runner of the transition
  1279  	tr.stateUpdater.TaskStateUpdated()
  1280  }
  1281  
  1282  // updateStateImpl updates the in-memory task state and persists to disk.
  1283  func (tr *TaskRunner) updateStateImpl(state string) error {
  1284  
  1285  	// Update the task state
  1286  	oldState := tr.state.State
  1287  	taskState := tr.state
  1288  	taskState.State = state
  1289  
  1290  	// Handle the state transition.
  1291  	switch state {
  1292  	case structs.TaskStateRunning:
  1293  		// Capture the start time if it is just starting
  1294  		if oldState != structs.TaskStateRunning {
  1295  			taskState.StartedAt = time.Now().UTC()
  1296  			metrics.IncrCounterWithLabels([]string{"client", "allocs", "running"}, 1, tr.baseLabels)
  1297  		}
  1298  	case structs.TaskStateDead:
  1299  		// Capture the finished time if not already set
  1300  		if taskState.FinishedAt.IsZero() {
  1301  			taskState.FinishedAt = time.Now().UTC()
  1302  		}
  1303  
  1304  		// Emit metrics to indicate task completion or failure
  1305  		if taskState.Failed {
  1306  			metrics.IncrCounterWithLabels([]string{"client", "allocs", "failed"}, 1, tr.baseLabels)
  1307  		} else {
  1308  			metrics.IncrCounterWithLabels([]string{"client", "allocs", "complete"}, 1, tr.baseLabels)
  1309  		}
  1310  	}
  1311  
  1312  	// Persist the state and event
  1313  	return tr.stateDB.PutTaskState(tr.allocID, tr.taskName, taskState)
  1314  }
  1315  
  1316  // EmitEvent appends a new TaskEvent to this task's TaskState. The actual
  1317  // TaskState.State (pending, running, dead) is not changed. Use UpdateState to
  1318  // transition states.
  1319  // Events are persisted locally and sent to the server, but errors are simply
  1320  // logged. Use AppendEvent to add a new event without notifying the alloc runner.
  1321  func (tr *TaskRunner) EmitEvent(event *structs.TaskEvent) {
  1322  	tr.stateLock.Lock()
  1323  	defer tr.stateLock.Unlock()
  1324  
  1325  	tr.appendEvent(event)
  1326  
  1327  	if err := tr.stateDB.PutTaskState(tr.allocID, tr.taskName, tr.state); err != nil {
  1328  		// Only a warning because the next event/state-transition will
  1329  		// try to persist it again.
  1330  		tr.logger.Warn("error persisting event", "error", err, "event", event)
  1331  	}
  1332  
  1333  	// Notify the alloc runner of the event
  1334  	tr.stateUpdater.TaskStateUpdated()
  1335  }
  1336  
  1337  // AppendEvent appends a new TaskEvent to this task's TaskState. The actual
  1338  // TaskState.State (pending, running, dead) is not changed. Use UpdateState to
  1339  // transition states.
  1340  // Events are persisted locally and errors are simply logged. Use EmitEvent
  1341  // to also update the AllocRunner.
  1342  func (tr *TaskRunner) AppendEvent(event *structs.TaskEvent) {
  1343  	tr.stateLock.Lock()
  1344  	defer tr.stateLock.Unlock()
  1345  
  1346  	tr.appendEvent(event)
  1347  
  1348  	if err := tr.stateDB.PutTaskState(tr.allocID, tr.taskName, tr.state); err != nil {
  1349  		// Only a warning because the next event/state-transition will
  1350  		// try to persist it again.
  1351  		tr.logger.Warn("error persisting event", "error", err, "event", event)
  1352  	}
  1353  }
  1354  
  1355  // appendEvent to task's event slice. Caller must acquire stateLock.
  1356  func (tr *TaskRunner) appendEvent(event *structs.TaskEvent) error {
  1357  	// Ensure the event is populated with human readable strings
  1358  	event.PopulateEventDisplayMessage()
  1359  
  1360  	// Propagate failure from event to task state
  1361  	if event.FailsTask {
  1362  		tr.state.Failed = true
  1363  	}
  1364  
  1365  	// XXX This seems like a super awkward spot for this? Why not shouldRestart?
  1366  	// Update restart metrics
  1367  	if event.Type == structs.TaskRestarting {
  1368  		metrics.IncrCounterWithLabels([]string{"client", "allocs", "restart"}, 1, tr.baseLabels)
  1369  		tr.state.Restarts++
  1370  		tr.state.LastRestart = time.Unix(0, event.Time)
  1371  	}
  1372  
  1373  	// Append event to slice
  1374  	appendTaskEvent(tr.state, event, tr.maxEvents)
  1375  
  1376  	return nil
  1377  }
  1378  
  1379  // WaitCh is closed when TaskRunner.Run exits.
  1380  func (tr *TaskRunner) WaitCh() <-chan struct{} {
  1381  	return tr.waitCh
  1382  }
  1383  
  1384  // Update the running allocation with a new version received from the server.
  1385  // Calls Update hooks asynchronously with Run.
  1386  //
  1387  // This method is safe for calling concurrently with Run and does not modify
  1388  // the passed in allocation.
  1389  func (tr *TaskRunner) Update(update *structs.Allocation) {
  1390  	task := update.LookupTask(tr.taskName)
  1391  	if task == nil {
  1392  		// This should not happen and likely indicates a bug in the
  1393  		// server or client.
  1394  		tr.logger.Error("allocation update is missing task; killing",
  1395  			"group", update.TaskGroup)
  1396  		te := structs.NewTaskEvent(structs.TaskKilled).
  1397  			SetKillReason("update missing task").
  1398  			SetFailsTask()
  1399  		tr.Kill(context.Background(), te)
  1400  		return
  1401  	}
  1402  
  1403  	// Update tr.alloc
  1404  	tr.setAlloc(update, task)
  1405  
  1406  	// Trigger update hooks if not terminal
  1407  	if !update.TerminalStatus() {
  1408  		tr.triggerUpdateHooks()
  1409  	}
  1410  }
  1411  
  1412  // SetNetworkIsolation is called by the PreRun allocation hook after configuring
  1413  // the network isolation for the allocation
  1414  func (tr *TaskRunner) SetNetworkIsolation(n *drivers.NetworkIsolationSpec) {
  1415  	tr.networkIsolationLock.Lock()
  1416  	tr.networkIsolationSpec = n
  1417  	tr.networkIsolationLock.Unlock()
  1418  }
  1419  
  1420  // triggerUpdateHooks triggers an update if there isn't already one pending.
  1421  // Call it instead of calling updateHooks directly to serialize runs of update
  1422  // hooks. TaskRunner state should be updated prior to triggering update hooks.
  1423  //
  1424  // Does not block.
  1425  func (tr *TaskRunner) triggerUpdateHooks() {
  1426  	select {
  1427  	case tr.triggerUpdateCh <- struct{}{}:
  1428  	default:
  1429  		// already an update hook pending
  1430  	}
  1431  }
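
        // Illustrative only, not part of the original source: the non-blocking send in
        // triggerUpdateHooks relies on triggerUpdateCh being buffered with capacity 1
        // (triggerUpdateChCap), so repeated triggers coalesce into one pending update.
        // The same pattern in isolation:
        //
        //	ch := make(chan struct{}, 1)
        //	select {
        //	case ch <- struct{}{}:
        //	default: // a trigger is already pending; this one can be dropped
        //	}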
  1432  
  1433  // Shutdown TaskRunner gracefully without affecting the state of the task.
  1434  // Shutdown blocks until the main Run loop exits.
  1435  func (tr *TaskRunner) Shutdown() {
  1436  	tr.logger.Trace("shutting down")
  1437  	tr.shutdownCtxCancel()
  1438  
  1439  	<-tr.WaitCh()
  1440  
  1441  	// Run shutdown hooks to cleanup
  1442  	tr.shutdownHooks()
  1443  
  1444  	// Persist once more
  1445  	tr.persistLocalState()
  1446  }
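
        // Illustrative only, not part of the original source: a hedged sketch of a
        // client shutting a runner down without killing the task, assuming tr.Run
        // was started in its own goroutine:
        //
        //	go tr.Run()
        //	// ... later, on client shutdown (the task itself keeps running):
        //	tr.Shutdown() // cancels shutdownCtx, waits for Run to exit, runs shutdown hooks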
  1447  
  1448  // LatestResourceUsage returns the last resource utilization datapoint
  1449  // collected. May return nil if the task is not running or no resource
  1450  // utilization has been collected yet.
  1451  func (tr *TaskRunner) LatestResourceUsage() *cstructs.TaskResourceUsage {
  1452  	tr.resourceUsageLock.Lock()
  1453  	ru := tr.resourceUsage
  1454  	tr.resourceUsageLock.Unlock()
  1455  
  1456  	// Device statistics are looked up lazily at fetch time, since no stats are emitted for them elsewhere yet
  1457  	if ru != nil && tr.deviceStatsReporter != nil {
  1458  		deviceResources := tr.taskResources.Devices
  1459  		ru.ResourceUsage.DeviceStats = tr.deviceStatsReporter.LatestDeviceResourceStats(deviceResources)
  1460  	}
  1461  	return ru
  1462  }
  1463  
  1464  // UpdateStats updates and emits the latest stats from the driver.
  1465  func (tr *TaskRunner) UpdateStats(ru *cstructs.TaskResourceUsage) {
  1466  	tr.resourceUsageLock.Lock()
  1467  	tr.resourceUsage = ru
  1468  	tr.resourceUsageLock.Unlock()
  1469  	if ru != nil {
  1470  		tr.emitStats(ru)
  1471  	}
  1472  }
  1473  
  1474  // TODO Remove Backwardscompat or use tr.Alloc()?
  1475  func (tr *TaskRunner) setGaugeForMemory(ru *cstructs.TaskResourceUsage) {
  1476  	alloc := tr.Alloc()
  1477  	var allocatedMem float32
  1478  	if taskRes := alloc.AllocatedResources.Tasks[tr.taskName]; taskRes != nil {
  1479  		// Convert to bytes to match other memory metrics
  1480  		allocatedMem = float32(taskRes.Memory.MemoryMB) * 1024 * 1024
  1481  	}
  1482  
  1483  	ms := ru.ResourceUsage.MemoryStats
  1484  
  1485  	publishMetric := func(v uint64, reported, measured string) {
  1486  		if v != 0 || slices.Contains(ms.Measured, measured) {
  1487  			metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", reported},
  1488  				float32(v), tr.baseLabels)
  1489  		}
  1490  	}
  1491  
  1492  	publishMetric(ms.RSS, "rss", "RSS")
  1493  	publishMetric(ms.Cache, "cache", "Cache")
  1494  	publishMetric(ms.Swap, "swap", "Swap")
  1495  	publishMetric(ms.MappedFile, "mapped_file", "Mapped File")
  1496  	publishMetric(ms.Usage, "usage", "Usage")
  1497  	publishMetric(ms.MaxUsage, "max_usage", "Max Usage")
  1498  	publishMetric(ms.KernelUsage, "kernel_usage", "Kernel Usage")
  1499  	publishMetric(ms.KernelMaxUsage, "kernel_max_usage", "Kernel Max Usage")
  1500  	if allocatedMem > 0 {
  1501  		metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", "allocated"},
  1502  			allocatedMem, tr.baseLabels)
  1503  	}
  1504  }
  1505  
  1506  // TODO Remove Backwardscompat or use tr.Alloc()?
  1507  func (tr *TaskRunner) setGaugeForCPU(ru *cstructs.TaskResourceUsage) {
  1508  	alloc := tr.Alloc()
  1509  	var allocatedCPU float32
  1510  	if taskRes := alloc.AllocatedResources.Tasks[tr.taskName]; taskRes != nil {
  1511  		allocatedCPU = float32(taskRes.Cpu.CpuShares)
  1512  	}
  1513  
  1514  	metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "total_percent"},
  1515  		float32(ru.ResourceUsage.CpuStats.Percent), tr.baseLabels)
  1516  	metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "system"},
  1517  		float32(ru.ResourceUsage.CpuStats.SystemMode), tr.baseLabels)
  1518  	metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "user"},
  1519  		float32(ru.ResourceUsage.CpuStats.UserMode), tr.baseLabels)
  1520  	metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "throttled_time"},
  1521  		float32(ru.ResourceUsage.CpuStats.ThrottledTime), tr.baseLabels)
  1522  	metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "throttled_periods"},
  1523  		float32(ru.ResourceUsage.CpuStats.ThrottledPeriods), tr.baseLabels)
  1524  	metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "total_ticks"},
  1525  		float32(ru.ResourceUsage.CpuStats.TotalTicks), tr.baseLabels)
  1526  	if allocatedCPU > 0 {
  1527  		metrics.SetGaugeWithLabels([]string{"client", "allocs", "cpu", "allocated"},
  1528  			allocatedCPU, tr.baseLabels)
  1529  	}
  1530  }
  1531  
  1532  // emitStats emits resource usage stats of tasks to remote metrics collector
  1533  // sinks
  1534  func (tr *TaskRunner) emitStats(ru *cstructs.TaskResourceUsage) {
  1535  	if !tr.clientConfig.PublishAllocationMetrics {
  1536  		return
  1537  	}
  1538  
  1539  	if ru.ResourceUsage.MemoryStats != nil {
  1540  		tr.setGaugeForMemory(ru)
  1541  	} else {
  1542  		tr.logger.Debug("Skipping memory stats for allocation", "reason", "MemoryStats is nil")
  1543  	}
  1544  
  1545  	if ru.ResourceUsage.CpuStats != nil {
  1546  		tr.setGaugeForCPU(ru)
  1547  	} else {
  1548  		tr.logger.Debug("Skipping cpu stats for allocation", "reason", "CpuStats is nil")
  1549  	}
  1550  }
  1551  
  1552  // appendTaskEvent appends the new event to the task's event list, dropping the oldest event when at capacity.
  1553  func appendTaskEvent(state *structs.TaskState, event *structs.TaskEvent, capacity int) {
  1554  	if state.Events == nil {
  1555  		state.Events = make([]*structs.TaskEvent, 1, capacity)
  1556  		state.Events[0] = event
  1557  		return
  1558  	}
  1559  
  1560  	// If we hit capacity, then shift it.
  1561  	if len(state.Events) == capacity {
  1562  		old := state.Events
  1563  		state.Events = make([]*structs.TaskEvent, 0, capacity)
  1564  		state.Events = append(state.Events, old[1:]...)
  1565  	}
  1566  
  1567  	state.Events = append(state.Events, event)
  1568  }
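
        // Illustrative only, not part of the original source: appendTaskEvent keeps at
        // most capacity events, dropping the oldest once full. For example, with
        // capacity 3 and events e1..e4 appended in order:
        //
        //	// after e1, e2, e3: state.Events == [e1 e2 e3]
        //	// after e4:         state.Events == [e2 e3 e4] (e1 dropped)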
  1569  
  1570  func (tr *TaskRunner) TaskExecHandler() drivermanager.TaskExecHandler {
  1571  	// Check that the task is running (i.e., it has a driver handle)
  1572  	handle := tr.getDriverHandle()
  1573  	if handle == nil {
  1574  		return nil
  1575  	}
  1576  	return handle.ExecStreaming
  1577  }
  1578  
  1579  func (tr *TaskRunner) DriverCapabilities() (*drivers.Capabilities, error) {
  1580  	return tr.driver.Capabilities()
  1581  }
  1582  
  1583  func (tr *TaskRunner) SetAllocHookResources(res *cstructs.AllocHookResources) {
  1584  	tr.allocHookResources = res
  1585  }
  1586  
  1587  // shutdownDelayCancel is used for testing only and cancels the
  1588  // shutdownDelayCtx
  1589  func (tr *TaskRunner) shutdownDelayCancel() {
  1590  	tr.shutdownDelayCancelFn()
  1591  }