github.com/maier/nomad@v0.4.1-0.20161110003312-a9e3d0b8549d/client/task_runner.go

     1  package client
     2  
     3  import (
     4  	"crypto/md5"
     5  	"encoding/hex"
     6  	"fmt"
     7  	"io/ioutil"
     8  	"log"
     9  	"os"
    10  	"path/filepath"
    11  	"strings"
    12  	"sync"
    13  	"time"
    14  
    15  	"github.com/armon/go-metrics"
    16  	"github.com/hashicorp/consul-template/signals"
    17  	"github.com/hashicorp/go-multierror"
    18  	"github.com/hashicorp/nomad/client/config"
    19  	"github.com/hashicorp/nomad/client/driver"
    20  	"github.com/hashicorp/nomad/client/getter"
    21  	"github.com/hashicorp/nomad/client/vaultclient"
    22  	"github.com/hashicorp/nomad/nomad/structs"
    23  
    24  	"github.com/hashicorp/nomad/client/driver/env"
    25  	dstructs "github.com/hashicorp/nomad/client/driver/structs"
    26  	cstructs "github.com/hashicorp/nomad/client/structs"
    27  )
    28  
    29  const (
    30  	// killBackoffBaseline is the baseline time for exponential backoff while
    31  	// killing a task.
    32  	killBackoffBaseline = 5 * time.Second
    33  
    34  	// killBackoffLimit is the limit of the exponential backoff for killing
    35  	// the task.
    36  	killBackoffLimit = 2 * time.Minute
    37  
    38  	// killFailureLimit is how many times we will attempt to kill a task before
    39  	// giving up and potentially leaking resources.
    40  	killFailureLimit = 5
    41  
    42  	// vaultBackoffBaseline is the baseline time for exponential backoff when
    43  	// attempting to retrieve a Vault token
    44  	vaultBackoffBaseline = 5 * time.Second
    45  
    46  	// vaultBackoffLimit is the limit of the exponential backoff when attempting
    47  	// to retrieve a Vault token
    48  	vaultBackoffLimit = 3 * time.Minute
    49  
    50  	// vaultTokenFile is the name of the file holding the Vault token inside the
    51  	// task's secret directory
    52  	vaultTokenFile = "vault_token"
    53  )
    54  
    55  // TaskRunner is used to wrap a task within an allocation and provide the execution context.
    56  type TaskRunner struct {
    57  	config         *config.Config
    58  	updater        TaskStateUpdater
    59  	logger         *log.Logger
    60  	ctx            *driver.ExecContext
    61  	alloc          *structs.Allocation
    62  	restartTracker *RestartTracker
    63  
    64  	// running marks whether the task is running
    65  	running     bool
    66  	runningLock sync.Mutex
    67  
    68  	resourceUsage     *cstructs.TaskResourceUsage
    69  	resourceUsageLock sync.RWMutex
    70  
    71  	task    *structs.Task
    72  	taskDir string
    73  
     74  	// taskEnv is the task's environment
    75  	taskEnv     *env.TaskEnvironment
    76  	taskEnvLock sync.Mutex
    77  
    78  	// updateCh is used to receive updated versions of the allocation
    79  	updateCh chan *structs.Allocation
    80  
    81  	handle     driver.DriverHandle
    82  	handleLock sync.Mutex
    83  
     84  	// artifactsDownloaded tracks whether the task's artifacts have been
    85  	// downloaded
    86  	artifactsDownloaded bool
    87  
    88  	// vaultFuture is the means to wait for and get a Vault token
    89  	vaultFuture *tokenFuture
    90  
    91  	// recoveredVaultToken is the token that was recovered through a restore
    92  	recoveredVaultToken string
    93  
    94  	// vaultClient is used to retrieve and renew any needed Vault token
    95  	vaultClient vaultclient.VaultClient
    96  
    97  	// templateManager is used to manage any consul-templates this task may have
    98  	templateManager *TaskTemplateManager
    99  
   100  	// startCh is used to trigger the start of the task
   101  	startCh chan struct{}
   102  
   103  	// unblockCh is used to unblock the starting of the task
   104  	unblockCh   chan struct{}
   105  	unblocked   bool
   106  	unblockLock sync.Mutex
   107  
   108  	// restartCh is used to restart a task
   109  	restartCh chan *structs.TaskEvent
   110  
   111  	// signalCh is used to send a signal to a task
   112  	signalCh chan SignalEvent
   113  
   114  	destroy      bool
   115  	destroyCh    chan struct{}
   116  	destroyLock  sync.Mutex
   117  	destroyEvent *structs.TaskEvent
   118  
   119  	// waitCh closing marks the run loop as having exited
   120  	waitCh chan struct{}
   121  
   122  	// serialize SaveState calls
   123  	persistLock sync.Mutex
   124  }
   125  
   126  // taskRunnerState is used to snapshot the state of the task runner
   127  type taskRunnerState struct {
   128  	Version            string
   129  	Task               *structs.Task
   130  	HandleID           string
   131  	ArtifactDownloaded bool
   132  }
   133  
    134  // TaskStateUpdater is used to signal that a task's state has changed.
   135  type TaskStateUpdater func(taskName, state string, event *structs.TaskEvent)
   136  
   137  // SignalEvent is a tuple of the signal and the event generating it
   138  type SignalEvent struct {
   139  	// s is the signal to be sent
   140  	s os.Signal
   141  
   142  	// e is the task event generating the signal
   143  	e *structs.TaskEvent
   144  
   145  	// result should be used to send back the result of the signal
   146  	result chan<- error
   147  }
   148  
   149  // NewTaskRunner is used to create a new task context
   150  func NewTaskRunner(logger *log.Logger, config *config.Config,
   151  	updater TaskStateUpdater, ctx *driver.ExecContext,
   152  	alloc *structs.Allocation, task *structs.Task,
   153  	vaultClient vaultclient.VaultClient) *TaskRunner {
   154  
   155  	// Merge in the task resources
   156  	task.Resources = alloc.TaskResources[task.Name]
   157  
   158  	// Build the restart tracker.
   159  	tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup)
   160  	if tg == nil {
    161  		logger.Printf("[ERR] client: alloc '%s' references missing task group '%s'", alloc.ID, alloc.TaskGroup)
   162  		return nil
   163  	}
   164  	restartTracker := newRestartTracker(tg.RestartPolicy, alloc.Job.Type)
   165  
   166  	// Get the task directory
   167  	taskDir, ok := ctx.AllocDir.TaskDirs[task.Name]
   168  	if !ok {
   169  		logger.Printf("[ERR] client: task directory for alloc %q task %q couldn't be found", alloc.ID, task.Name)
   170  		return nil
   171  	}
   172  
   173  	tc := &TaskRunner{
   174  		config:         config,
   175  		updater:        updater,
   176  		logger:         logger,
   177  		restartTracker: restartTracker,
   178  		ctx:            ctx,
   179  		alloc:          alloc,
   180  		task:           task,
   181  		taskDir:        taskDir,
   182  		vaultClient:    vaultClient,
   183  		vaultFuture:    NewTokenFuture().Set(""),
   184  		updateCh:       make(chan *structs.Allocation, 64),
   185  		destroyCh:      make(chan struct{}),
   186  		waitCh:         make(chan struct{}),
   187  		startCh:        make(chan struct{}, 1),
   188  		unblockCh:      make(chan struct{}),
   189  		restartCh:      make(chan *structs.TaskEvent),
   190  		signalCh:       make(chan SignalEvent),
   191  	}
   192  
   193  	return tc
   194  }
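
         // exampleRunTaskRunner is an illustrative sketch, not part of the original
         // file: it shows how a caller such as an alloc runner might drive a
         // TaskRunner built by NewTaskRunner above. The arguments are assumed to be
         // prepared by the caller and the function name is hypothetical.
         func exampleRunTaskRunner(logger *log.Logger, cfg *config.Config, updater TaskStateUpdater,
         	ctx *driver.ExecContext, alloc *structs.Allocation, task *structs.Task,
         	vc vaultclient.VaultClient) {

         	tr := NewTaskRunner(logger, cfg, updater, ctx, alloc, task, vc)
         	if tr == nil {
         		// Construction failed (missing task group or task directory).
         		return
         	}
         	tr.MarkReceived()

         	// Run the task in the background and block until its run loop exits.
         	go tr.Run()
         	<-tr.WaitCh()
         }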
   195  
   196  // MarkReceived marks the task as received.
   197  func (r *TaskRunner) MarkReceived() {
   198  	r.updater(r.task.Name, structs.TaskStatePending, structs.NewTaskEvent(structs.TaskReceived))
   199  }
   200  
   201  // WaitCh returns a channel to wait for termination
   202  func (r *TaskRunner) WaitCh() <-chan struct{} {
   203  	return r.waitCh
   204  }
   205  
   206  // stateFilePath returns the path to our state file
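         // The resulting path has the form
         // <StateDir>/alloc/<alloc ID>/task-<hex md5 of the task name>/state.json.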
   207  func (r *TaskRunner) stateFilePath() string {
   208  	// Get the MD5 of the task name
   209  	hashVal := md5.Sum([]byte(r.task.Name))
   210  	hashHex := hex.EncodeToString(hashVal[:])
   211  	dirName := fmt.Sprintf("task-%s", hashHex)
   212  
   213  	// Generate the path
   214  	path := filepath.Join(r.config.StateDir, "alloc", r.alloc.ID,
   215  		dirName, "state.json")
   216  	return path
   217  }
   218  
   219  // RestoreState is used to restore our state
   220  func (r *TaskRunner) RestoreState() error {
   221  	// Load the snapshot
   222  	var snap taskRunnerState
   223  	if err := restoreState(r.stateFilePath(), &snap); err != nil {
   224  		return err
   225  	}
   226  
   227  	// Restore fields
   228  	if snap.Task == nil {
    229  		return fmt.Errorf("task runner snapshot includes a nil Task")
   230  	} else {
   231  		r.task = snap.Task
   232  	}
   233  	r.artifactsDownloaded = snap.ArtifactDownloaded
   234  
   235  	if err := r.setTaskEnv(); err != nil {
   236  		return fmt.Errorf("client: failed to create task environment for task %q in allocation %q: %v",
   237  			r.task.Name, r.alloc.ID, err)
   238  	}
   239  
   240  	if r.task.Vault != nil {
   241  		secretDir, err := r.ctx.AllocDir.GetSecretDir(r.task.Name)
   242  		if err != nil {
   243  			return fmt.Errorf("failed to determine task %s secret dir in alloc %q: %v", r.task.Name, r.alloc.ID, err)
   244  		}
   245  
   246  		// Read the token from the secret directory
   247  		tokenPath := filepath.Join(secretDir, vaultTokenFile)
   248  		data, err := ioutil.ReadFile(tokenPath)
   249  		if err != nil {
   250  			if !os.IsNotExist(err) {
   251  				return fmt.Errorf("failed to read token for task %q in alloc %q: %v", r.task.Name, r.alloc.ID, err)
   252  			}
   253  
   254  			// Token file doesn't exist
   255  		} else {
   256  			// Store the recovered token
   257  			r.recoveredVaultToken = string(data)
   258  		}
   259  	}
   260  
   261  	// Restore the driver
   262  	if snap.HandleID != "" {
   263  		driver, err := r.createDriver()
   264  		if err != nil {
   265  			return err
   266  		}
   267  
   268  		handle, err := driver.Open(r.ctx, snap.HandleID)
   269  
   270  		// In the case it fails, we relaunch the task in the Run() method.
   271  		if err != nil {
   272  			r.logger.Printf("[ERR] client: failed to open handle to task '%s' for alloc '%s': %v",
   273  				r.task.Name, r.alloc.ID, err)
   274  			return nil
   275  		}
   276  		r.handleLock.Lock()
   277  		r.handle = handle
   278  		r.handleLock.Unlock()
   279  
   280  		r.runningLock.Lock()
   281  		r.running = true
   282  		r.runningLock.Unlock()
   283  	}
   284  	return nil
   285  }
   286  
   287  // SaveState is used to snapshot our state
   288  func (r *TaskRunner) SaveState() error {
   289  	r.persistLock.Lock()
   290  	defer r.persistLock.Unlock()
   291  
   292  	snap := taskRunnerState{
   293  		Task:               r.task,
   294  		Version:            r.config.Version,
   295  		ArtifactDownloaded: r.artifactsDownloaded,
   296  	}
   297  	r.handleLock.Lock()
   298  	if r.handle != nil {
   299  		snap.HandleID = r.handle.ID()
   300  	}
   301  	r.handleLock.Unlock()
   302  	return persistState(r.stateFilePath(), &snap)
   303  }
   304  
    305  // DestroyState is used to clean up after ourselves
   306  func (r *TaskRunner) DestroyState() error {
   307  	return os.RemoveAll(r.stateFilePath())
   308  }
   309  
   310  // setState is used to update the state of the task runner
   311  func (r *TaskRunner) setState(state string, event *structs.TaskEvent) {
   312  	// Persist our state to disk.
   313  	if err := r.SaveState(); err != nil {
   314  		r.logger.Printf("[ERR] client: failed to save state of Task Runner for task %q: %v", r.task.Name, err)
   315  	}
   316  
   317  	// Indicate the task has been updated.
   318  	r.updater(r.task.Name, state, event)
   319  }
   320  
   321  // setTaskEnv sets the task environment. It returns an error if it could not be
   322  // created.
   323  func (r *TaskRunner) setTaskEnv() error {
   324  	r.taskEnvLock.Lock()
   325  	defer r.taskEnvLock.Unlock()
   326  
   327  	taskEnv, err := driver.GetTaskEnv(r.ctx.AllocDir, r.config.Node, r.task.Copy(), r.alloc, r.vaultFuture.Get())
   328  	if err != nil {
   329  		return err
   330  	}
   331  	r.taskEnv = taskEnv
   332  	return nil
   333  }
   334  
   335  // getTaskEnv returns the task environment
   336  func (r *TaskRunner) getTaskEnv() *env.TaskEnvironment {
   337  	r.taskEnvLock.Lock()
   338  	defer r.taskEnvLock.Unlock()
   339  	return r.taskEnv
   340  }
   341  
   342  // createDriver makes a driver for the task
   343  func (r *TaskRunner) createDriver() (driver.Driver, error) {
   344  	env := r.getTaskEnv()
   345  	if env == nil {
   346  		return nil, fmt.Errorf("task environment not made for task %q in allocation %q", r.task.Name, r.alloc.ID)
   347  	}
   348  
   349  	driverCtx := driver.NewDriverContext(r.task.Name, r.config, r.config.Node, r.logger, env)
   350  	driver, err := driver.NewDriver(r.task.Driver, driverCtx)
   351  	if err != nil {
   352  		return nil, fmt.Errorf("failed to create driver '%s' for alloc %s: %v",
   353  			r.task.Driver, r.alloc.ID, err)
   354  	}
   355  	return driver, err
   356  }
   357  
   358  // Run is a long running routine used to manage the task
   359  func (r *TaskRunner) Run() {
   360  	defer close(r.waitCh)
   361  	r.logger.Printf("[DEBUG] client: starting task context for '%s' (alloc '%s')",
   362  		r.task.Name, r.alloc.ID)
   363  
   364  	// Create the initial environment, this will be recreated if a Vault token
   365  	// is needed
   366  	if err := r.setTaskEnv(); err != nil {
   367  		r.setState(
   368  			structs.TaskStateDead,
   369  			structs.NewTaskEvent(structs.TaskSetupFailure).SetSetupError(err))
   370  		return
   371  	}
   372  
   373  	if err := r.validateTask(); err != nil {
   374  		r.setState(
   375  			structs.TaskStateDead,
   376  			structs.NewTaskEvent(structs.TaskFailedValidation).SetValidationError(err).SetFailsTask())
   377  		return
   378  	}
   379  
   380  	// If there is no Vault policy leave the static future created in
   381  	// NewTaskRunner
   382  	if r.task.Vault != nil {
   383  		// Start the go-routine to get a Vault token
   384  		r.vaultFuture.Clear()
   385  		go r.vaultManager(r.recoveredVaultToken)
   386  	}
   387  
   388  	// Start the run loop
   389  	r.run()
   390  
   391  	// Do any cleanup necessary
   392  	r.postrun()
   393  
   394  	return
   395  }
   396  
   397  // validateTask validates the fields of the task and returns an error if the
   398  // task is invalid.
   399  func (r *TaskRunner) validateTask() error {
   400  	var mErr multierror.Error
   401  
   402  	// Validate the user.
   403  	unallowedUsers := r.config.ReadStringListToMapDefault("user.blacklist", config.DefaultUserBlacklist)
   404  	checkDrivers := r.config.ReadStringListToMapDefault("user.checked_drivers", config.DefaultUserCheckedDrivers)
   405  	if _, driverMatch := checkDrivers[r.task.Driver]; driverMatch {
   406  		if _, unallowed := unallowedUsers[r.task.User]; unallowed {
   407  			mErr.Errors = append(mErr.Errors, fmt.Errorf("running as user %q is disallowed", r.task.User))
   408  		}
   409  	}
   410  
   411  	// Validate the artifacts
   412  	for i, artifact := range r.task.Artifacts {
   413  		// Verify the artifact doesn't escape the task directory.
   414  		if err := artifact.Validate(); err != nil {
   415  			// If this error occurs there is potentially a server bug or
    416  			// malicious server spoofing.
   417  			r.logger.Printf("[ERR] client: allocation %q, task %v, artifact %#v (%v) fails validation: %v",
   418  				r.alloc.ID, r.task.Name, artifact, i, err)
   419  			mErr.Errors = append(mErr.Errors, fmt.Errorf("artifact (%d) failed validation: %v", i, err))
   420  		}
   421  	}
   422  
   423  	// Validate the Service names
   424  	for i, service := range r.task.Services {
   425  		name := r.taskEnv.ReplaceEnv(service.Name)
   426  		if err := service.ValidateName(name); err != nil {
   427  			mErr.Errors = append(mErr.Errors, fmt.Errorf("service (%d) failed validation: %v", i, err))
   428  		}
   429  	}
   430  
   431  	if len(mErr.Errors) == 1 {
   432  		return mErr.Errors[0]
   433  	}
   434  	return mErr.ErrorOrNil()
   435  }
   436  
    437  // tokenFuture stores the Vault token and allows consumers to block until a
    438  // valid token exists
   439  type tokenFuture struct {
   440  	waiting []chan struct{}
   441  	token   string
   442  	set     bool
   443  	m       sync.Mutex
   444  }
   445  
   446  // NewTokenFuture returns a new token future without any token set
   447  func NewTokenFuture() *tokenFuture {
   448  	return &tokenFuture{}
   449  }
   450  
   451  // Wait returns a channel that can be waited on. When this channel unblocks, a
   452  // valid token will be available via the Get method
   453  func (f *tokenFuture) Wait() <-chan struct{} {
   454  	f.m.Lock()
   455  	defer f.m.Unlock()
   456  
   457  	c := make(chan struct{})
   458  	if f.set {
   459  		close(c)
   460  		return c
   461  	}
   462  
   463  	f.waiting = append(f.waiting, c)
   464  	return c
   465  }
   466  
   467  // Set sets the token value and unblocks any caller of Wait
   468  func (f *tokenFuture) Set(token string) *tokenFuture {
   469  	f.m.Lock()
   470  	defer f.m.Unlock()
   471  
   472  	f.set = true
   473  	f.token = token
   474  	for _, w := range f.waiting {
   475  		close(w)
   476  	}
   477  	f.waiting = nil
   478  	return f
   479  }
   480  
    481  // Clear clears the set Vault token.
   482  func (f *tokenFuture) Clear() *tokenFuture {
   483  	f.m.Lock()
   484  	defer f.m.Unlock()
   485  
   486  	f.token = ""
   487  	f.set = false
   488  	return f
   489  }
   490  
   491  // Get returns the set Vault token
   492  func (f *tokenFuture) Get() string {
   493  	f.m.Lock()
   494  	defer f.m.Unlock()
   495  	return f.token
   496  }
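
         // exampleTokenFuture is an illustrative sketch, not part of the original
         // file: it shows the tokenFuture pattern used by vaultManager and prestart.
         // One goroutine publishes a token with Set while another blocks on Wait and
         // then reads it with Get. The literal token value is made up.
         func exampleTokenFuture() string {
         	f := NewTokenFuture()
         	go func() {
         		// Simulate a token arriving asynchronously, e.g. from deriveVaultToken.
         		f.Set("example-vault-token")
         	}()
         	<-f.Wait() // unblocks once Set has closed the waiter channels
         	return f.Get()
         }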
   497  
    498  // vaultManager should be called in a goroutine and manages the derivation,
    499  // renewal and handling of errors with the Vault token. The token parameter
    500  // optionally sets the initial Vault token, which is useful when the Vault
    501  // token is recovered from disk.
   502  func (r *TaskRunner) vaultManager(token string) {
   503  	// updatedToken lets us store state between loops. If true, a new token
   504  	// has been retrieved and we need to apply the Vault change mode
   505  	var updatedToken bool
   506  
   507  OUTER:
   508  	for {
   509  		// Check if we should exit
   510  		select {
   511  		case <-r.waitCh:
   512  			return
   513  		default:
   514  		}
   515  
   516  		// Clear the token
   517  		r.vaultFuture.Clear()
   518  
    519  		// Check if there already is a token, which can be the case when
    520  		// restoring the TaskRunner
   521  		if token == "" {
   522  			// Get a token
   523  			var exit bool
   524  			token, exit = r.deriveVaultToken()
   525  			if exit {
   526  				// Exit the manager
   527  				return
   528  			}
   529  
   530  			// Write the token to disk
   531  			if err := r.writeToken(token); err != nil {
   532  				e := fmt.Errorf("failed to write Vault token to disk")
   533  				r.logger.Printf("[ERR] client: %v for task %v on alloc %q: %v", e, r.task.Name, r.alloc.ID, err)
   534  				r.Kill("vault", e.Error(), true)
   535  				return
   536  			}
   537  		}
   538  
   539  		// Start the renewal process
   540  		renewCh, err := r.vaultClient.RenewToken(token, 30)
   541  
   542  		// An error returned means the token is not being renewed
   543  		if err != nil {
   544  			r.logger.Printf("[ERR] client: failed to start renewal of Vault token for task %v on alloc %q: %v", r.task.Name, r.alloc.ID, err)
   545  			token = ""
   546  			goto OUTER
   547  		}
   548  
   549  		// The Vault token is valid now, so set it
   550  		r.vaultFuture.Set(token)
   551  
   552  		if updatedToken {
   553  			switch r.task.Vault.ChangeMode {
   554  			case structs.VaultChangeModeSignal:
   555  				s, err := signals.Parse(r.task.Vault.ChangeSignal)
   556  				if err != nil {
   557  					e := fmt.Errorf("failed to parse signal: %v", err)
    558  					r.logger.Printf("[ERR] client: %v", e)
   559  					r.Kill("vault", e.Error(), true)
   560  					return
   561  				}
   562  
   563  				if err := r.Signal("vault", "new Vault token acquired", s); err != nil {
   564  					r.logger.Printf("[ERR] client: failed to send signal to task %v for alloc %q: %v", r.task.Name, r.alloc.ID, err)
   565  					r.Kill("vault", fmt.Sprintf("failed to send signal to task: %v", err), true)
   566  					return
   567  				}
   568  			case structs.VaultChangeModeRestart:
   569  				r.Restart("vault", "new Vault token acquired")
   570  			case structs.VaultChangeModeNoop:
   571  				fallthrough
   572  			default:
   573  				r.logger.Printf("[ERR] client: Invalid Vault change mode: %q", r.task.Vault.ChangeMode)
   574  			}
   575  
   576  			// We have handled it
   577  			updatedToken = false
   578  
   579  			// Call the handler
   580  			r.updatedTokenHandler()
   581  		}
   582  
   583  		// Start watching for renewal errors
   584  		select {
   585  		case err := <-renewCh:
   586  			// Clear the token
   587  			token = ""
   588  			r.logger.Printf("[ERR] client: failed to renew Vault token for task %v on alloc %q: %v", r.task.Name, r.alloc.ID, err)
   589  
   590  			// Check if we have to do anything
   591  			if r.task.Vault.ChangeMode != structs.VaultChangeModeNoop {
   592  				updatedToken = true
   593  			}
   594  		case <-r.waitCh:
   595  			return
   596  		}
   597  	}
   598  }
   599  
    600  // deriveVaultToken derives the Vault token using exponential backoff. It
   601  // returns the Vault token and whether the manager should exit.
   602  func (r *TaskRunner) deriveVaultToken() (token string, exit bool) {
   603  	attempts := 0
   604  	for {
   605  		tokens, err := r.vaultClient.DeriveToken(r.alloc, []string{r.task.Name})
   606  		if err == nil {
   607  			return tokens[r.task.Name], false
   608  		}
   609  
   610  		// Check if we can't recover from the error
   611  		if rerr, ok := err.(*structs.RecoverableError); !ok || !rerr.Recoverable {
   612  			r.logger.Printf("[ERR] client: failed to derive Vault token for task %v on alloc %q: %v",
   613  				r.task.Name, r.alloc.ID, err)
   614  			r.Kill("vault", fmt.Sprintf("failed to derive token: %v", err), true)
   615  			return "", true
   616  		}
   617  
   618  		// Handle the retry case
   619  		backoff := (1 << (2 * uint64(attempts))) * vaultBackoffBaseline
   620  		if backoff > vaultBackoffLimit {
   621  			backoff = vaultBackoffLimit
   622  		}
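         		// With the 5s baseline this yields delays of 5s, 20s, 80s and then the
         		// 3m cap for all later attempts.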
   623  		r.logger.Printf("[ERR] client: failed to derive Vault token for task %v on alloc %q: %v; retrying in %v",
   624  			r.task.Name, r.alloc.ID, err, backoff)
   625  
   626  		attempts++
   627  
    628  		// Wait until retrying
   629  		select {
   630  		case <-r.waitCh:
   631  			return "", true
   632  		case <-time.After(backoff):
   633  		}
   634  	}
   635  }
   636  
   637  // writeToken writes the given token to disk
   638  func (r *TaskRunner) writeToken(token string) error {
   639  	// Write the token to disk
   640  	secretDir, err := r.ctx.AllocDir.GetSecretDir(r.task.Name)
   641  	if err != nil {
   642  		return fmt.Errorf("failed to determine task %s secret dir in alloc %q: %v", r.task.Name, r.alloc.ID, err)
   643  	}
   644  
   645  	// Write the token to the file system
   646  	tokenPath := filepath.Join(secretDir, vaultTokenFile)
   647  	if err := ioutil.WriteFile(tokenPath, []byte(token), 0777); err != nil {
   648  		return fmt.Errorf("failed to save Vault tokens to secret dir for task %q in alloc %q: %v", r.task.Name, r.alloc.ID, err)
   649  	}
   650  
   651  	return nil
   652  }
   653  
   654  // updatedTokenHandler is called when a new Vault token is retrieved. Things
   655  // that rely on the token should be updated here.
   656  func (r *TaskRunner) updatedTokenHandler() {
   657  
    658  	// Update the task's environment
   659  	if err := r.setTaskEnv(); err != nil {
   660  		r.setState(
   661  			structs.TaskStateDead,
   662  			structs.NewTaskEvent(structs.TaskSetupFailure).SetSetupError(err).SetFailsTask())
   663  		return
   664  	}
   665  
   666  	if r.templateManager != nil {
   667  		r.templateManager.Stop()
   668  
   669  		// Create a new templateManager
   670  		var err error
   671  		r.templateManager, err = NewTaskTemplateManager(r, r.task.Templates,
   672  			r.config, r.vaultFuture.Get(), r.taskDir, r.getTaskEnv())
   673  		if err != nil {
   674  			err := fmt.Errorf("failed to build task's template manager: %v", err)
   675  			r.setState(structs.TaskStateDead, structs.NewTaskEvent(structs.TaskSetupFailure).SetSetupError(err).SetFailsTask())
   676  			r.logger.Printf("[ERR] client: alloc %q, task %q %v", r.alloc.ID, r.task.Name, err)
   677  			r.Kill("vault", err.Error(), true)
   678  			return
   679  		}
   680  	}
   681  }
   682  
   683  // prestart handles life-cycle tasks that occur before the task has started.
   684  func (r *TaskRunner) prestart(resultCh chan bool) {
   685  
   686  	if r.task.Vault != nil {
   687  		// Wait for the token
   688  		r.logger.Printf("[DEBUG] client: waiting for Vault token for task %v in alloc %q", r.task.Name, r.alloc.ID)
   689  		tokenCh := r.vaultFuture.Wait()
   690  		select {
   691  		case <-tokenCh:
   692  		case <-r.waitCh:
   693  			resultCh <- false
   694  			return
   695  		}
   696  		r.logger.Printf("[DEBUG] client: retrieved Vault token for task %v in alloc %q", r.task.Name, r.alloc.ID)
   697  	}
   698  
   699  	if err := r.setTaskEnv(); err != nil {
   700  		r.setState(
   701  			structs.TaskStateDead,
   702  			structs.NewTaskEvent(structs.TaskSetupFailure).SetSetupError(err).SetFailsTask())
   703  		resultCh <- false
   704  		return
   705  	}
   706  
   707  	for {
   708  		// Download the task's artifacts
   709  		if !r.artifactsDownloaded && len(r.task.Artifacts) > 0 {
   710  			r.setState(structs.TaskStatePending, structs.NewTaskEvent(structs.TaskDownloadingArtifacts))
   711  			for _, artifact := range r.task.Artifacts {
   712  				if err := getter.GetArtifact(r.getTaskEnv(), artifact, r.taskDir); err != nil {
   713  					wrapped := fmt.Errorf("failed to download artifact %q: %v", artifact.GetterSource, err)
   714  					r.setState(structs.TaskStatePending,
   715  						structs.NewTaskEvent(structs.TaskArtifactDownloadFailed).SetDownloadError(wrapped))
   716  					r.restartTracker.SetStartError(structs.NewRecoverableError(wrapped, true))
   717  					goto RESTART
   718  				}
   719  			}
   720  
   721  			r.artifactsDownloaded = true
   722  		}
   723  
   724  		// We don't have to wait for any template
   725  		if len(r.task.Templates) == 0 {
   726  			// Send the start signal
   727  			select {
   728  			case r.startCh <- struct{}{}:
   729  			default:
   730  			}
   731  
   732  			resultCh <- true
   733  			return
   734  		}
   735  
   736  		// Build the template manager
   737  		if r.templateManager == nil {
   738  			var err error
   739  			r.templateManager, err = NewTaskTemplateManager(r, r.task.Templates,
   740  				r.config, r.vaultFuture.Get(), r.taskDir, r.getTaskEnv())
   741  			if err != nil {
   742  				err := fmt.Errorf("failed to build task's template manager: %v", err)
   743  				r.setState(structs.TaskStateDead, structs.NewTaskEvent(structs.TaskSetupFailure).SetSetupError(err).SetFailsTask())
   744  				r.logger.Printf("[ERR] client: alloc %q, task %q %v", r.alloc.ID, r.task.Name, err)
   745  				resultCh <- false
   746  				return
   747  			}
   748  		}
   749  
   750  		// Block for consul-template
   751  		// TODO Hooks should register themselves as blocking and then we can
    752  		// periodically enumerate what we are still blocked on
   753  		select {
   754  		case <-r.unblockCh:
   755  			// Send the start signal
   756  			select {
   757  			case r.startCh <- struct{}{}:
   758  			default:
   759  			}
   760  
   761  			resultCh <- true
   762  			return
   763  		case <-r.waitCh:
   764  			// The run loop has exited so exit too
   765  			resultCh <- false
   766  			return
   767  		}
   768  
   769  	RESTART:
   770  		restart := r.shouldRestart()
   771  		if !restart {
   772  			resultCh <- false
   773  			return
   774  		}
   775  	}
   776  }
   777  
    778  // postrun is used to do any cleanup that is necessary after exiting the run loop
   779  func (r *TaskRunner) postrun() {
   780  	// Stop the template manager
   781  	if r.templateManager != nil {
   782  		r.templateManager.Stop()
   783  	}
   784  }
   785  
    786  // run is the main run loop that handles starting the task, destroying it,
    787  // restarts and signals.
   788  func (r *TaskRunner) run() {
   789  	// Predeclare things so we can jump to the RESTART
   790  	var stopCollection chan struct{}
   791  	var handleWaitCh chan *dstructs.WaitResult
   792  
   793  	for {
   794  		// Do the prestart activities
   795  		prestartResultCh := make(chan bool, 1)
   796  		go r.prestart(prestartResultCh)
   797  
   798  	WAIT:
   799  		for {
   800  			select {
   801  			case success := <-prestartResultCh:
   802  				if !success {
   803  					r.setState(structs.TaskStateDead, nil)
   804  					return
   805  				}
   806  			case <-r.startCh:
   807  				// Start the task if not yet started or it is being forced. This logic
   808  				// is necessary because in the case of a restore the handle already
   809  				// exists.
   810  				r.handleLock.Lock()
   811  				handleEmpty := r.handle == nil
   812  				r.handleLock.Unlock()
   813  
   814  				if handleEmpty {
   815  					startErr := r.startTask()
   816  					r.restartTracker.SetStartError(startErr)
   817  					if startErr != nil {
   818  						r.setState("", structs.NewTaskEvent(structs.TaskDriverFailure).SetDriverError(startErr))
   819  						goto RESTART
   820  					}
   821  
   822  					// Mark the task as started
   823  					r.setState(structs.TaskStateRunning, structs.NewTaskEvent(structs.TaskStarted))
   824  					r.runningLock.Lock()
   825  					r.running = true
   826  					r.runningLock.Unlock()
   827  				}
   828  
   829  				if stopCollection == nil {
   830  					stopCollection = make(chan struct{})
   831  					go r.collectResourceUsageStats(stopCollection)
   832  				}
   833  
   834  				handleWaitCh = r.handle.WaitCh()
   835  
   836  			case waitRes := <-handleWaitCh:
   837  				if waitRes == nil {
   838  					panic("nil wait")
   839  				}
   840  
   841  				r.runningLock.Lock()
   842  				r.running = false
   843  				r.runningLock.Unlock()
   844  
   845  				// Stop collection of the task's resource usage
   846  				close(stopCollection)
   847  
   848  				// Log whether the task was successful or not.
   849  				r.restartTracker.SetWaitResult(waitRes)
   850  				r.setState("", r.waitErrorToEvent(waitRes))
   851  				if !waitRes.Successful() {
   852  					r.logger.Printf("[INFO] client: task %q for alloc %q failed: %v", r.task.Name, r.alloc.ID, waitRes)
   853  				} else {
   854  					r.logger.Printf("[INFO] client: task %q for alloc %q completed successfully", r.task.Name, r.alloc.ID)
   855  				}
   856  
   857  				break WAIT
   858  			case update := <-r.updateCh:
   859  				if err := r.handleUpdate(update); err != nil {
   860  					r.logger.Printf("[ERR] client: update to task %q failed: %v", r.task.Name, err)
   861  				}
   862  
   863  			case se := <-r.signalCh:
   864  				r.logger.Printf("[DEBUG] client: task being signalled with %v: %s", se.s, se.e.TaskSignalReason)
   865  				r.setState(structs.TaskStateRunning, se.e)
   866  
   867  				res := r.handle.Signal(se.s)
   868  				se.result <- res
   869  
   870  			case event := <-r.restartCh:
   871  				r.logger.Printf("[DEBUG] client: task being restarted: %s", event.RestartReason)
   872  				r.setState(structs.TaskStateRunning, event)
   873  				r.killTask(nil)
   874  
   875  				close(stopCollection)
   876  
   877  				if handleWaitCh != nil {
   878  					<-handleWaitCh
   879  				}
   880  
   881  				// Since the restart isn't from a failure, restart immediately
   882  				// and don't count against the restart policy
   883  				r.restartTracker.SetRestartTriggered()
   884  				break WAIT
   885  
   886  			case <-r.destroyCh:
   887  				r.runningLock.Lock()
   888  				running := r.running
   889  				r.runningLock.Unlock()
   890  				if !running {
   891  					r.setState(structs.TaskStateDead, r.destroyEvent)
   892  					return
   893  				}
   894  
   895  				// Store the task event that provides context on the task
   896  				// destroy. The Killed event is set from the alloc_runner and
   897  				// doesn't add detail
   898  				var killEvent *structs.TaskEvent
   899  				if r.destroyEvent.Type != structs.TaskKilled {
   900  					if r.destroyEvent.Type == structs.TaskKilling {
   901  						killEvent = r.destroyEvent
   902  					} else {
   903  						r.setState(structs.TaskStateRunning, r.destroyEvent)
   904  					}
   905  				}
   906  
   907  				r.killTask(killEvent)
   908  				close(stopCollection)
   909  				r.setState(structs.TaskStateDead, nil)
   910  				return
   911  			}
   912  		}
   913  
   914  	RESTART:
   915  		restart := r.shouldRestart()
   916  		if !restart {
   917  			r.setState(structs.TaskStateDead, nil)
   918  			return
   919  		}
   920  
   921  		// Clear the handle so a new driver will be created.
   922  		r.handleLock.Lock()
   923  		r.handle = nil
   924  		handleWaitCh = nil
   925  		stopCollection = nil
   926  		r.handleLock.Unlock()
   927  	}
   928  }
   929  
    930  // shouldRestart returns whether the task should restart. If the return value is
   931  // true, the task's restart policy has already been considered and any wait time
   932  // between restarts has been applied.
   933  func (r *TaskRunner) shouldRestart() bool {
   934  	state, when := r.restartTracker.GetState()
   935  	reason := r.restartTracker.GetReason()
   936  	switch state {
   937  	case structs.TaskNotRestarting, structs.TaskTerminated:
   938  		r.logger.Printf("[INFO] client: Not restarting task: %v for alloc: %v ", r.task.Name, r.alloc.ID)
   939  		if state == structs.TaskNotRestarting {
   940  			r.setState(structs.TaskStateDead,
   941  				structs.NewTaskEvent(structs.TaskNotRestarting).
   942  					SetRestartReason(reason).SetFailsTask())
   943  		}
   944  		return false
   945  	case structs.TaskRestarting:
   946  		r.logger.Printf("[INFO] client: Restarting task %q for alloc %q in %v", r.task.Name, r.alloc.ID, when)
   947  		r.setState(structs.TaskStatePending,
   948  			structs.NewTaskEvent(structs.TaskRestarting).
   949  				SetRestartDelay(when).
   950  				SetRestartReason(reason))
   951  	default:
   952  		r.logger.Printf("[ERR] client: restart tracker returned unknown state: %q", state)
   953  		return false
   954  	}
   955  
   956  	// Sleep but watch for destroy events.
   957  	select {
   958  	case <-time.After(when):
   959  	case <-r.destroyCh:
   960  	}
   961  
   962  	// Destroyed while we were waiting to restart, so abort.
   963  	r.destroyLock.Lock()
   964  	destroyed := r.destroy
   965  	r.destroyLock.Unlock()
   966  	if destroyed {
   967  		r.logger.Printf("[DEBUG] client: Not restarting task: %v because it has been destroyed", r.task.Name)
   968  		r.setState(structs.TaskStateDead, r.destroyEvent)
   969  		return false
   970  	}
   971  
   972  	return true
   973  }
   974  
   975  // killTask kills the running task. A killing event can optionally be passed and
   976  // this event is used to mark the task as being killed. It provides a means to
   977  // store extra information.
   978  func (r *TaskRunner) killTask(killingEvent *structs.TaskEvent) {
   979  	r.runningLock.Lock()
   980  	running := r.running
   981  	r.runningLock.Unlock()
   982  	if !running {
   983  		return
   984  	}
   985  
   986  	// Get the kill timeout
   987  	timeout := driver.GetKillTimeout(r.task.KillTimeout, r.config.MaxKillTimeout)
   988  
   989  	// Build the event
   990  	var event *structs.TaskEvent
   991  	if killingEvent != nil {
   992  		event = killingEvent
   993  		event.Type = structs.TaskKilling
   994  	} else {
   995  		event = structs.NewTaskEvent(structs.TaskKilling)
   996  	}
   997  	event.SetKillTimeout(timeout)
   998  
   999  	// Mark that we received the kill event
  1000  	r.setState(structs.TaskStateRunning, event)
  1001  
   1002  	// Kill the task using an exponential backoff in case of failures.
  1003  	destroySuccess, err := r.handleDestroy()
  1004  	if !destroySuccess {
  1005  		// We couldn't successfully destroy the resource created.
  1006  		r.logger.Printf("[ERR] client: failed to kill task %q. Resources may have been leaked: %v", r.task.Name, err)
  1007  	}
  1008  
  1009  	r.runningLock.Lock()
  1010  	r.running = false
  1011  	r.runningLock.Unlock()
  1012  
  1013  	// Store that the task has been destroyed and any associated error.
  1014  	r.setState("", structs.NewTaskEvent(structs.TaskKilled).SetKillError(err))
  1015  }
  1016  
  1017  // startTask creates the driver and starts the task.
  1018  func (r *TaskRunner) startTask() error {
  1019  	// Create a driver
  1020  	driver, err := r.createDriver()
  1021  	if err != nil {
  1022  		return fmt.Errorf("failed to create driver of task '%s' for alloc '%s': %v",
  1023  			r.task.Name, r.alloc.ID, err)
  1024  	}
  1025  
  1026  	// Start the job
  1027  	handle, err := driver.Start(r.ctx, r.task)
  1028  	if err != nil {
  1029  		wrapped := fmt.Errorf("failed to start task '%s' for alloc '%s': %v",
  1030  			r.task.Name, r.alloc.ID, err)
  1031  
  1032  		r.logger.Printf("[INFO] client: %v", wrapped)
  1033  
  1034  		if rerr, ok := err.(*structs.RecoverableError); ok {
  1035  			return structs.NewRecoverableError(wrapped, rerr.Recoverable)
  1036  		}
  1037  
  1038  		return wrapped
  1039  
  1040  	}
  1041  
  1042  	r.handleLock.Lock()
  1043  	r.handle = handle
  1044  	r.handleLock.Unlock()
  1045  	return nil
  1046  }
  1047  
  1048  // collectResourceUsageStats starts collecting resource usage stats of a Task.
  1049  // Collection ends when the passed channel is closed
  1050  func (r *TaskRunner) collectResourceUsageStats(stopCollection <-chan struct{}) {
   1051  	// start collecting the stats right away and then on every collection
   1052  	// interval
  1053  	next := time.NewTimer(0)
  1054  	defer next.Stop()
  1055  	for {
  1056  		select {
  1057  		case <-next.C:
  1058  			next.Reset(r.config.StatsCollectionInterval)
  1059  			if r.handle == nil {
  1060  				continue
  1061  			}
  1062  			ru, err := r.handle.Stats()
  1063  
  1064  			if err != nil {
   1065  				// We do not log when the plugin is shut down as this is simply a
  1066  				// race between the stopCollection channel being closed and calling
  1067  				// Stats on the handle.
  1068  				if !strings.Contains(err.Error(), "connection is shut down") {
  1069  					r.logger.Printf("[WARN] client: error fetching stats of task %v: %v", r.task.Name, err)
  1070  				}
  1071  				continue
  1072  			}
  1073  
  1074  			r.resourceUsageLock.Lock()
  1075  			r.resourceUsage = ru
  1076  			r.resourceUsageLock.Unlock()
  1077  			if ru != nil {
  1078  				r.emitStats(ru)
  1079  			}
  1080  		case <-stopCollection:
  1081  			return
  1082  		}
  1083  	}
  1084  }
  1085  
  1086  // LatestResourceUsage returns the last resource utilization datapoint collected
  1087  func (r *TaskRunner) LatestResourceUsage() *cstructs.TaskResourceUsage {
  1088  	r.resourceUsageLock.RLock()
  1089  	defer r.resourceUsageLock.RUnlock()
  1090  	r.runningLock.Lock()
  1091  	defer r.runningLock.Unlock()
  1092  
   1093  	// If the task is not running there can be no latest resource usage
  1094  	if !r.running {
  1095  		return nil
  1096  	}
  1097  
  1098  	return r.resourceUsage
  1099  }
  1100  
  1101  // handleUpdate takes an updated allocation and updates internal state to
  1102  // reflect the new config for the task.
  1103  func (r *TaskRunner) handleUpdate(update *structs.Allocation) error {
  1104  	// Extract the task group from the alloc.
  1105  	tg := update.Job.LookupTaskGroup(update.TaskGroup)
  1106  	if tg == nil {
  1107  		return fmt.Errorf("alloc '%s' missing task group '%s'", update.ID, update.TaskGroup)
  1108  	}
  1109  
  1110  	// Extract the task.
  1111  	var updatedTask *structs.Task
  1112  	for _, t := range tg.Tasks {
  1113  		if t.Name == r.task.Name {
  1114  			updatedTask = t.Copy()
  1115  		}
  1116  	}
  1117  	if updatedTask == nil {
  1118  		return fmt.Errorf("task group %q doesn't contain task %q", tg.Name, r.task.Name)
  1119  	}
  1120  
  1121  	// Merge in the task resources
  1122  	updatedTask.Resources = update.TaskResources[updatedTask.Name]
  1123  
  1124  	// Update will update resources and store the new kill timeout.
  1125  	var mErr multierror.Error
  1126  	r.handleLock.Lock()
  1127  	if r.handle != nil {
  1128  		if err := r.handle.Update(updatedTask); err != nil {
  1129  			mErr.Errors = append(mErr.Errors, fmt.Errorf("updating task resources failed: %v", err))
  1130  		}
  1131  	}
  1132  	r.handleLock.Unlock()
  1133  
  1134  	// Update the restart policy.
  1135  	if r.restartTracker != nil {
  1136  		r.restartTracker.SetPolicy(tg.RestartPolicy)
  1137  	}
  1138  
  1139  	// Store the updated alloc.
  1140  	r.alloc = update
  1141  	r.task = updatedTask
  1142  	return mErr.ErrorOrNil()
  1143  }
  1144  
  1145  // handleDestroy kills the task handle. In the case that killing fails,
  1146  // handleDestroy will retry with an exponential backoff and will give up at a
  1147  // given limit. It returns whether the task was destroyed and the error
  1148  // associated with the last kill attempt.
  1149  func (r *TaskRunner) handleDestroy() (destroyed bool, err error) {
  1150  	// Cap the number of times we attempt to kill the task.
  1151  	for i := 0; i < killFailureLimit; i++ {
  1152  		if err = r.handle.Kill(); err != nil {
  1153  			// Calculate the new backoff
  1154  			backoff := (1 << (2 * uint64(i))) * killBackoffBaseline
  1155  			if backoff > killBackoffLimit {
  1156  				backoff = killBackoffLimit
  1157  			}
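         			// With the 5s baseline this yields delays of 5s, 20s, 80s and
         			// then the 2m cap for the remaining attempts.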
  1158  
  1159  			r.logger.Printf("[ERR] client: failed to kill task '%s' for alloc %q. Retrying in %v: %v",
  1160  				r.task.Name, r.alloc.ID, backoff, err)
   1161  			time.Sleep(backoff)
  1162  		} else {
  1163  			// Kill was successful
  1164  			return true, nil
  1165  		}
  1166  	}
  1167  	return
  1168  }
  1169  
  1170  // Restart will restart the task
  1171  func (r *TaskRunner) Restart(source, reason string) {
  1172  
  1173  	reasonStr := fmt.Sprintf("%s: %s", source, reason)
  1174  	event := structs.NewTaskEvent(structs.TaskRestartSignal).SetRestartReason(reasonStr)
  1175  
  1176  	r.logger.Printf("[DEBUG] client: restarting task %v for alloc %q: %v",
  1177  		r.task.Name, r.alloc.ID, reasonStr)
  1178  
  1179  	r.runningLock.Lock()
  1180  	running := r.running
  1181  	r.runningLock.Unlock()
  1182  
  1183  	// Drop the restart event
  1184  	if !running {
  1185  		r.logger.Printf("[DEBUG] client: skipping restart since task isn't running")
  1186  		return
  1187  	}
  1188  
  1189  	select {
  1190  	case r.restartCh <- event:
  1191  	case <-r.waitCh:
  1192  	}
  1193  }
  1194  
  1195  // Signal will send a signal to the task
  1196  func (r *TaskRunner) Signal(source, reason string, s os.Signal) error {
  1197  
  1198  	reasonStr := fmt.Sprintf("%s: %s", source, reason)
  1199  	event := structs.NewTaskEvent(structs.TaskSignaling).SetTaskSignal(s).SetTaskSignalReason(reasonStr)
  1200  
  1201  	r.logger.Printf("[DEBUG] client: sending signal %v to task %v for alloc %q", s, r.task.Name, r.alloc.ID)
  1202  
  1203  	r.runningLock.Lock()
  1204  	running := r.running
  1205  	r.runningLock.Unlock()
  1206  
   1207  	// Drop the signal
  1208  	if !running {
  1209  		r.logger.Printf("[DEBUG] client: skipping signal since task isn't running")
  1210  		return nil
  1211  	}
  1212  
  1213  	resCh := make(chan error)
  1214  	se := SignalEvent{
  1215  		s:      s,
  1216  		e:      event,
  1217  		result: resCh,
  1218  	}
  1219  	select {
  1220  	case r.signalCh <- se:
  1221  	case <-r.waitCh:
  1222  	}
  1223  
  1224  	return <-resCh
  1225  }
  1226  
  1227  // Kill will kill a task and store the error, no longer restarting the task. If
  1228  // fail is set, the task is marked as having failed.
  1229  func (r *TaskRunner) Kill(source, reason string, fail bool) {
  1230  	reasonStr := fmt.Sprintf("%s: %s", source, reason)
  1231  	event := structs.NewTaskEvent(structs.TaskKilling).SetKillReason(reasonStr)
  1232  	if fail {
  1233  		event.SetFailsTask()
  1234  	}
  1235  
  1236  	r.logger.Printf("[DEBUG] client: killing task %v for alloc %q: %v", r.task.Name, r.alloc.ID, reasonStr)
  1237  	r.Destroy(event)
  1238  }
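
         // exampleReactToRenderedTemplate is an illustrative sketch, not part of the
         // original file: it shows how a hook such as the template manager might use
         // the control surface above, preferring a signal and falling back to a
         // restart. The os.Interrupt signal and the "template" source label are only
         // examples.
         func exampleReactToRenderedTemplate(r *TaskRunner) {
         	if err := r.Signal("template", "template re-rendered", os.Interrupt); err != nil {
         		r.Restart("template", "template re-rendered; signal failed")
         	}
         }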
  1239  
   1240  // UnblockStart unblocks the starting of the task. It currently assumes that
   1241  // only consul-template will unblock it
  1242  func (r *TaskRunner) UnblockStart(source string) {
  1243  	r.unblockLock.Lock()
  1244  	defer r.unblockLock.Unlock()
  1245  	if r.unblocked {
  1246  		return
  1247  	}
  1248  
  1249  	r.logger.Printf("[DEBUG] client: unblocking task %v for alloc %q: %v", r.task.Name, r.alloc.ID, source)
  1250  	r.unblocked = true
  1251  	close(r.unblockCh)
  1252  }
  1253  
  1254  // Helper function for converting a WaitResult into a TaskTerminated event.
  1255  func (r *TaskRunner) waitErrorToEvent(res *dstructs.WaitResult) *structs.TaskEvent {
  1256  	return structs.NewTaskEvent(structs.TaskTerminated).
  1257  		SetExitCode(res.ExitCode).
  1258  		SetSignal(res.Signal).
  1259  		SetExitMessage(res.Err)
  1260  }
  1261  
   1262  // Update is used to deliver an updated allocation to the task context
  1263  func (r *TaskRunner) Update(update *structs.Allocation) {
  1264  	select {
  1265  	case r.updateCh <- update:
  1266  	default:
  1267  		r.logger.Printf("[ERR] client: dropping task update '%s' (alloc '%s')",
  1268  			r.task.Name, r.alloc.ID)
  1269  	}
  1270  }
  1271  
  1272  // Destroy is used to indicate that the task context should be destroyed. The
  1273  // event parameter provides a context for the destroy.
  1274  func (r *TaskRunner) Destroy(event *structs.TaskEvent) {
  1275  	r.destroyLock.Lock()
  1276  	defer r.destroyLock.Unlock()
  1277  
  1278  	if r.destroy {
  1279  		return
  1280  	}
  1281  	r.destroy = true
  1282  	r.destroyEvent = event
  1283  	close(r.destroyCh)
  1284  }
  1285  
  1286  // emitStats emits resource usage stats of tasks to remote metrics collector
  1287  // sinks
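         // The emitted gauge keys have the form
         // {"client", "allocs", <job>, <task group>, <alloc ID>, <task>, "memory"|"cpu", <metric>}.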
  1288  func (r *TaskRunner) emitStats(ru *cstructs.TaskResourceUsage) {
  1289  	if ru.ResourceUsage.MemoryStats != nil && r.config.PublishAllocationMetrics {
  1290  		metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "rss"}, float32(ru.ResourceUsage.MemoryStats.RSS))
  1291  		metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "cache"}, float32(ru.ResourceUsage.MemoryStats.Cache))
  1292  		metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "swap"}, float32(ru.ResourceUsage.MemoryStats.Swap))
  1293  		metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "max_usage"}, float32(ru.ResourceUsage.MemoryStats.MaxUsage))
  1294  		metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "kernel_usage"}, float32(ru.ResourceUsage.MemoryStats.KernelUsage))
  1295  		metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "memory", "kernel_max_usage"}, float32(ru.ResourceUsage.MemoryStats.KernelMaxUsage))
  1296  	}
  1297  
  1298  	if ru.ResourceUsage.CpuStats != nil && r.config.PublishAllocationMetrics {
  1299  		metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "total_percent"}, float32(ru.ResourceUsage.CpuStats.Percent))
  1300  		metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "system"}, float32(ru.ResourceUsage.CpuStats.SystemMode))
  1301  		metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "user"}, float32(ru.ResourceUsage.CpuStats.UserMode))
  1302  		metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "throttled_time"}, float32(ru.ResourceUsage.CpuStats.ThrottledTime))
  1303  		metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "throttled_periods"}, float32(ru.ResourceUsage.CpuStats.ThrottledPeriods))
  1304  		metrics.SetGauge([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, r.alloc.ID, r.task.Name, "cpu", "total_ticks"}, float32(ru.ResourceUsage.CpuStats.TotalTicks))
  1305  	}
  1306  }