github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/client/allocrunner/alloc_runner.go

     1  package allocrunner
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"path/filepath"
     7  	"sync"
     8  	"time"
     9  
    10  	log "github.com/hashicorp/go-hclog"
    11  	multierror "github.com/hashicorp/go-multierror"
    12  	"github.com/hashicorp/nomad/client/allocdir"
    13  	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
    14  	"github.com/hashicorp/nomad/client/allocrunner/state"
    15  	"github.com/hashicorp/nomad/client/allocrunner/taskrunner"
    16  	"github.com/hashicorp/nomad/client/allocwatcher"
    17  	"github.com/hashicorp/nomad/client/config"
    18  	"github.com/hashicorp/nomad/client/consul"
    19  	"github.com/hashicorp/nomad/client/devicemanager"
    20  	"github.com/hashicorp/nomad/client/dynamicplugins"
    21  	cinterfaces "github.com/hashicorp/nomad/client/interfaces"
    22  	"github.com/hashicorp/nomad/client/pluginmanager/csimanager"
    23  	"github.com/hashicorp/nomad/client/pluginmanager/drivermanager"
    24  	cstate "github.com/hashicorp/nomad/client/state"
    25  	cstructs "github.com/hashicorp/nomad/client/structs"
    26  	"github.com/hashicorp/nomad/client/vaultclient"
    27  	agentconsul "github.com/hashicorp/nomad/command/agent/consul"
    28  	"github.com/hashicorp/nomad/helper"
    29  	"github.com/hashicorp/nomad/nomad/structs"
    30  	"github.com/hashicorp/nomad/plugins/device"
    31  	"github.com/hashicorp/nomad/plugins/drivers"
    32  )
    33  
    34  // allocRunner is used to run all the tasks in a given allocation
    35  type allocRunner struct {
    36  	// id is the ID of the allocation. Can be accessed without a lock
    37  	id string
    38  
    39  	// Logger is the logger for the alloc runner.
    40  	logger log.Logger
    41  
    42  	clientConfig *config.Config
    43  
    44  	// stateUpdater is used to emit updated alloc state
    45  	stateUpdater cinterfaces.AllocStateHandler
    46  
    47  	// taskStateUpdatedCh is ticked whenever task state has changed. Must
    48  	// have len==1 to allow nonblocking notification of state updates while
    49  	// the goroutine is already processing a previous update.
    50  	taskStateUpdatedCh chan struct{}
    51  
    52  	// taskStateUpdateHandlerCh is closed when the task state handling
    53  	// goroutine exits. It is unsafe to destroy the local allocation state
    54  	// before this goroutine exits.
    55  	taskStateUpdateHandlerCh chan struct{}
    56  
    57  	// allocUpdatedCh is a channel that is used to stream allocation updates into
    58  	// the allocUpdate handler. Must have len==1 to allow nonblocking notification
    59  	// of new allocation updates while the goroutine is processing a previous
    60  	// update.
    61  	allocUpdatedCh chan *structs.Allocation
    62  
    63  	// consulClient is the client used by the consul service hook for
    64  	// registering services and checks
    65  	consulClient consul.ConsulServiceAPI
    66  
    67  	// consulProxiesClient is the client used by the envoy version hook for
    68  	// looking up supported envoy versions of the consul agent.
    69  	consulProxiesClient consul.SupportedProxiesAPI
    70  
    71  	// sidsClient is the client used by the service identity hook for
    72  	// managing SI tokens
    73  	sidsClient consul.ServiceIdentityAPI
    74  
    75  	// vaultClient is the client used to manage Vault tokens
    76  	vaultClient vaultclient.VaultClient
    77  
    78  	// waitCh is closed when the Run loop has exited
    79  	waitCh chan struct{}
    80  
    81  	// destroyed is true when the Run loop has exited, postrun hooks have
    82  	// run, and alloc runner has been destroyed. Must acquire destroyedLock
    83  	// to access.
    84  	destroyed bool
    85  
    86  	// destroyCh is closed when the Run loop has exited, postrun hooks have
    87  	// run, and alloc runner has been destroyed.
    88  	destroyCh chan struct{}
    89  
    90  	// shutdown is true when the Run loop has exited, and shutdown hooks have
    91  	// run. Must acquire destroyedLock to access.
    92  	shutdown bool
    93  
    94  	// shutdownCh is closed when the Run loop has exited, and shutdown hooks
    95  	// have run.
    96  	shutdownCh chan struct{}
    97  
    98  	// destroyLaunched is true if Destroy has been called. Must acquire
    99  	// destroyedLock to access.
   100  	destroyLaunched bool
   101  
   102  	// shutdownLaunched is true if Shutdown has been called. Must acquire
   103  	// destroyedLock to access.
   104  	shutdownLaunched bool
   105  
   106  	// destroyedLock guards destroyed, shutdown, destroyLaunched,
   107  	// shutdownLaunched, and serializes Shutdown/Destroy calls.
   108  	destroyedLock sync.Mutex
   109  
   110  	// Alloc captures the allocation being run.
   111  	alloc     *structs.Allocation
   112  	allocLock sync.RWMutex
   113  
   114  	// state is the alloc runner's state
   115  	state     *state.State
   116  	stateLock sync.RWMutex
   117  
   118  	stateDB cstate.StateDB
   119  
   120  	// allocDir is used to build the allocations directory structure.
   121  	allocDir *allocdir.AllocDir
   122  
   123  	// runnerHooks are alloc runner lifecycle hooks that should be run on state
   124  	// transitions.
   125  	runnerHooks []interfaces.RunnerHook
   126  
   127  	// hookState is the output of allocrunner hooks
   128  	hookState   *cstructs.AllocHookResources
   129  	hookStateMu sync.RWMutex
   130  
   131  	// tasks are the set of task runners
   132  	tasks map[string]*taskrunner.TaskRunner
   133  
   134  	// deviceStatsReporter is used to lookup resource usage for alloc devices
   135  	deviceStatsReporter cinterfaces.DeviceStatsReporter
   136  
   137  	// allocBroadcaster sends client allocation updates to all listeners
   138  	allocBroadcaster *cstructs.AllocBroadcaster
   139  
   140  	// prevAllocWatcher allows waiting for any previous or preempted allocations
   141  	// to exit
   142  	prevAllocWatcher allocwatcher.PrevAllocWatcher
   143  
   144  	// prevAllocMigrator allows the migration of a previous allocation's alloc dir.
   145  	prevAllocMigrator allocwatcher.PrevAllocMigrator
   146  
   147  	// dynamicRegistry contains all locally registered dynamic plugins (e.g. CSI
   148  	// plugins).
   149  	dynamicRegistry dynamicplugins.Registry
   150  
   151  	// csiManager is used to wait for CSI Volumes to be attached, and by the task
   152  	// runner to manage their mounting
   153  	csiManager csimanager.Manager
   154  
   155  	// devicemanager is used to mount devices as well as lookup device
   156  	// statistics
   157  	devicemanager devicemanager.Manager
   158  
   159  	// driverManager is responsible for dispensing driver plugins and registering
   160  	// event handlers
   161  	driverManager drivermanager.Manager
   162  
   163  	// serversContactedCh is passed to TaskRunners so they can detect when
   164  	// servers have been contacted for the first time in case of a failed
   165  	// restore.
   166  	serversContactedCh chan struct{}
   167  
   168  	taskHookCoordinator *taskHookCoordinator
   169  
   170  	// rpcClient is the RPC Client that should be used by the allocrunner and its
   171  	// hooks to communicate with Nomad Servers.
   172  	rpcClient RPCer
   173  }
   174  
   175  // RPCer is the interface needed by hooks to make RPC calls.
   176  type RPCer interface {
   177  	RPC(method string, args interface{}, reply interface{}) error
   178  }
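
// A hedged sketch of how a hook test might satisfy RPCer: the stubRPCer type
// below is hypothetical and not part of this package; it only records the
// method name and reports success.
//
//	type stubRPCer struct{ calls []string }
//
//	func (s *stubRPCer) RPC(method string, args interface{}, reply interface{}) error {
//		s.calls = append(s.calls, method) // pretend the server handled the request
//		return nil
//	}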
   179  
   180  // NewAllocRunner returns a new allocation runner.
   181  func NewAllocRunner(config *Config) (*allocRunner, error) {
   182  	alloc := config.Alloc
   183  	tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup)
   184  	if tg == nil {
   185  		return nil, fmt.Errorf("failed to lookup task group %q", alloc.TaskGroup)
   186  	}
   187  
   188  	ar := &allocRunner{
   189  		id:                       alloc.ID,
   190  		alloc:                    alloc,
   191  		clientConfig:             config.ClientConfig,
   192  		consulClient:             config.Consul,
   193  		consulProxiesClient:      config.ConsulProxies,
   194  		sidsClient:               config.ConsulSI,
   195  		vaultClient:              config.Vault,
   196  		tasks:                    make(map[string]*taskrunner.TaskRunner, len(tg.Tasks)),
   197  		waitCh:                   make(chan struct{}),
   198  		destroyCh:                make(chan struct{}),
   199  		shutdownCh:               make(chan struct{}),
   200  		state:                    &state.State{},
   201  		stateDB:                  config.StateDB,
   202  		stateUpdater:             config.StateUpdater,
   203  		taskStateUpdatedCh:       make(chan struct{}, 1),
   204  		taskStateUpdateHandlerCh: make(chan struct{}),
   205  		allocUpdatedCh:           make(chan *structs.Allocation, 1),
   206  		deviceStatsReporter:      config.DeviceStatsReporter,
   207  		prevAllocWatcher:         config.PrevAllocWatcher,
   208  		prevAllocMigrator:        config.PrevAllocMigrator,
   209  		dynamicRegistry:          config.DynamicRegistry,
   210  		csiManager:               config.CSIManager,
   211  		devicemanager:            config.DeviceManager,
   212  		driverManager:            config.DriverManager,
   213  		serversContactedCh:       config.ServersContactedCh,
   214  		rpcClient:                config.RPCClient,
   215  	}
   216  
   217  	// Create the logger based on the allocation ID
   218  	ar.logger = config.Logger.Named("alloc_runner").With("alloc_id", alloc.ID)
   219  
   220  	// Create alloc broadcaster
   221  	ar.allocBroadcaster = cstructs.NewAllocBroadcaster(ar.logger)
   222  
   223  	// Create alloc dir
   224  	ar.allocDir = allocdir.NewAllocDir(ar.logger, filepath.Join(config.ClientConfig.AllocDir, alloc.ID))
   225  
   226  	ar.taskHookCoordinator = newTaskHookCoordinator(ar.logger, tg.Tasks)
   227  
   228  	// Initialize the runners hooks.
   229  	if err := ar.initRunnerHooks(config.ClientConfig); err != nil {
   230  		return nil, err
   231  	}
   232  
   233  	// Create the TaskRunners
   234  	if err := ar.initTaskRunners(tg.Tasks); err != nil {
   235  		return nil, err
   236  	}
   237  
   238  	return ar, nil
   239  }
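
// A minimal lifecycle sketch (caller code, not part of this file), assuming a
// populated *Config named cfg: construct the runner, restore any persisted
// state, run it in a goroutine, and wait for it to exit.
//
//	ar, err := NewAllocRunner(cfg)
//	if err != nil {
//		return err
//	}
//	if err := ar.Restore(); err != nil { // after NewAllocRunner, before Run
//		return err
//	}
//	go ar.Run()
//	<-ar.WaitCh()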
   240  
   241  // initTaskRunners creates task runners but does *not* run them.
   242  func (ar *allocRunner) initTaskRunners(tasks []*structs.Task) error {
   243  	for _, task := range tasks {
   244  		trConfig := &taskrunner.Config{
   245  			Alloc:                ar.alloc,
   246  			ClientConfig:         ar.clientConfig,
   247  			Task:                 task,
   248  			TaskDir:              ar.allocDir.NewTaskDir(task.Name),
   249  			Logger:               ar.logger,
   250  			StateDB:              ar.stateDB,
   251  			StateUpdater:         ar,
   252  			DynamicRegistry:      ar.dynamicRegistry,
   253  			Consul:               ar.consulClient,
   254  			ConsulProxies:        ar.consulProxiesClient,
   255  			ConsulSI:             ar.sidsClient,
   256  			Vault:                ar.vaultClient,
   257  			DeviceStatsReporter:  ar.deviceStatsReporter,
   258  			CSIManager:           ar.csiManager,
   259  			DeviceManager:        ar.devicemanager,
   260  			DriverManager:        ar.driverManager,
   261  			ServersContactedCh:   ar.serversContactedCh,
   262  			StartConditionMetCtx: ar.taskHookCoordinator.startConditionForTask(task),
   263  		}
   264  
   265  		// Create, but do not Run, the task runner
   266  		tr, err := taskrunner.NewTaskRunner(trConfig)
   267  		if err != nil {
   268  			return fmt.Errorf("failed creating runner for task %q: %v", task.Name, err)
   269  		}
   270  
   271  		ar.tasks[task.Name] = tr
   272  	}
   273  	return nil
   274  }
   275  
   276  func (ar *allocRunner) WaitCh() <-chan struct{} {
   277  	return ar.waitCh
   278  }
   279  
   280  // Run the AllocRunner. Starts tasks if the alloc is non-terminal and closes
   281  // WaitCh when it exits. Should be started in a goroutine.
   282  func (ar *allocRunner) Run() {
   283  	// Close the wait channel on return
   284  	defer close(ar.waitCh)
   285  
   286  	// Start the task state update handler
   287  	go ar.handleTaskStateUpdates()
   288  
   289  	// Start the alloc update handler
   290  	go ar.handleAllocUpdates()
   291  
   292  	// If task update chan has been closed, that means we've been shutdown.
   293  	select {
   294  	case <-ar.taskStateUpdateHandlerCh:
   295  		return
   296  	default:
   297  	}
   298  
   299  	// When handling a (potentially restored) terminal alloc, still run the tasks and
   300  	// post-run hooks so any cleanup skipped by the earlier termination is performed.
   301  
   302  	// Run the prestart hooks if non-terminal
   303  	if ar.shouldRun() {
   304  		if err := ar.prerun(); err != nil {
   305  			ar.logger.Error("prerun failed", "error", err)
   306  
   307  			for _, tr := range ar.tasks {
   308  				tr.MarkFailedDead(fmt.Sprintf("failed to setup alloc: %v", err))
   309  			}
   310  
   311  			goto POST
   312  		}
   313  	}
   314  
   315  	// Run the runners (blocks until they exit)
   316  	ar.runTasks()
   317  
   318  POST:
   319  	if ar.isShuttingDown() {
   320  		return
   321  	}
   322  
   323  	// Run the postrun hooks
   324  	if err := ar.postrun(); err != nil {
   325  		ar.logger.Error("postrun failed", "error", err)
   326  	}
   327  
   328  }
   329  
   330  // shouldRun returns true if the alloc is in a state that the alloc runner
   331  // should run it.
   332  func (ar *allocRunner) shouldRun() bool {
   333  	// Do not run allocs that are terminal
   334  	if ar.Alloc().TerminalStatus() {
   335  		ar.logger.Trace("alloc terminal; not running",
   336  			"desired_status", ar.Alloc().DesiredStatus,
   337  			"client_status", ar.Alloc().ClientStatus,
   338  		)
   339  		return false
   340  	}
   341  
   342  	// It's possible that the alloc local state was marked terminal before
   343  	// the server copy of the alloc (checked above) was marked as terminal,
   344  	// so check the local state as well.
   345  	switch clientStatus := ar.AllocState().ClientStatus; clientStatus {
   346  	case structs.AllocClientStatusComplete, structs.AllocClientStatusFailed, structs.AllocClientStatusLost:
   347  		ar.logger.Trace("alloc terminal; updating server and not running", "status", clientStatus)
   348  		return false
   349  	}
   350  
   351  	return true
   352  }
   353  
   354  // runTasks is used to run the task runners and block until they exit.
   355  func (ar *allocRunner) runTasks() {
   356  	// Start all tasks
   357  	for _, task := range ar.tasks {
   358  		go task.Run()
   359  	}
   360  
   361  	// Block on all tasks except poststop tasks
   362  	for _, task := range ar.tasks {
   363  		if !task.IsPoststopTask() {
   364  			<-task.WaitCh()
   365  		}
   366  	}
   367  
   368  	// Signal poststop tasks to proceed to main runtime
   369  	ar.taskHookCoordinator.StartPoststopTasks()
   370  
   371  	// Wait for poststop tasks to finish before proceeding
   372  	for _, task := range ar.tasks {
   373  		if task.IsPoststopTask() {
   374  			<-task.WaitCh()
   375  		}
   376  	}
   377  }
   378  
   379  // Alloc returns the current allocation being run by this runner as sent by the
   380  // server. This view of the allocation does not have updated task states.
   381  func (ar *allocRunner) Alloc() *structs.Allocation {
   382  	ar.allocLock.RLock()
   383  	defer ar.allocLock.RUnlock()
   384  	return ar.alloc
   385  }
   386  
   387  func (ar *allocRunner) setAlloc(updated *structs.Allocation) {
   388  	ar.allocLock.Lock()
   389  	ar.alloc = updated
   390  	ar.allocLock.Unlock()
   391  }
   392  
   393  // GetAllocDir returns the alloc dir which is safe for concurrent use.
   394  func (ar *allocRunner) GetAllocDir() *allocdir.AllocDir {
   395  	return ar.allocDir
   396  }
   397  
   398  // Restore state from database. Must be called after NewAllocRunner but before
   399  // Run.
   400  func (ar *allocRunner) Restore() error {
   401  	// Retrieve deployment status to avoid resetting it across agent
   402  	// restarts. Once a deployment status is set Nomad no longer monitors
   403  	// alloc health, so we must persist deployment state across restarts.
   404  	ds, err := ar.stateDB.GetDeploymentStatus(ar.id)
   405  	if err != nil {
   406  		return err
   407  	}
   408  
   409  	ns, err := ar.stateDB.GetNetworkStatus(ar.id)
   410  	if err != nil {
   411  		return err
   412  	}
   413  
   414  	ar.stateLock.Lock()
   415  	ar.state.DeploymentStatus = ds
   416  	ar.state.NetworkStatus = ns
   417  	ar.stateLock.Unlock()
   418  
   419  	states := make(map[string]*structs.TaskState)
   420  
   421  	// Restore task runners
   422  	for _, tr := range ar.tasks {
   423  		if err := tr.Restore(); err != nil {
   424  			return err
   425  		}
   426  		states[tr.Task().Name] = tr.TaskState()
   427  	}
   428  
   429  	ar.taskHookCoordinator.taskStateUpdated(states)
   430  
   431  	return nil
   432  }
   433  
   434  // persistDeploymentStatus stores AllocDeploymentStatus.
   435  func (ar *allocRunner) persistDeploymentStatus(ds *structs.AllocDeploymentStatus) {
   436  	if err := ar.stateDB.PutDeploymentStatus(ar.id, ds); err != nil {
   437  		// While any persistence errors are very bad, the worst case
   438  		// scenario for failing to persist deployment status is that if
   439  		// the agent is restarted it will monitor the deployment status
   440  		// again. This could cause a deployment's status to change when
   441  		// that shouldn't happen. However, allowing that seems better
   442  		// than failing the entire allocation.
   443  		ar.logger.Error("error storing deployment status", "error", err)
   444  	}
   445  }
   446  
   447  // TaskStateUpdated is called by TaskRunner when a task's state has been
   448  // updated. It does not process the update synchronously but instead notifies a
   449  // goroutine that the state has changed. Since processing the state change may cause
   450  // the task to be killed (thus change its state again) it cannot be done
   451  // synchronously as it would cause a deadlock due to reentrancy.
   452  //
   453  // The goroutine is used to compute changes to the alloc's ClientStatus and to
   454  // update the server with the new state.
   455  func (ar *allocRunner) TaskStateUpdated() {
   456  	select {
   457  	case ar.taskStateUpdatedCh <- struct{}{}:
   458  	default:
   459  		// already pending updates
   460  	}
   461  }
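
// The buffered send with a default case above is a coalescing notification:
// any number of TaskStateUpdated calls collapse into at most one pending
// wakeup for handleTaskStateUpdates. A standalone sketch of the same pattern,
// with illustrative names only:
//
//	updatedCh := make(chan struct{}, 1) // len==1: at most one queued wakeup
//
//	// producer side: never blocks, duplicate notifications are dropped
//	select {
//	case updatedCh <- struct{}{}:
//	default:
//	}
//
//	// consumer side: one receive covers every update since the last pass
//	for range updatedCh {
//		// recompute alloc state from the current task states
//	}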
   462  
   463  // handleTaskStateUpdates must be run in a goroutine as it monitors
   464  // taskStateUpdatedCh for task state update notifications and processes task
   465  // states.
   466  //
   467  // Processing task state updates must be done in a goroutine as it may have to
   468  // kill tasks which causes further task state updates.
   469  func (ar *allocRunner) handleTaskStateUpdates() {
   470  	defer close(ar.taskStateUpdateHandlerCh)
   471  
   472  	hasSidecars := hasSidecarTasks(ar.tasks)
   473  
   474  	for done := false; !done; {
   475  		select {
   476  		case <-ar.taskStateUpdatedCh:
   477  		case <-ar.waitCh:
   478  			// Run has exited, sync once more to ensure final
   479  			// states are collected.
   480  			done = true
   481  		}
   482  
   483  		ar.logger.Trace("handling task state update", "done", done)
   484  
   485  		// Set with the appropriate event if task runners should be
   486  		// killed.
   487  		var killEvent *structs.TaskEvent
   488  
   489  		// If task runners should be killed, this is set to the task
   490  		// name whose fault it is.
   491  		killTask := ""
   492  
   493  		// Task state has been updated; gather the state of the other tasks
   494  		trNum := len(ar.tasks)
   495  		liveRunners := make([]*taskrunner.TaskRunner, 0, trNum)
   496  		states := make(map[string]*structs.TaskState, trNum)
   497  
   498  		for name, tr := range ar.tasks {
   499  			state := tr.TaskState()
   500  			states[name] = state
   501  
   502  			if tr.IsPoststopTask() {
   503  				continue
   504  			}
   505  
   506  			// Capture live task runners in case we need to kill them
   507  			if state.State != structs.TaskStateDead {
   508  				liveRunners = append(liveRunners, tr)
   509  				continue
   510  			}
   511  
   512  			// Task is dead, determine if other tasks should be killed
   513  			if state.Failed {
   514  				// Only set failed event if no event has been
   515  				// set yet to give dead leaders priority.
   516  				if killEvent == nil {
   517  					killTask = name
   518  					killEvent = structs.NewTaskEvent(structs.TaskSiblingFailed).
   519  						SetFailedSibling(name)
   520  				}
   521  			} else if tr.IsLeader() {
   522  				killEvent = structs.NewTaskEvent(structs.TaskLeaderDead)
   523  			}
   524  		}
   525  
   526  		// if all live runners are sidecars - kill alloc
   527  		if killEvent == nil && hasSidecars && !hasNonSidecarTasks(liveRunners) {
   528  			killEvent = structs.NewTaskEvent(structs.TaskMainDead)
   529  		}
   530  
   531  		// If there's a kill event set and live runners, kill them
   532  		if killEvent != nil && len(liveRunners) > 0 {
   533  
   534  			// Log kill reason
   535  			switch killEvent.Type {
   536  			case structs.TaskLeaderDead:
   537  				ar.logger.Debug("leader task dead, destroying all tasks", "leader_task", killTask)
   538  			case structs.TaskMainDead:
   539  				ar.logger.Debug("main tasks dead, destroying all sidecar tasks")
   540  			default:
   541  				ar.logger.Debug("task failure, destroying all tasks", "failed_task", killTask)
   542  			}
   543  
   544  			// Emit kill event for live runners
   545  			for _, tr := range liveRunners {
   546  				tr.EmitEvent(killEvent)
   547  			}
   548  
   549  			// Kill 'em all
   550  			states = ar.killTasks()
   551  
   552  			// Wait for TaskRunners to exit before continuing to
   553  			// prevent looping before TaskRunners have transitioned
   554  			// to Dead.
   555  			for _, tr := range liveRunners {
   556  				ar.logger.Info("killing task", "task", tr.Task().Name)
   557  				select {
   558  				case <-tr.WaitCh():
   559  				case <-ar.waitCh:
   560  				}
   561  			}
   562  		}
   563  
   564  		ar.taskHookCoordinator.taskStateUpdated(states)
   565  
   566  		// Get the client allocation
   567  		calloc := ar.clientAlloc(states)
   568  
   569  		// Update the server
   570  		ar.stateUpdater.AllocStateUpdated(calloc)
   571  
   572  		// Broadcast client alloc to listeners
   573  		ar.allocBroadcaster.Send(calloc)
   574  	}
   575  }
   576  
   577  // killTasks kills all task runners, leader (if there is one) first. Errors are
   578  // logged except taskrunner.ErrTaskNotRunning which is ignored. Task states
   579  // after Kill has been called are returned.
   580  func (ar *allocRunner) killTasks() map[string]*structs.TaskState {
   581  	var mu sync.Mutex
   582  	states := make(map[string]*structs.TaskState, len(ar.tasks))
   583  
   584  	// run alloc prekill hooks
   585  	ar.preKillHooks()
   586  
   587  	// Kill leader first, synchronously
   588  	for name, tr := range ar.tasks {
   589  		if !tr.IsLeader() {
   590  			continue
   591  		}
   592  
   593  		taskEvent := structs.NewTaskEvent(structs.TaskKilling)
   594  		taskEvent.SetKillTimeout(tr.Task().KillTimeout)
   595  		err := tr.Kill(context.TODO(), taskEvent)
   596  		if err != nil && err != taskrunner.ErrTaskNotRunning {
   597  			ar.logger.Warn("error stopping leader task", "error", err, "task_name", name)
   598  		}
   599  
   600  		state := tr.TaskState()
   601  		states[name] = state
   602  		break
   603  	}
   604  
   605  	// Kill the rest concurrently
   606  	wg := sync.WaitGroup{}
   607  	for name, tr := range ar.tasks {
   608  		// Filter out poststop tasks so they run after all the other tasks are killed
   609  		if tr.IsLeader() || tr.IsPoststopTask() {
   610  			continue
   611  		}
   612  
   613  		wg.Add(1)
   614  		go func(name string, tr *taskrunner.TaskRunner) {
   615  			defer wg.Done()
   616  			taskEvent := structs.NewTaskEvent(structs.TaskKilling)
   617  			taskEvent.SetKillTimeout(tr.Task().KillTimeout)
   618  			err := tr.Kill(context.TODO(), taskEvent)
   619  			if err != nil && err != taskrunner.ErrTaskNotRunning {
   620  				ar.logger.Warn("error stopping task", "error", err, "task_name", name)
   621  			}
   622  
   623  			state := tr.TaskState()
   624  			mu.Lock()
   625  			states[name] = state
   626  			mu.Unlock()
   627  		}(name, tr)
   628  	}
   629  	wg.Wait()
   630  
   631  	return states
   632  }
   633  
   634  // clientAlloc takes in the task states and returns an Allocation populated
   635  // with Client specific fields
   636  func (ar *allocRunner) clientAlloc(taskStates map[string]*structs.TaskState) *structs.Allocation {
   637  	ar.stateLock.Lock()
   638  	defer ar.stateLock.Unlock()
   639  
   640  	// store task states for AllocState to expose
   641  	ar.state.TaskStates = taskStates
   642  
   643  	a := &structs.Allocation{
   644  		ID:         ar.id,
   645  		TaskStates: taskStates,
   646  	}
   647  
   648  	if d := ar.state.DeploymentStatus; d != nil {
   649  		a.DeploymentStatus = d.Copy()
   650  	}
   651  
   652  	// Compute the ClientStatus
   653  	if ar.state.ClientStatus != "" {
   654  		// The client status is being forced
   655  		a.ClientStatus, a.ClientDescription = ar.state.ClientStatus, ar.state.ClientDescription
   656  	} else {
   657  		a.ClientStatus, a.ClientDescription = getClientStatus(taskStates)
   658  	}
   659  
   660  	// If the allocation is terminal, make sure all required fields are properly
   661  	// set.
   662  	if a.ClientTerminalStatus() {
   663  		alloc := ar.Alloc()
   664  
   665  		// If we are part of a deployment and the alloc has failed, mark the
   666  		// alloc as unhealthy. This guards against the health watcher not being started.
   667  		// If the health status is already set, terminal allocations should not overwrite it.
   668  		if a.ClientStatus == structs.AllocClientStatusFailed &&
   669  			alloc.DeploymentID != "" && !a.DeploymentStatus.HasHealth() {
   670  			a.DeploymentStatus = &structs.AllocDeploymentStatus{
   671  				Healthy: helper.BoolToPtr(false),
   672  			}
   673  		}
   674  
   675  		// Make sure we have marked the finished at for every task. This is used
   676  		// to calculate the reschedule time for failed allocations.
   677  		now := time.Now()
   678  		for taskName := range ar.tasks {
   679  			ts, ok := a.TaskStates[taskName]
   680  			if !ok {
   681  				ts = &structs.TaskState{}
   682  				a.TaskStates[taskName] = ts
   683  			}
   684  			if ts.FinishedAt.IsZero() {
   685  				ts.FinishedAt = now
   686  			}
   687  		}
   688  	}
   689  
   690  	// Set the NetworkStatus and default DNSConfig if one is not returned from the client
   691  	netStatus := ar.state.NetworkStatus
   692  	if netStatus != nil {
   693  		a.NetworkStatus = netStatus
   694  	} else {
   695  		a.NetworkStatus = new(structs.AllocNetworkStatus)
   696  	}
   697  
   698  	if a.NetworkStatus.DNS == nil {
   699  		alloc := ar.Alloc()
   700  		nws := alloc.Job.LookupTaskGroup(alloc.TaskGroup).Networks
   701  		if len(nws) > 0 {
   702  			a.NetworkStatus.DNS = nws[0].DNS.Copy()
   703  		}
   704  	}
   705  
   706  	return a
   707  }
   708  
   709  // getClientStatus takes in the task states for a given allocation and computes
   710  // the client status and description
   711  func getClientStatus(taskStates map[string]*structs.TaskState) (status, description string) {
   712  	var pending, running, dead, failed bool
   713  	for _, state := range taskStates {
   714  		switch state.State {
   715  		case structs.TaskStateRunning:
   716  			running = true
   717  		case structs.TaskStatePending:
   718  			pending = true
   719  		case structs.TaskStateDead:
   720  			if state.Failed {
   721  				failed = true
   722  			} else {
   723  				dead = true
   724  			}
   725  		}
   726  	}
   727  
   728  	// Determine the alloc status
   729  	if failed {
   730  		return structs.AllocClientStatusFailed, "Failed tasks"
   731  	} else if running {
   732  		return structs.AllocClientStatusRunning, "Tasks are running"
   733  	} else if pending {
   734  		return structs.AllocClientStatusPending, "No tasks have started"
   735  	} else if dead {
   736  		return structs.AllocClientStatusComplete, "All tasks have completed"
   737  	}
   738  
   739  	return "", ""
   740  }
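
// getClientStatus encodes a precedence of failed > running > pending >
// complete. An illustrative call (not taken from this file): one running task
// plus one dead-but-successful task yields a running allocation.
//
//	status, desc := getClientStatus(map[string]*structs.TaskState{
//		"web":  {State: structs.TaskStateRunning},
//		"init": {State: structs.TaskStateDead, Failed: false},
//	})
//	// status == structs.AllocClientStatusRunning, desc == "Tasks are running"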
   741  
   742  // SetClientStatus is a helper for forcing a specific client
   743  // status on the alloc runner. This is used during restore errors
   744  // when the task state can't be restored.
   745  func (ar *allocRunner) SetClientStatus(clientStatus string) {
   746  	ar.stateLock.Lock()
   747  	defer ar.stateLock.Unlock()
   748  	ar.state.ClientStatus = clientStatus
   749  }
   750  
   751  func (ar *allocRunner) SetNetworkStatus(s *structs.AllocNetworkStatus) {
   752  	ar.stateLock.Lock()
   753  	defer ar.stateLock.Unlock()
   754  	ar.state.NetworkStatus = s.Copy()
   755  }
   756  
   757  func (ar *allocRunner) NetworkStatus() *structs.AllocNetworkStatus {
   758  	ar.stateLock.Lock()
   759  	defer ar.stateLock.Unlock()
   760  	return ar.state.NetworkStatus.Copy()
   761  }
   762  
   763  // AllocState returns a copy of allocation state including a snapshot of task
   764  // states.
   765  func (ar *allocRunner) AllocState() *state.State {
   766  	ar.stateLock.RLock()
   767  	state := ar.state.Copy()
   768  	ar.stateLock.RUnlock()
   769  
   770  	// If TaskStateUpdated has not been called yet, ar.state.TaskStates
   771  	// won't be set as it is not the canonical source of TaskStates.
   772  	if len(state.TaskStates) == 0 {
   773  		state.TaskStates = make(map[string]*structs.TaskState, len(ar.tasks))
   774  		for k, tr := range ar.tasks {
   775  			state.TaskStates[k] = tr.TaskState()
   776  		}
   777  	}
   778  
   779  	// Generate alloc to get other state fields
   780  	alloc := ar.clientAlloc(state.TaskStates)
   781  	state.ClientStatus = alloc.ClientStatus
   782  	state.ClientDescription = alloc.ClientDescription
   783  	state.DeploymentStatus = alloc.DeploymentStatus
   784  
   785  	return state
   786  }
   787  
   788  // Update asynchronously updates the running allocation with a new version
   789  // received from the server.
   790  // When processing a new update, we will first attempt to drain stale updates
   791  // from the queue, before appending the new one.
   792  func (ar *allocRunner) Update(update *structs.Allocation) {
   793  	select {
   794  	// Drain queued update from the channel if possible, and check the modify
   795  	// index
   796  	case oldUpdate := <-ar.allocUpdatedCh:
   797  		// If the old update is newer than the replacement, then skip the new one
   798  		// and return. This case shouldn't happen, but may in the case of a bug
   799  		// elsewhere inside the system.
   800  		if oldUpdate.AllocModifyIndex > update.AllocModifyIndex {
   801  			ar.logger.Debug("Discarding allocation update due to newer alloc revision in queue",
   802  				"old_modify_index", oldUpdate.AllocModifyIndex,
   803  				"new_modify_index", update.AllocModifyIndex)
   804  			ar.allocUpdatedCh <- oldUpdate
   805  			return
   806  		} else {
   807  			ar.logger.Debug("Discarding allocation update",
   808  				"skipped_modify_index", oldUpdate.AllocModifyIndex,
   809  				"new_modify_index", update.AllocModifyIndex)
   810  		}
   811  	case <-ar.waitCh:
   812  		ar.logger.Trace("AllocRunner has terminated, skipping alloc update",
   813  			"modify_index", update.AllocModifyIndex)
   814  		return
   815  	default:
   816  	}
   817  
   818  	// Queue the new update
   819  	ar.allocUpdatedCh <- update
   820  }
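
// Update keeps allocUpdatedCh acting as a "latest wins" mailbox: at most one
// alloc is ever queued, and a stale queued alloc is dropped in favor of the
// newer one. A condensed sketch of the drain-then-send pattern with
// illustrative names, omitting the modify-index and waitCh guards above:
//
//	mailbox := make(chan *structs.Allocation, 1)
//
//	// replace whatever is queued with the newest update
//	select {
//	case <-mailbox:
//	default:
//	}
//	mailbox <- update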
   821  
   822  func (ar *allocRunner) handleAllocUpdates() {
   823  	for {
   824  		select {
   825  		case update := <-ar.allocUpdatedCh:
   826  			ar.handleAllocUpdate(update)
   827  		case <-ar.waitCh:
   828  			return
   829  		}
   830  	}
   831  }
   832  
   833  // handleAllocUpdate applies a single alloc update: it runs the update hooks
   834  // (unless the alloc is terminal), propagates the update to the task runners,
   835  // and kills the tasks, leader first, if the alloc is being stopped.
   836  func (ar *allocRunner) handleAllocUpdate(update *structs.Allocation) {
   837  	// Detect Stop updates
   838  	stopping := !ar.Alloc().TerminalStatus() && update.TerminalStatus()
   839  
   840  	// Update ar.alloc
   841  	ar.setAlloc(update)
   842  
   843  	// Run update hooks if not stopping or dead
   844  	if !update.TerminalStatus() {
   845  		if err := ar.update(update); err != nil {
   846  			ar.logger.Error("error running update hooks", "error", err)
   847  		}
   848  
   849  	}
   850  
   851  	// Update task runners
   852  	for _, tr := range ar.tasks {
   853  		tr.Update(update)
   854  	}
   855  
   856  	// If alloc is being terminated, kill all tasks, leader first
   857  	if stopping {
   858  		ar.killTasks()
   859  	}
   860  
   861  }
   862  
   863  func (ar *allocRunner) Listener() *cstructs.AllocListener {
   864  	return ar.allocBroadcaster.Listen()
   865  }
   866  
   867  func (ar *allocRunner) destroyImpl() {
   868  	// Stop any running tasks and persist states in case the client is
   869  	// shutdown before Destroy finishes.
   870  	states := ar.killTasks()
   871  	calloc := ar.clientAlloc(states)
   872  	ar.stateUpdater.AllocStateUpdated(calloc)
   873  
   874  	// Wait for tasks to exit and postrun hooks to finish
   875  	<-ar.waitCh
   876  
   877  	// Run destroy hooks
   878  	if err := ar.destroy(); err != nil {
   879  		ar.logger.Warn("error running destroy hooks", "error", err)
   880  	}
   881  
   882  	// Wait for task state update handler to exit before removing local
   883  	// state if Run() ran at all.
   884  	<-ar.taskStateUpdateHandlerCh
   885  
   886  	// Mark alloc as destroyed
   887  	ar.destroyedLock.Lock()
   888  
   889  	// Cleanup the state db while holding the lock to avoid a race with a
   890  	// periodic PersistState call that could resurrect the alloc
   891  	if err := ar.stateDB.DeleteAllocationBucket(ar.id); err != nil {
   892  		ar.logger.Warn("failed to delete allocation state", "error", err)
   893  	}
   894  
   895  	if !ar.shutdown {
   896  		ar.shutdown = true
   897  		close(ar.shutdownCh)
   898  	}
   899  
   900  	ar.destroyed = true
   901  	close(ar.destroyCh)
   902  
   903  	ar.destroyedLock.Unlock()
   904  }
   905  
   906  func (ar *allocRunner) PersistState() error {
   907  	ar.destroyedLock.Lock()
   908  	defer ar.destroyedLock.Unlock()
   909  
   910  	if ar.destroyed {
   911  		err := ar.stateDB.DeleteAllocationBucket(ar.id, cstate.WithBatchMode())
   912  		if err != nil {
   913  			ar.logger.Warn("failed to delete allocation bucket", "error", err)
   914  		}
   915  		return nil
   916  	}
   917  
   918  	// persist network status, wrapping in a func to release state lock as early as possible
   919  	err := func() error {
   920  		ar.stateLock.Lock()
   921  		defer ar.stateLock.Unlock()
   922  		if ar.state.NetworkStatus != nil {
   923  			err := ar.stateDB.PutNetworkStatus(ar.id, ar.state.NetworkStatus, cstate.WithBatchMode())
   924  			if err != nil {
   925  				return err
   926  			}
   927  		}
   928  		return nil
   929  	}()
   930  	if err != nil {
   931  		return err
   932  	}
   933  
   934  	// TODO: consider persisting deployment state along with task status.
   935  	// While we study why only the alloc is persisted, I opted to maintain current
   936  	// behavior and not risk adding yet more IO calls unnecessarily.
   937  	return ar.stateDB.PutAllocation(ar.Alloc(), cstate.WithBatchMode())
   938  }
   939  
   940  // Destroy the alloc runner by stopping it if it is still running and cleaning
   941  // up all of its resources.
   942  //
   943  // This method is safe for calling concurrently with Run() and will cause it to
   944  // exit (thus closing WaitCh).
   945  // When the destroy action is completed, it will close DestroyCh().
   946  func (ar *allocRunner) Destroy() {
   947  	ar.destroyedLock.Lock()
   948  	defer ar.destroyedLock.Unlock()
   949  
   950  	if ar.destroyed {
   951  		// Only destroy once
   952  		return
   953  	}
   954  
   955  	if ar.destroyLaunched {
   956  		// Only dispatch a destroy once
   957  		return
   958  	}
   959  
   960  	ar.destroyLaunched = true
   961  
   962  	// Synchronize calls to shutdown/destroy
   963  	if ar.shutdownLaunched {
   964  		go func() {
   965  			ar.logger.Debug("Waiting for shutdown before destroying runner")
   966  			<-ar.shutdownCh
   967  			ar.destroyImpl()
   968  		}()
   969  
   970  		return
   971  	}
   972  
   973  	go ar.destroyImpl()
   974  }
   975  
   976  // IsDestroyed returns true if the alloc runner has been destroyed (stopped and
   977  // garbage collected).
   978  //
   979  // This method is safe for calling concurrently with Run(). Callers must
   980  // receive on WaitCh() to block until alloc runner has stopped and been
   981  // destroyed.
   982  func (ar *allocRunner) IsDestroyed() bool {
   983  	ar.destroyedLock.Lock()
   984  	defer ar.destroyedLock.Unlock()
   985  	return ar.destroyed
   986  }
   987  
   988  // IsWaiting returns true if the alloc runner is waiting for its previous
   989  // allocation to terminate.
   990  //
   991  // This method is safe for calling concurrently with Run().
   992  func (ar *allocRunner) IsWaiting() bool {
   993  	return ar.prevAllocWatcher.IsWaiting()
   994  }
   995  
   996  // isShuttingDown returns true if the alloc runner is in a shutdown state
   997  // due to a call to Shutdown() or Destroy()
   998  func (ar *allocRunner) isShuttingDown() bool {
   999  	ar.destroyedLock.Lock()
  1000  	defer ar.destroyedLock.Unlock()
  1001  	return ar.shutdownLaunched
  1002  }
  1003  
  1004  // DestroyCh is a channel that is closed when an allocrunner is closed due to
  1005  // an explicit call to Destroy().
  1006  func (ar *allocRunner) DestroyCh() <-chan struct{} {
  1007  	return ar.destroyCh
  1008  }
  1009  
  1010  // ShutdownCh is a channel that is closed when an allocrunner is closed due to
  1011  // either an explicit call to Shutdown(), or Destroy().
  1012  func (ar *allocRunner) ShutdownCh() <-chan struct{} {
  1013  	return ar.shutdownCh
  1014  }
  1015  
  1016  // Shutdown AllocRunner gracefully. Asynchronously shuts down all TaskRunners.
  1017  // Tasks are unaffected and may be restored.
  1018  // When the shutdown is complete, it will close ShutdownCh().
  1019  func (ar *allocRunner) Shutdown() {
  1020  	ar.destroyedLock.Lock()
  1021  	defer ar.destroyedLock.Unlock()
  1022  
  1023  	// Destroy is a superset of Shutdown so there's nothing to do if this
  1024  	// has already been destroyed.
  1025  	if ar.destroyed {
  1026  		return
  1027  	}
  1028  
  1029  	// Destroy is a superset of Shutdown so if it's been marked for destruction,
  1030  	// don't try and shutdown in parallel. If shutdown has been launched, don't
  1031  	// try again.
  1032  	if ar.destroyLaunched || ar.shutdownLaunched {
  1033  		return
  1034  	}
  1035  
  1036  	ar.shutdownLaunched = true
  1037  
  1038  	go func() {
  1039  		ar.logger.Trace("shutting down")
  1040  
  1041  		// Shutdown tasks gracefully if they were run
  1042  		wg := sync.WaitGroup{}
  1043  		for _, tr := range ar.tasks {
  1044  			wg.Add(1)
  1045  			go func(tr *taskrunner.TaskRunner) {
  1046  				tr.Shutdown()
  1047  				wg.Done()
  1048  			}(tr)
  1049  		}
  1050  		wg.Wait()
  1051  
  1052  		// Wait for Run to exit
  1053  		<-ar.waitCh
  1054  
  1055  		// Run shutdown hooks
  1056  		ar.shutdownHooks()
  1057  
  1058  		// Wait for updater to finish its final run
  1059  		<-ar.taskStateUpdateHandlerCh
  1060  
  1061  		ar.destroyedLock.Lock()
  1062  		ar.shutdown = true
  1063  		close(ar.shutdownCh)
  1064  		ar.destroyedLock.Unlock()
  1065  	}()
  1066  }
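
// Shutdown and Destroy are the two ways an alloc runner winds down: Shutdown
// detaches from tasks so a restarted agent can Restore them, while Destroy
// kills the tasks and deletes local state. A hedged sketch of caller-side
// usage (the agentStopping flag is illustrative, not part of this file):
//
//	if agentStopping {
//		ar.Shutdown()
//		<-ar.ShutdownCh() // tasks keep running; state stays on disk for Restore
//	} else {
//		ar.Destroy()
//		<-ar.DestroyCh() // tasks killed and local allocation state removed
//	}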
  1067  
  1068  // IsMigrating returns true if the alloc runner is migrating data from its
  1069  // previous allocation.
  1070  //
  1071  // This method is safe for calling concurrently with Run().
  1072  func (ar *allocRunner) IsMigrating() bool {
  1073  	return ar.prevAllocMigrator.IsMigrating()
  1074  }
  1075  
  1076  func (ar *allocRunner) StatsReporter() interfaces.AllocStatsReporter {
  1077  	return ar
  1078  }
  1079  
  1080  // LatestAllocStats returns the latest stats for an allocation. If taskFilter
  1081  // is set, only stats for that task -- if it exists -- are returned.
  1082  func (ar *allocRunner) LatestAllocStats(taskFilter string) (*cstructs.AllocResourceUsage, error) {
  1083  	astat := &cstructs.AllocResourceUsage{
  1084  		Tasks: make(map[string]*cstructs.TaskResourceUsage, len(ar.tasks)),
  1085  		ResourceUsage: &cstructs.ResourceUsage{
  1086  			MemoryStats: &cstructs.MemoryStats{},
  1087  			CpuStats:    &cstructs.CpuStats{},
  1088  			DeviceStats: []*device.DeviceGroupStats{},
  1089  		},
  1090  	}
  1091  
  1092  	for name, tr := range ar.tasks {
  1093  		if taskFilter != "" && taskFilter != name {
  1094  			// Getting stats for a particular task and it's not this one!
  1095  			continue
  1096  		}
  1097  
  1098  		if usage := tr.LatestResourceUsage(); usage != nil {
  1099  			astat.Tasks[name] = usage
  1100  			astat.ResourceUsage.Add(usage.ResourceUsage)
  1101  			if usage.Timestamp > astat.Timestamp {
  1102  				astat.Timestamp = usage.Timestamp
  1103  			}
  1104  		}
  1105  	}
  1106  
  1107  	return astat, nil
  1108  }
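
// An illustrative read of the aggregated stats (caller code, not part of this
// file); an empty filter aggregates across every task in the allocation:
//
//	astat, err := ar.LatestAllocStats("")
//	if err != nil {
//		return err
//	}
//	for task, usage := range astat.Tasks {
//		fmt.Println(task, usage.ResourceUsage.MemoryStats.RSS)
//	}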
  1109  
  1110  func (ar *allocRunner) GetTaskEventHandler(taskName string) drivermanager.EventHandler {
  1111  	if tr, ok := ar.tasks[taskName]; ok {
  1112  		return func(ev *drivers.TaskEvent) {
  1113  			tr.EmitEvent(&structs.TaskEvent{
  1114  				Type:          structs.TaskDriverMessage,
  1115  				Time:          ev.Timestamp.UnixNano(),
  1116  				Details:       ev.Annotations,
  1117  				DriverMessage: ev.Message,
  1118  			})
  1119  		}
  1120  	}
  1121  	return nil
  1122  }
  1123  
  1124  // RestartTask signals the task runner for the provided task to restart.
  1125  func (ar *allocRunner) RestartTask(taskName string, taskEvent *structs.TaskEvent) error {
  1126  	tr, ok := ar.tasks[taskName]
  1127  	if !ok {
  1128  		return fmt.Errorf("Could not find task runner for task: %s", taskName)
  1129  	}
  1130  
  1131  	return tr.Restart(context.TODO(), taskEvent, false)
  1132  }
  1133  
  1134  // Restart satisfies the WorkloadRestarter interface and restarts all task
  1135  // runners concurrently.
  1136  func (ar *allocRunner) Restart(ctx context.Context, event *structs.TaskEvent, failure bool) error {
  1137  	waitCh := make(chan struct{})
  1138  	var err *multierror.Error
  1139  	var errMutex sync.Mutex
  1140  
  1141  	// run alloc task restart hooks
  1142  	ar.taskRestartHooks()
  1143  
  1144  	go func() {
  1145  		var wg sync.WaitGroup
  1146  		defer close(waitCh)
  1147  		for tn, tr := range ar.tasks {
  1148  			wg.Add(1)
  1149  			go func(taskName string, r agentconsul.WorkloadRestarter) {
  1150  				defer wg.Done()
  1151  				e := r.Restart(ctx, event, failure)
  1152  				if e != nil {
  1153  					errMutex.Lock()
  1154  					defer errMutex.Unlock()
  1155  					err = multierror.Append(err, fmt.Errorf("failed to restart task %s: %v", taskName, e))
  1156  				}
  1157  			}(tn, tr)
  1158  		}
  1159  		wg.Wait()
  1160  	}()
  1161  
  1162  	select {
  1163  	case <-waitCh:
  1164  	case <-ctx.Done():
  1165  	}
  1166  
  1167  	return err.ErrorOrNil()
  1168  }
  1169  
  1170  // RestartAll signals all task runners in the allocation to restart and passes
  1171  // a copy of the task event to each restart event.
  1172  // Returns any errors in a concatenated form.
  1173  func (ar *allocRunner) RestartAll(taskEvent *structs.TaskEvent) error {
  1174  	var err *multierror.Error
  1175  
  1176  	// run alloc task restart hooks
  1177  	ar.taskRestartHooks()
  1178  
  1179  	for tn := range ar.tasks {
  1180  		rerr := ar.RestartTask(tn, taskEvent.Copy())
  1181  		if rerr != nil {
  1182  			err = multierror.Append(err, rerr)
  1183  		}
  1184  	}
  1185  
  1186  	return err.ErrorOrNil()
  1187  }
  1188  
  1189  // Signal sends a signal request to task runners inside an allocation. If the
  1190  // taskName is empty, then it is sent to all tasks.
  1191  func (ar *allocRunner) Signal(taskName, signal string) error {
  1192  	event := structs.NewTaskEvent(structs.TaskSignaling).SetSignalText(signal)
  1193  
  1194  	if taskName != "" {
  1195  		tr, ok := ar.tasks[taskName]
  1196  		if !ok {
  1197  			return fmt.Errorf("Task not found")
  1198  		}
  1199  
  1200  		return tr.Signal(event, signal)
  1201  	}
  1202  
  1203  	var err *multierror.Error
  1204  
  1205  	for tn, tr := range ar.tasks {
  1206  		rerr := tr.Signal(event.Copy(), signal)
  1207  		if rerr != nil {
  1208  			err = multierror.Append(err, fmt.Errorf("Failed to signal task: %s, err: %v", tn, rerr))
  1209  		}
  1210  	}
  1211  
  1212  	return err.ErrorOrNil()
  1213  }
  1214  
  1215  func (ar *allocRunner) GetTaskExecHandler(taskName string) drivermanager.TaskExecHandler {
  1216  	tr, ok := ar.tasks[taskName]
  1217  	if !ok {
  1218  		return nil
  1219  	}
  1220  
  1221  	return tr.TaskExecHandler()
  1222  }
  1223  
  1224  func (ar *allocRunner) GetTaskDriverCapabilities(taskName string) (*drivers.Capabilities, error) {
  1225  	tr, ok := ar.tasks[taskName]
  1226  	if !ok {
  1227  		return nil, fmt.Errorf("task not found")
  1228  	}
  1229  
  1230  	return tr.DriverCapabilities()
  1231  }