github.com/ncodes/nomad@v0.5.7-0.20170403112158-97adf4a74fb3/client/alloc_runner.go (about)

     1  package client
     2  
     3  import (
     4  	"fmt"
     5  	"log"
     6  	"os"
     7  	"path/filepath"
     8  	"sync"
     9  	"time"
    10  
    11  	"github.com/hashicorp/go-multierror"
    12  	"github.com/ncodes/nomad/client/allocdir"
    13  	"github.com/ncodes/nomad/client/config"
    14  	"github.com/ncodes/nomad/client/vaultclient"
    15  	"github.com/ncodes/nomad/nomad/structs"
    16  
    17  	cstructs "github.com/ncodes/nomad/client/structs"
    18  )
    19  
const (
	// taskReceivedSyncLimit is how long the client will wait before sending
	// that a task was received to the server. The client does not immediately
	// send that the task was received to the server because another transition
	// to running or failed is likely to occur immediately after and a single
	// update will transfer all past state information. If no other transition
	// has occurred up to this limit, we will send to the server.
	taskReceivedSyncLimit = 30 * time.Second
)
    29  
// AllocStateUpdater is the callback used to publish the status of an
// allocation. The AllocRunner invokes it from syncStatus with a copy of its
// allocation.
type AllocStateUpdater func(alloc *structs.Allocation)
    32  
// AllocStatsReporter exposes the latest resource usage statistics of an
// allocation. AllocRunner implements it and hands itself out through
// StatsReporter.
type AllocStatsReporter interface {
	// LatestAllocStats returns the most recent resource usage. When taskFilter
	// is non-empty only the named task is included.
	LatestAllocStats(taskFilter string) (*cstructs.AllocResourceUsage, error)
}
    36  
// AllocRunner is used to wrap an allocation and provide the execution context.
// It owns one TaskRunner per task, tracks the task states, and syncs the
// resulting allocation status through the updater and to the on-disk state
// file.
type AllocRunner struct {
	// config supplies the state and alloc directories and the version string
	// written into snapshots. updater is called by syncStatus with a copy of
	// the allocation.
	config  *config.Config
	updater AllocStateUpdater
	logger  *log.Logger

	// alloc is the allocation being run. allocClientStatus and
	// allocClientDescription hold a status set explicitly through setStatus;
	// when either is non-empty, Alloc reports them instead of deriving the
	// status from the task states. allocLock is taken around most accesses to
	// these three fields.
	alloc                  *structs.Allocation
	allocClientStatus      string // Explicit status of allocation. Set when there are failures
	allocClientDescription string
	allocLock              sync.Mutex

	// dirtyCh (buffered, size 1) wakes dirtySyncState whenever the status or a
	// task state changed and needs to be synced.
	dirtyCh chan struct{}

	// allocDir is the allocation directory. It is created in Run or loaded by
	// RestoreState; allocDirLock is held while Run builds it and while
	// saveAllocRunnerState reads it.
	allocDir     *allocdir.AllocDir
	allocDirLock sync.Mutex

	// tasks maps task name to its runner and is guarded by taskLock in Run,
	// getTaskRunners and LatestAllocStats. taskStates maps task name to its
	// last known state. restored is the set of task names recreated by
	// RestoreState, which Run skips when starting task runners.
	tasks      map[string]*TaskRunner
	taskStates map[string]*structs.TaskState
	restored   map[string]struct{}
	taskLock   sync.RWMutex

	// taskStatusLock guards taskStates in Alloc and setTaskState.
	taskStatusLock sync.RWMutex

	// updateCh (buffered, size 64) carries allocation updates from Update to
	// the Run loop.
	updateCh chan *structs.Allocation

	vaultClient vaultclient.VaultClient

	// otherAllocDir is the previous allocation's directory, set through
	// SetPreviousAllocDir. Run moves its contents into a freshly built
	// allocDir and then destroys it.
	otherAllocDir *allocdir.AllocDir

	// destroy records that Destroy was called so destroyCh is closed only
	// once; destroyLock guards both. waitCh is closed when Run returns.
	destroy     bool
	destroyCh   chan struct{}
	destroyLock sync.Mutex
	waitCh      chan struct{}

	// serialize saveAllocRunnerState calls
	persistLock sync.Mutex
}
    74  
// allocRunnerState is used to snapshot the state of the alloc runner. It is
// written by saveAllocRunnerState and read back by RestoreState.
type allocRunnerState struct {
	// Version is taken from the client config when the snapshot is created.
	Version                string
	Alloc                  *structs.Allocation
	AllocDir               *allocdir.AllocDir
	AllocClientStatus      string
	AllocClientDescription string

	// COMPAT: Remove in 0.7.0: removing will break upgrading directly from
	//         0.5.2, so don't remove in the 0.6 series.
	// Context is deprecated and only used to migrate from older releases.
	// It will be removed in the future.
	// RestoreState rebuilds AllocDir from it when AllocDir is missing.
	Context *struct {
		AllocID  string // unused; included for completeness
		AllocDir struct {
			AllocDir  string
			SharedDir string // unused; included for completeness
			TaskDirs  map[string]string
		}
	} `json:"Context,omitempty"`
}
    96  
    97  // NewAllocRunner is used to create a new allocation context
    98  func NewAllocRunner(logger *log.Logger, config *config.Config, updater AllocStateUpdater,
    99  	alloc *structs.Allocation, vaultClient vaultclient.VaultClient) *AllocRunner {
   100  	ar := &AllocRunner{
   101  		config:      config,
   102  		updater:     updater,
   103  		logger:      logger,
   104  		alloc:       alloc,
   105  		dirtyCh:     make(chan struct{}, 1),
   106  		tasks:       make(map[string]*TaskRunner),
   107  		taskStates:  copyTaskStates(alloc.TaskStates),
   108  		restored:    make(map[string]struct{}),
   109  		updateCh:    make(chan *structs.Allocation, 64),
   110  		destroyCh:   make(chan struct{}),
   111  		waitCh:      make(chan struct{}),
   112  		vaultClient: vaultClient,
   113  	}
   114  	return ar
   115  }
   116  
   117  // stateFilePath returns the path to our state file
   118  func (r *AllocRunner) stateFilePath() string {
   119  	r.allocLock.Lock()
   120  	defer r.allocLock.Unlock()
   121  	path := filepath.Join(r.config.StateDir, "alloc", r.alloc.ID, "state.json")
   122  	return path
   123  }
   124  
   125  // RestoreState is used to restore the state of the alloc runner
   126  func (r *AllocRunner) RestoreState() error {
   127  	// Load the snapshot
   128  	var snap allocRunnerState
   129  	if err := restoreState(r.stateFilePath(), &snap); err != nil {
   130  		return err
   131  	}
   132  
   133  	// #2132 Upgrade path: if snap.AllocDir is nil, try to convert old
   134  	// Context struct to new AllocDir struct
   135  	if snap.AllocDir == nil && snap.Context != nil {
   136  		r.logger.Printf("[DEBUG] client: migrating state snapshot for alloc %q", r.alloc.ID)
   137  		snap.AllocDir = allocdir.NewAllocDir(r.logger, snap.Context.AllocDir.AllocDir)
   138  		for taskName := range snap.Context.AllocDir.TaskDirs {
   139  			snap.AllocDir.NewTaskDir(taskName)
   140  		}
   141  	}
   142  
   143  	// Restore fields
   144  	r.alloc = snap.Alloc
   145  	r.allocDir = snap.AllocDir
   146  	r.allocClientStatus = snap.AllocClientStatus
   147  	r.allocClientDescription = snap.AllocClientDescription
   148  
   149  	var snapshotErrors multierror.Error
   150  	if r.alloc == nil {
   151  		snapshotErrors.Errors = append(snapshotErrors.Errors, fmt.Errorf("alloc_runner snapshot includes a nil allocation"))
   152  	}
   153  	if r.allocDir == nil {
   154  		snapshotErrors.Errors = append(snapshotErrors.Errors, fmt.Errorf("alloc_runner snapshot includes a nil alloc dir"))
   155  	}
   156  	if e := snapshotErrors.ErrorOrNil(); e != nil {
   157  		return e
   158  	}
   159  
   160  	r.taskStates = snap.Alloc.TaskStates
   161  
   162  	// Restore the task runners
   163  	var mErr multierror.Error
   164  	for name, state := range r.taskStates {
   165  		// Mark the task as restored.
   166  		r.restored[name] = struct{}{}
   167  
   168  		td, ok := r.allocDir.TaskDirs[name]
   169  		if !ok {
   170  			err := fmt.Errorf("failed to find task dir metadata for alloc %q task %q",
   171  				r.alloc.ID, name)
   172  			r.logger.Printf("[ERR] client: %v", err)
   173  			return err
   174  		}
   175  
   176  		task := &structs.Task{Name: name}
   177  		tr := NewTaskRunner(r.logger, r.config, r.setTaskState, td, r.Alloc(), task, r.vaultClient)
   178  		r.tasks[name] = tr
   179  
   180  		// Skip tasks in terminal states.
   181  		if state.State == structs.TaskStateDead {
   182  			continue
   183  		}
   184  
   185  		if err := tr.RestoreState(); err != nil {
   186  			r.logger.Printf("[ERR] client: failed to restore state for alloc %s task '%s': %v", r.alloc.ID, name, err)
   187  			mErr.Errors = append(mErr.Errors, err)
   188  		} else if !r.alloc.TerminalStatus() {
   189  			// Only start if the alloc isn't in a terminal status.
   190  			go tr.Run()
   191  		}
   192  	}
   193  
   194  	return mErr.ErrorOrNil()
   195  }
   196  
   197  // GetAllocDir returns the alloc dir for the alloc runner
   198  func (r *AllocRunner) GetAllocDir() *allocdir.AllocDir {
   199  	return r.allocDir
   200  }
   201  
   202  // SaveState is used to snapshot the state of the alloc runner
   203  // if the fullSync is marked as false only the state of the Alloc Runner
   204  // is snapshotted. If fullSync is marked as true, we snapshot
   205  // all the Task Runners associated with the Alloc
   206  func (r *AllocRunner) SaveState() error {
   207  	if err := r.saveAllocRunnerState(); err != nil {
   208  		return err
   209  	}
   210  
   211  	// Save state for each task
   212  	runners := r.getTaskRunners()
   213  	var mErr multierror.Error
   214  	for _, tr := range runners {
   215  		if err := r.saveTaskRunnerState(tr); err != nil {
   216  			mErr.Errors = append(mErr.Errors, err)
   217  		}
   218  	}
   219  	return mErr.ErrorOrNil()
   220  }
   221  
   222  func (r *AllocRunner) saveAllocRunnerState() error {
   223  	r.persistLock.Lock()
   224  	defer r.persistLock.Unlock()
   225  
   226  	// Create the snapshot.
   227  	alloc := r.Alloc()
   228  
   229  	r.allocLock.Lock()
   230  	allocClientStatus := r.allocClientStatus
   231  	allocClientDescription := r.allocClientDescription
   232  	r.allocLock.Unlock()
   233  
   234  	r.allocDirLock.Lock()
   235  	allocDir := r.allocDir
   236  	r.allocDirLock.Unlock()
   237  
   238  	snap := allocRunnerState{
   239  		Version:                r.config.Version,
   240  		Alloc:                  alloc,
   241  		AllocDir:               allocDir,
   242  		AllocClientStatus:      allocClientStatus,
   243  		AllocClientDescription: allocClientDescription,
   244  	}
   245  	return persistState(r.stateFilePath(), &snap)
   246  }
   247  
   248  func (r *AllocRunner) saveTaskRunnerState(tr *TaskRunner) error {
   249  	if err := tr.SaveState(); err != nil {
   250  		return fmt.Errorf("failed to save state for alloc %s task '%s': %v",
   251  			r.alloc.ID, tr.task.Name, err)
   252  	}
   253  	return nil
   254  }
   255  
   256  // DestroyState is used to cleanup after ourselves
   257  func (r *AllocRunner) DestroyState() error {
   258  	return os.RemoveAll(filepath.Dir(r.stateFilePath()))
   259  }
   260  
   261  // DestroyContext is used to destroy the context
   262  func (r *AllocRunner) DestroyContext() error {
   263  	return r.allocDir.Destroy()
   264  }
   265  
   266  // copyTaskStates returns a copy of the passed task states.
   267  func copyTaskStates(states map[string]*structs.TaskState) map[string]*structs.TaskState {
   268  	copy := make(map[string]*structs.TaskState, len(states))
   269  	for task, state := range states {
   270  		copy[task] = state.Copy()
   271  	}
   272  	return copy
   273  }
   274  
   275  // Alloc returns the associated allocation
   276  func (r *AllocRunner) Alloc() *structs.Allocation {
   277  	r.allocLock.Lock()
   278  	alloc := r.alloc.Copy()
   279  
   280  	// The status has explicitly been set.
   281  	if r.allocClientStatus != "" || r.allocClientDescription != "" {
   282  		alloc.ClientStatus = r.allocClientStatus
   283  		alloc.ClientDescription = r.allocClientDescription
   284  
   285  		// Copy over the task states so we don't lose them
   286  		r.taskStatusLock.RLock()
   287  		alloc.TaskStates = copyTaskStates(r.taskStates)
   288  		r.taskStatusLock.RUnlock()
   289  
   290  		r.allocLock.Unlock()
   291  		return alloc
   292  	}
   293  	r.allocLock.Unlock()
   294  
   295  	// Scan the task states to determine the status of the alloc
   296  	var pending, running, dead, failed bool
   297  	r.taskStatusLock.RLock()
   298  	alloc.TaskStates = copyTaskStates(r.taskStates)
   299  	for _, state := range r.taskStates {
   300  		switch state.State {
   301  		case structs.TaskStateRunning:
   302  			running = true
   303  		case structs.TaskStatePending:
   304  			pending = true
   305  		case structs.TaskStateDead:
   306  			if state.Failed {
   307  				failed = true
   308  			} else {
   309  				dead = true
   310  			}
   311  		}
   312  	}
   313  	r.taskStatusLock.RUnlock()
   314  
   315  	// Determine the alloc status
   316  	if failed {
   317  		alloc.ClientStatus = structs.AllocClientStatusFailed
   318  	} else if running {
   319  		alloc.ClientStatus = structs.AllocClientStatusRunning
   320  	} else if pending {
   321  		alloc.ClientStatus = structs.AllocClientStatusPending
   322  	} else if dead {
   323  		alloc.ClientStatus = structs.AllocClientStatusComplete
   324  	}
   325  
   326  	return alloc
   327  }
   328  
   329  // dirtySyncState is used to watch for state being marked dirty to sync
   330  func (r *AllocRunner) dirtySyncState() {
   331  	for {
   332  		select {
   333  		case <-r.dirtyCh:
   334  			r.syncStatus()
   335  		case <-r.destroyCh:
   336  			return
   337  		}
   338  	}
   339  }
   340  
   341  // syncStatus is used to run and sync the status when it changes
   342  func (r *AllocRunner) syncStatus() error {
   343  	// Get a copy of our alloc, update status server side and sync to disk
   344  	alloc := r.Alloc()
   345  	r.updater(alloc)
   346  	return r.saveAllocRunnerState()
   347  }
   348  
   349  // setStatus is used to update the allocation status
   350  func (r *AllocRunner) setStatus(status, desc string) {
   351  	r.allocLock.Lock()
   352  	r.allocClientStatus = status
   353  	r.allocClientDescription = desc
   354  	r.allocLock.Unlock()
   355  	select {
   356  	case r.dirtyCh <- struct{}{}:
   357  	default:
   358  	}
   359  }
   360  
   361  // setTaskState is used to set the status of a task. If state is empty then the
   362  // event is appended but not synced with the server. The event may be omitted
   363  func (r *AllocRunner) setTaskState(taskName, state string, event *structs.TaskEvent) {
   364  	r.taskStatusLock.Lock()
   365  	defer r.taskStatusLock.Unlock()
   366  	taskState, ok := r.taskStates[taskName]
   367  	if !ok {
   368  		taskState = &structs.TaskState{}
   369  		r.taskStates[taskName] = taskState
   370  	}
   371  
   372  	// Set the tasks state.
   373  	if event != nil {
   374  		if event.FailsTask {
   375  			taskState.Failed = true
   376  		}
   377  		r.appendTaskEvent(taskState, event)
   378  	}
   379  
   380  	if state == "" {
   381  		return
   382  	}
   383  
   384  	switch state {
   385  	case structs.TaskStateRunning:
   386  		// Capture the start time if it is just starting
   387  		if taskState.State != structs.TaskStateRunning {
   388  			taskState.StartedAt = time.Now().UTC()
   389  		}
   390  	case structs.TaskStateDead:
   391  		// Capture the finished time. If it has never started there is no finish
   392  		// time
   393  		if !taskState.StartedAt.IsZero() {
   394  			taskState.FinishedAt = time.Now().UTC()
   395  		}
   396  
   397  		// Find all tasks that are not the one that is dead and check if the one
   398  		// that is dead is a leader
   399  		var otherTaskRunners []*TaskRunner
   400  		var otherTaskNames []string
   401  		leader := false
   402  		for task, tr := range r.tasks {
   403  			if task != taskName {
   404  				otherTaskRunners = append(otherTaskRunners, tr)
   405  				otherTaskNames = append(otherTaskNames, task)
   406  			} else if tr.task.Leader {
   407  				leader = true
   408  			}
   409  		}
   410  
   411  		// If the task failed, we should kill all the other tasks in the task group.
   412  		if taskState.Failed {
   413  			for _, tr := range otherTaskRunners {
   414  				tr.Destroy(structs.NewTaskEvent(structs.TaskSiblingFailed).SetFailedSibling(taskName))
   415  			}
   416  			if len(otherTaskRunners) > 0 {
   417  				r.logger.Printf("[DEBUG] client: task %q failed, destroying other tasks in task group: %v", taskName, otherTaskNames)
   418  			}
   419  		} else if leader {
   420  			// If the task was a leader task we should kill all the other tasks.
   421  			for _, tr := range otherTaskRunners {
   422  				tr.Destroy(structs.NewTaskEvent(structs.TaskLeaderDead))
   423  			}
   424  			if len(otherTaskRunners) > 0 {
   425  				r.logger.Printf("[DEBUG] client: leader task %q is dead, destroying other tasks in task group: %v", taskName, otherTaskNames)
   426  			}
   427  		}
   428  	}
   429  
   430  	// Store the new state
   431  	taskState.State = state
   432  
   433  	select {
   434  	case r.dirtyCh <- struct{}{}:
   435  	default:
   436  	}
   437  }
   438  
   439  // appendTaskEvent updates the task status by appending the new event.
   440  func (r *AllocRunner) appendTaskEvent(state *structs.TaskState, event *structs.TaskEvent) {
   441  	capacity := 10
   442  	if state.Events == nil {
   443  		state.Events = make([]*structs.TaskEvent, 0, capacity)
   444  	}
   445  
   446  	// If we hit capacity, then shift it.
   447  	if len(state.Events) == capacity {
   448  		old := state.Events
   449  		state.Events = make([]*structs.TaskEvent, 0, capacity)
   450  		state.Events = append(state.Events, old[1:]...)
   451  	}
   452  
   453  	state.Events = append(state.Events, event)
   454  }
   455  
   456  // Run is a long running goroutine used to manage an allocation
   457  func (r *AllocRunner) Run() {
   458  	defer close(r.waitCh)
   459  	go r.dirtySyncState()
   460  
   461  	// Find the task group to run in the allocation
   462  	alloc := r.alloc
   463  	tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup)
   464  	if tg == nil {
   465  		r.logger.Printf("[ERR] client: alloc '%s' for missing task group '%s'", alloc.ID, alloc.TaskGroup)
   466  		r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("missing task group '%s'", alloc.TaskGroup))
   467  		return
   468  	}
   469  
   470  	// Create the execution context
   471  	r.allocDirLock.Lock()
   472  	if r.allocDir == nil {
   473  		// Build allocation directory
   474  		r.allocDir = allocdir.NewAllocDir(r.logger, filepath.Join(r.config.AllocDir, r.alloc.ID))
   475  		if err := r.allocDir.Build(); err != nil {
   476  			r.logger.Printf("[WARN] client: failed to build task directories: %v", err)
   477  			r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("failed to build task dirs for '%s'", alloc.TaskGroup))
   478  			r.allocDirLock.Unlock()
   479  			return
   480  		}
   481  
   482  		if r.otherAllocDir != nil {
   483  			if err := r.allocDir.Move(r.otherAllocDir, tg.Tasks); err != nil {
   484  				r.logger.Printf("[ERROR] client: failed to move alloc dir into alloc %q: %v", r.alloc.ID, err)
   485  			}
   486  			if err := r.otherAllocDir.Destroy(); err != nil {
   487  				r.logger.Printf("[ERROR] client: error destroying allocdir %v: %v", r.otherAllocDir.AllocDir, err)
   488  			}
   489  		}
   490  	}
   491  	r.allocDirLock.Unlock()
   492  
   493  	// Check if the allocation is in a terminal status. In this case, we don't
   494  	// start any of the task runners and directly wait for the destroy signal to
   495  	// clean up the allocation.
   496  	if alloc.TerminalStatus() {
   497  		r.logger.Printf("[DEBUG] client: alloc %q in terminal status, waiting for destroy", r.alloc.ID)
   498  		r.handleDestroy()
   499  		r.logger.Printf("[DEBUG] client: terminating runner for alloc '%s'", r.alloc.ID)
   500  		return
   501  	}
   502  
   503  	// Start the task runners
   504  	r.logger.Printf("[DEBUG] client: starting task runners for alloc '%s'", r.alloc.ID)
   505  	r.taskLock.Lock()
   506  	for _, task := range tg.Tasks {
   507  		if _, ok := r.restored[task.Name]; ok {
   508  			continue
   509  		}
   510  
   511  		r.allocDirLock.Lock()
   512  		taskdir := r.allocDir.NewTaskDir(task.Name)
   513  		r.allocDirLock.Unlock()
   514  
   515  		tr := NewTaskRunner(r.logger, r.config, r.setTaskState, taskdir, r.Alloc(), task.Copy(), r.vaultClient)
   516  		r.tasks[task.Name] = tr
   517  		tr.MarkReceived()
   518  
   519  		go tr.Run()
   520  	}
   521  	r.taskLock.Unlock()
   522  
   523  	// taskDestroyEvent contains an event that caused the destroyment of a task
   524  	// in the allocation.
   525  	var taskDestroyEvent *structs.TaskEvent
   526  
   527  OUTER:
   528  	// Wait for updates
   529  	for {
   530  		select {
   531  		case update := <-r.updateCh:
   532  			// Store the updated allocation.
   533  			r.allocLock.Lock()
   534  			r.alloc = update
   535  			r.allocLock.Unlock()
   536  
   537  			// Check if we're in a terminal status
   538  			if update.TerminalStatus() {
   539  				taskDestroyEvent = structs.NewTaskEvent(structs.TaskKilled)
   540  				break OUTER
   541  			}
   542  
   543  			// Update the task groups
   544  			runners := r.getTaskRunners()
   545  			for _, tr := range runners {
   546  				tr.Update(update)
   547  			}
   548  		case <-r.destroyCh:
   549  			taskDestroyEvent = structs.NewTaskEvent(structs.TaskKilled)
   550  			break OUTER
   551  		}
   552  	}
   553  
   554  	// Kill the task runners
   555  	r.destroyTaskRunners(taskDestroyEvent)
   556  
   557  	// Block until we should destroy the state of the alloc
   558  	r.handleDestroy()
   559  	r.logger.Printf("[DEBUG] client: terminating runner for alloc '%s'", r.alloc.ID)
   560  }
   561  
   562  // SetPreviousAllocDir sets the previous allocation directory of the current
   563  // allocation
   564  func (r *AllocRunner) SetPreviousAllocDir(allocDir *allocdir.AllocDir) {
   565  	r.otherAllocDir = allocDir
   566  }
   567  
   568  // destroyTaskRunners destroys the task runners, waits for them to terminate and
   569  // then saves state.
   570  func (r *AllocRunner) destroyTaskRunners(destroyEvent *structs.TaskEvent) {
   571  	// Destroy each sub-task
   572  	runners := r.getTaskRunners()
   573  	for _, tr := range runners {
   574  		tr.Destroy(destroyEvent)
   575  	}
   576  
   577  	// Wait for termination of the task runners
   578  	for _, tr := range runners {
   579  		<-tr.WaitCh()
   580  	}
   581  
   582  	// Final state sync
   583  	r.syncStatus()
   584  }
   585  
   586  // handleDestroy blocks till the AllocRunner should be destroyed and does the
   587  // necessary cleanup.
   588  func (r *AllocRunner) handleDestroy() {
   589  	for {
   590  		select {
   591  		case <-r.destroyCh:
   592  			if err := r.DestroyContext(); err != nil {
   593  				r.logger.Printf("[ERR] client: failed to destroy context for alloc '%s': %v",
   594  					r.alloc.ID, err)
   595  			}
   596  			if err := r.DestroyState(); err != nil {
   597  				r.logger.Printf("[ERR] client: failed to destroy state for alloc '%s': %v",
   598  					r.alloc.ID, err)
   599  			}
   600  
   601  			return
   602  		case <-r.updateCh:
   603  			r.logger.Printf("[ERR] client: dropping update to terminal alloc '%s'", r.alloc.ID)
   604  		}
   605  	}
   606  }
   607  
   608  // Update is used to update the allocation of the context
   609  func (r *AllocRunner) Update(update *structs.Allocation) {
   610  	select {
   611  	case r.updateCh <- update:
   612  	default:
   613  		r.logger.Printf("[ERR] client: dropping update to alloc '%s'", update.ID)
   614  	}
   615  }
   616  
   617  // StatsReporter returns an interface to query resource usage statistics of an
   618  // allocation
   619  func (r *AllocRunner) StatsReporter() AllocStatsReporter {
   620  	return r
   621  }
   622  
   623  // getTaskRunners is a helper that returns a copy of the task runners list using
   624  // the taskLock.
   625  func (r *AllocRunner) getTaskRunners() []*TaskRunner {
   626  	// Get the task runners
   627  	r.taskLock.RLock()
   628  	defer r.taskLock.RUnlock()
   629  	runners := make([]*TaskRunner, 0, len(r.tasks))
   630  	for _, tr := range r.tasks {
   631  		runners = append(runners, tr)
   632  	}
   633  	return runners
   634  }
   635  
   636  // LatestAllocStats returns the latest allocation stats. If the optional taskFilter is set
   637  // the allocation stats will only include the given task.
   638  func (r *AllocRunner) LatestAllocStats(taskFilter string) (*cstructs.AllocResourceUsage, error) {
   639  	astat := &cstructs.AllocResourceUsage{
   640  		Tasks: make(map[string]*cstructs.TaskResourceUsage),
   641  	}
   642  
   643  	var flat []*cstructs.TaskResourceUsage
   644  	if taskFilter != "" {
   645  		r.taskLock.RLock()
   646  		tr, ok := r.tasks[taskFilter]
   647  		r.taskLock.RUnlock()
   648  		if !ok {
   649  			return nil, fmt.Errorf("allocation %q has no task %q", r.alloc.ID, taskFilter)
   650  		}
   651  		l := tr.LatestResourceUsage()
   652  		if l != nil {
   653  			astat.Tasks[taskFilter] = l
   654  			flat = []*cstructs.TaskResourceUsage{l}
   655  			astat.Timestamp = l.Timestamp
   656  		}
   657  	} else {
   658  		// Get the task runners
   659  		runners := r.getTaskRunners()
   660  		for _, tr := range runners {
   661  			l := tr.LatestResourceUsage()
   662  			if l != nil {
   663  				astat.Tasks[tr.task.Name] = l
   664  				flat = append(flat, l)
   665  				if l.Timestamp > astat.Timestamp {
   666  					astat.Timestamp = l.Timestamp
   667  				}
   668  			}
   669  		}
   670  	}
   671  
   672  	astat.ResourceUsage = sumTaskResourceUsage(flat)
   673  	return astat, nil
   674  }
   675  
   676  // sumTaskResourceUsage takes a set of task resources and sums their resources
   677  func sumTaskResourceUsage(usages []*cstructs.TaskResourceUsage) *cstructs.ResourceUsage {
   678  	summed := &cstructs.ResourceUsage{
   679  		MemoryStats: &cstructs.MemoryStats{},
   680  		CpuStats:    &cstructs.CpuStats{},
   681  	}
   682  	for _, usage := range usages {
   683  		summed.Add(usage.ResourceUsage)
   684  	}
   685  	return summed
   686  }
   687  
   688  // shouldUpdate takes the AllocModifyIndex of an allocation sent from the server and
   689  // checks if the current running allocation is behind and should be updated.
   690  func (r *AllocRunner) shouldUpdate(serverIndex uint64) bool {
   691  	r.allocLock.Lock()
   692  	defer r.allocLock.Unlock()
   693  	return r.alloc.AllocModifyIndex < serverIndex
   694  }
   695  
   696  // Destroy is used to indicate that the allocation context should be destroyed
   697  func (r *AllocRunner) Destroy() {
   698  	r.destroyLock.Lock()
   699  	defer r.destroyLock.Unlock()
   700  
   701  	if r.destroy {
   702  		return
   703  	}
   704  	r.destroy = true
   705  	close(r.destroyCh)
   706  }
   707  
   708  // WaitCh returns a channel to wait for termination
   709  func (r *AllocRunner) WaitCh() <-chan struct{} {
   710  	return r.waitCh
   711  }