github.com/djenriquez/nomad-1@v0.8.1/client/alloc_runner.go (about)

     1  package client
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"log"
     7  	"os"
     8  	"path/filepath"
     9  	"sync"
    10  	"time"
    11  
    12  	metrics "github.com/armon/go-metrics"
    13  	"github.com/boltdb/bolt"
    14  	"github.com/hashicorp/go-multierror"
    15  	"github.com/hashicorp/nomad/client/allocdir"
    16  	"github.com/hashicorp/nomad/client/config"
    17  	"github.com/hashicorp/nomad/client/vaultclient"
    18  	"github.com/hashicorp/nomad/helper"
    19  	"github.com/hashicorp/nomad/nomad/structs"
    20  
    21  	cstructs "github.com/hashicorp/nomad/client/structs"
    22  )
    23  
var (
	// The following are the key paths written to the state database. Each key
	// addresses one independently written piece of alloc runner state inside
	// the allocation's bucket, so that pieces which rarely change do not have
	// to be rewritten on every save.
	allocRunnerStateAllocKey     = []byte("alloc")
	allocRunnerStateImmutableKey = []byte("immutable")
	allocRunnerStateMutableKey   = []byte("mutable")
	allocRunnerStateAllocDirKey  = []byte("alloc-dir")
)
    31  
// AllocStateUpdater is used to update the status of an allocation. The
// AllocRunner calls it with a copy of its allocation each time its status is
// synced.
type AllocStateUpdater func(alloc *structs.Allocation)
    34  
// AllocStatsReporter is implemented by types that can report the most recent
// resource usage of an allocation.
type AllocStatsReporter interface {
	// LatestAllocStats returns the latest resource usage, or an error.
	// NOTE(review): taskFilter presumably restricts the result to a single
	// task; confirm against the implementation.
	LatestAllocStats(taskFilter string) (*cstructs.AllocResourceUsage, error)
}
    38  
// AllocRunner is used to wrap an allocation and provide the execution context.
// It holds the allocation, its directory, the per-task runners and states, and
// the bookkeeping needed to persist that state to the state database.
type AllocRunner struct {
	config  *config.Config
	updater AllocStateUpdater
	logger  *log.Logger

	// allocID is the ID of this runner's allocation. Since it does not
	// change for the lifetime of the AllocRunner it is safe to read
	// without acquiring a lock (unlike alloc).
	allocID string

	// alloc and the explicit status and health overrides below are read and
	// written under allocLock (see Alloc and setStatus). allocBroadcast is
	// used to publish allocation updates to listeners.
	alloc                  *structs.Allocation
	allocClientStatus      string // Explicit status of allocation. Set when there are failures
	allocClientDescription string
	allocHealth            *bool // Whether the allocation is healthy
	allocBroadcast         *cstructs.AllocBroadcaster
	allocLock              sync.Mutex

	// dirtyCh is signalled (without blocking) whenever the status changes so
	// that dirtySyncState syncs and persists the new state.
	dirtyCh chan struct{}

	// allocDir is the allocation's directory. allocDirLock is held while the
	// directory is built and while it is copied for persistence.
	allocDir     *allocdir.AllocDir
	allocDirLock sync.Mutex

	// tasks maps a task name to its TaskRunner, taskStates maps a task name
	// to its last known state, and restored records the names of tasks whose
	// state was found during RestoreState.
	// NOTE(review): taskLock presumably guards tasks and restored; its use is
	// not visible in this part of the file.
	tasks      map[string]*TaskRunner
	taskStates map[string]*structs.TaskState
	restored   map[string]struct{}
	taskLock   sync.RWMutex

	// taskStatusLock guards taskStates.
	taskStatusLock sync.RWMutex

	// updateCh is a buffered channel of allocation updates.
	// NOTE(review): the consumer is not visible in this part of the file.
	updateCh chan *structs.Allocation

	vaultClient  vaultclient.VaultClient
	consulClient ConsulServiceAPI

	// prevAlloc allows for waiting until a previous allocation exits and
	// then migrating its data. If sticky volumes aren't used and there's no
	// previous allocation a noop implementation is used so it is always
	// safe to call.
	prevAlloc prevAllocWatcher

	// ctx is cancelled with exitFn to cause the alloc to be destroyed
	// (stopped and GC'd).
	ctx    context.Context
	exitFn context.CancelFunc

	// waitCh is closed when the Run method exits. At that point the alloc
	// has stopped and been GC'd.
	waitCh chan struct{}

	// State related fields
	// stateDB is used to store the alloc runner's state. allocStateLock
	// serializes saving and destroying that state.
	stateDB        *bolt.DB
	allocStateLock sync.Mutex

	// persistedEval is the last persisted evaluation ID. Since evaluation
	// IDs change on every allocation update we only need to persist the
	// allocation when its eval ID != the last persisted eval ID.
	persistedEvalLock sync.Mutex
	persistedEval     string

	// immutablePersisted and allocDirPersisted are used to track whether the
	// immutable data and the alloc dir have been persisted. Once persisted we
	// can lower write volume by not re-writing these values.
	immutablePersisted bool
	allocDirPersisted  bool

	// baseLabels are used when emitting tagged metrics. All alloc runner metrics
	// will have these tags, and optionally more.
	baseLabels []metrics.Label
}
   110  
// COMPAT: Remove in 0.7.0
// allocRunnerState is used to snapshot the state of the alloc runner. It is
// the legacy (pre v0.6.0) on-disk format and is only read by RestoreState when
// an old state file is found.
type allocRunnerState struct {
	Version                string
	Alloc                  *structs.Allocation
	AllocDir               *allocdir.AllocDir
	AllocClientStatus      string
	AllocClientDescription string

	// COMPAT: Remove in 0.7.0: removing will break upgrading directly from
	//         0.5.2, so don't remove in the 0.6 series.
	// Context is deprecated and only used to migrate from older releases.
	// It will be removed in the future. RestoreState rebuilds the AllocDir
	// from it when the snapshot has a Context but no AllocDir.
	Context *struct {
		AllocID  string // unused; included for completeness
		AllocDir struct {
			AllocDir  string
			SharedDir string // unused; included for completeness
			TaskDirs  map[string]string
		}
	} `json:"Context,omitempty"`
}
   133  
// allocRunnerAllocState is state that only has to be written when the alloc
// changes. It is stored under allocRunnerStateAllocKey and rewritten only when
// the allocation's EvalID differs from the last persisted one.
type allocRunnerAllocState struct {
	Alloc *structs.Allocation
}
   139  
// allocRunnerImmutableState is state that only has to be written once. It is
// stored under allocRunnerStateImmutableKey.
type allocRunnerImmutableState struct {
	// Version is the version number taken from the client config when the
	// state is first saved.
	Version string
}
   144  
// allocRunnerMutableState is state that has to be written on each save as it
// changes over the life-cycle of the alloc_runner. It is stored under
// allocRunnerStateMutableKey.
type allocRunnerMutableState struct {
	// AllocClientStatus and AllocClientDescription are the explicitly set
	// status and description of the allocation, if any.
	AllocClientStatus      string
	AllocClientDescription string
	// TaskStates holds the state of each task, keyed by task name.
	TaskStates map[string]*structs.TaskState
	// DeploymentStatus is the allocation's deployment status at save time.
	DeploymentStatus *structs.AllocDeploymentStatus
}
   153  
   154  // NewAllocRunner is used to create a new allocation context
   155  func NewAllocRunner(logger *log.Logger, config *config.Config, stateDB *bolt.DB, updater AllocStateUpdater,
   156  	alloc *structs.Allocation, vaultClient vaultclient.VaultClient, consulClient ConsulServiceAPI,
   157  	prevAlloc prevAllocWatcher) *AllocRunner {
   158  
   159  	ar := &AllocRunner{
   160  		config:         config,
   161  		stateDB:        stateDB,
   162  		updater:        updater,
   163  		logger:         logger,
   164  		alloc:          alloc,
   165  		allocID:        alloc.ID,
   166  		allocBroadcast: cstructs.NewAllocBroadcaster(8),
   167  		prevAlloc:      prevAlloc,
   168  		dirtyCh:        make(chan struct{}, 1),
   169  		allocDir:       allocdir.NewAllocDir(logger, filepath.Join(config.AllocDir, alloc.ID)),
   170  		tasks:          make(map[string]*TaskRunner),
   171  		taskStates:     copyTaskStates(alloc.TaskStates),
   172  		restored:       make(map[string]struct{}),
   173  		updateCh:       make(chan *structs.Allocation, 64),
   174  		waitCh:         make(chan struct{}),
   175  		vaultClient:    vaultClient,
   176  		consulClient:   consulClient,
   177  	}
   178  
   179  	// TODO Should be passed a context
   180  	ar.ctx, ar.exitFn = context.WithCancel(context.TODO())
   181  
   182  	return ar
   183  }
   184  
   185  // setBaseLabels creates the set of base labels. This should be called after
   186  // Restore has been called so the allocation is guaranteed to be loaded
   187  func (r *AllocRunner) setBaseLabels() {
   188  	r.baseLabels = make([]metrics.Label, 0, 3)
   189  
   190  	if r.alloc.Job != nil {
   191  		r.baseLabels = append(r.baseLabels, metrics.Label{
   192  			Name:  "job",
   193  			Value: r.alloc.Job.Name,
   194  		})
   195  	}
   196  	if r.alloc.TaskGroup != "" {
   197  		r.baseLabels = append(r.baseLabels, metrics.Label{
   198  			Name:  "task_group",
   199  			Value: r.alloc.TaskGroup,
   200  		})
   201  	}
   202  	if r.config != nil && r.config.Node != nil {
   203  		r.baseLabels = append(r.baseLabels, metrics.Label{
   204  			Name:  "node_id",
   205  			Value: r.config.Node.ID,
   206  		})
   207  	}
   208  }
   209  
   210  // pre060StateFilePath returns the path to our state file that would have been
   211  // written pre v0.6.0
   212  // COMPAT: Remove in 0.7.0
   213  func (r *AllocRunner) pre060StateFilePath() string {
   214  	r.allocLock.Lock()
   215  	defer r.allocLock.Unlock()
   216  	path := filepath.Join(r.config.StateDir, "alloc", r.allocID, "state.json")
   217  	return path
   218  }
   219  
   220  // RestoreState is used to restore the state of the alloc runner
   221  func (r *AllocRunner) RestoreState() error {
   222  
   223  	// COMPAT: Remove in 0.7.0
   224  	// Check if the old snapshot is there
   225  	oldPath := r.pre060StateFilePath()
   226  	var snap allocRunnerState
   227  	var upgrading bool
   228  	if err := pre060RestoreState(oldPath, &snap); err == nil {
   229  		// Restore fields
   230  		r.logger.Printf("[INFO] client: restoring pre v0.6.0 alloc runner state for alloc %q", r.allocID)
   231  		r.alloc = snap.Alloc
   232  		r.allocDir = snap.AllocDir
   233  		r.allocClientStatus = snap.AllocClientStatus
   234  		r.allocClientDescription = snap.AllocClientDescription
   235  
   236  		if r.alloc != nil {
   237  			r.taskStates = snap.Alloc.TaskStates
   238  		}
   239  
   240  		// COMPAT: Remove in 0.7.0
   241  		// #2132 Upgrade path: if snap.AllocDir is nil, try to convert old
   242  		// Context struct to new AllocDir struct
   243  		if snap.AllocDir == nil && snap.Context != nil {
   244  			r.logger.Printf("[DEBUG] client: migrating state snapshot for alloc %q", r.allocID)
   245  			r.allocDir = allocdir.NewAllocDir(r.logger, snap.Context.AllocDir.AllocDir)
   246  			for taskName := range snap.Context.AllocDir.TaskDirs {
   247  				r.allocDir.NewTaskDir(taskName)
   248  			}
   249  		}
   250  
   251  		// Delete the old state
   252  		os.RemoveAll(oldPath)
   253  		upgrading = true
   254  	} else if !os.IsNotExist(err) {
   255  		// Something corrupt in the old state file
   256  		return err
   257  	} else {
   258  		// We are doing a normal restore
   259  		err := r.stateDB.View(func(tx *bolt.Tx) error {
   260  			bkt, err := getAllocationBucket(tx, r.allocID)
   261  			if err != nil {
   262  				return fmt.Errorf("failed to get allocation bucket: %v", err)
   263  			}
   264  
   265  			// Get the state objects
   266  			var mutable allocRunnerMutableState
   267  			var immutable allocRunnerImmutableState
   268  			var allocState allocRunnerAllocState
   269  			var allocDir allocdir.AllocDir
   270  
   271  			if err := getObject(bkt, allocRunnerStateAllocKey, &allocState); err != nil {
   272  				return fmt.Errorf("failed to read alloc runner alloc state: %v", err)
   273  			}
   274  			if err := getObject(bkt, allocRunnerStateImmutableKey, &immutable); err != nil {
   275  				return fmt.Errorf("failed to read alloc runner immutable state: %v", err)
   276  			}
   277  			if err := getObject(bkt, allocRunnerStateMutableKey, &mutable); err != nil {
   278  				return fmt.Errorf("failed to read alloc runner mutable state: %v", err)
   279  			}
   280  			if err := getObject(bkt, allocRunnerStateAllocDirKey, &allocDir); err != nil {
   281  				return fmt.Errorf("failed to read alloc runner alloc_dir state: %v", err)
   282  			}
   283  
   284  			// Populate the fields
   285  			r.alloc = allocState.Alloc
   286  			r.allocDir = &allocDir
   287  			r.allocClientStatus = mutable.AllocClientStatus
   288  			r.allocClientDescription = mutable.AllocClientDescription
   289  			r.taskStates = mutable.TaskStates
   290  			r.alloc.ClientStatus = getClientStatus(r.taskStates)
   291  			r.alloc.DeploymentStatus = mutable.DeploymentStatus
   292  			return nil
   293  		})
   294  
   295  		if err != nil {
   296  			return fmt.Errorf("failed to read allocation state: %v", err)
   297  		}
   298  	}
   299  
   300  	var snapshotErrors multierror.Error
   301  	if r.alloc == nil {
   302  		snapshotErrors.Errors = append(snapshotErrors.Errors, fmt.Errorf("alloc_runner snapshot includes a nil allocation"))
   303  	}
   304  	if r.allocDir == nil {
   305  		snapshotErrors.Errors = append(snapshotErrors.Errors, fmt.Errorf("alloc_runner snapshot includes a nil alloc dir"))
   306  	}
   307  	if e := snapshotErrors.ErrorOrNil(); e != nil {
   308  		return e
   309  	}
   310  
   311  	tg := r.alloc.Job.LookupTaskGroup(r.alloc.TaskGroup)
   312  	if tg == nil {
   313  		return fmt.Errorf("restored allocation doesn't contain task group %q", r.alloc.TaskGroup)
   314  	}
   315  
   316  	// Restore the task runners
   317  	taskDestroyEvent := structs.NewTaskEvent(structs.TaskKilled)
   318  	var mErr multierror.Error
   319  	for _, task := range tg.Tasks {
   320  		name := task.Name
   321  		state := r.taskStates[name]
   322  
   323  		// Nomad exited before task could start, nothing to restore.
   324  		// AllocRunner.Run will start a new TaskRunner for this task
   325  		if state == nil {
   326  			continue
   327  		}
   328  
   329  		// Mark the task as restored.
   330  		r.restored[name] = struct{}{}
   331  
   332  		td, ok := r.allocDir.TaskDirs[name]
   333  		if !ok {
   334  			// Create the task dir metadata if it doesn't exist.
   335  			// Since task dirs are created during r.Run() the
   336  			// client may save state and exit before all task dirs
   337  			// are created
   338  			td = r.allocDir.NewTaskDir(name)
   339  		}
   340  
   341  		// Skip tasks in terminal states.
   342  		if state.State == structs.TaskStateDead {
   343  			continue
   344  		}
   345  
   346  		tr := NewTaskRunner(r.logger, r.config, r.stateDB, r.setTaskState, td, r.Alloc(), task, r.vaultClient, r.consulClient)
   347  		r.tasks[name] = tr
   348  
   349  		if restartReason, err := tr.RestoreState(); err != nil {
   350  			r.logger.Printf("[ERR] client: failed to restore state for alloc %s task %q: %v", r.allocID, name, err)
   351  			mErr.Errors = append(mErr.Errors, err)
   352  		} else if !r.alloc.TerminalStatus() {
   353  			// Only start if the alloc isn't in a terminal status.
   354  			go tr.Run()
   355  
   356  			if upgrading {
   357  				if err := tr.SaveState(); err != nil {
   358  					r.logger.Printf("[WARN] client: initial save state for alloc %s task %s failed: %v", r.allocID, name, err)
   359  				}
   360  			}
   361  
   362  			// Restart task runner if RestoreState gave a reason
   363  			if restartReason != "" {
   364  				r.logger.Printf("[INFO] client: restarting alloc %s task %s: %v", r.allocID, name, restartReason)
   365  				const failure = false
   366  				tr.Restart("upgrade", restartReason, failure)
   367  			}
   368  		} else {
   369  			tr.Destroy(taskDestroyEvent)
   370  		}
   371  	}
   372  
   373  	return mErr.ErrorOrNil()
   374  }
   375  
   376  // SaveState is used to snapshot the state of the alloc runner
   377  // if the fullSync is marked as false only the state of the Alloc Runner
   378  // is snapshotted. If fullSync is marked as true, we snapshot
   379  // all the Task Runners associated with the Alloc
   380  func (r *AllocRunner) SaveState() error {
   381  	if err := r.saveAllocRunnerState(); err != nil {
   382  		return err
   383  	}
   384  
   385  	// Save state for each task
   386  	runners := r.getTaskRunners()
   387  	var mErr multierror.Error
   388  	for _, tr := range runners {
   389  		if err := tr.SaveState(); err != nil {
   390  			mErr.Errors = append(mErr.Errors, fmt.Errorf("failed to save state for alloc %s task %q: %v",
   391  				r.allocID, tr.task.Name, err))
   392  		}
   393  	}
   394  	return mErr.ErrorOrNil()
   395  }
   396  
// saveAllocRunnerState persists the alloc runner's own state (not that of its
// task runners) to the allocation's bucket in the state database, using a
// single batched transaction. Saves are serialized by allocStateLock.
//
// It does nothing and returns nil once the runner's context has been
// cancelled. The allocation is only written when its EvalID differs from the
// last persisted one, the immutable state and the alloc dir are only written
// until their first successful commit, and the mutable state is written on
// every call.
func (r *AllocRunner) saveAllocRunnerState() error {
	r.allocStateLock.Lock()
	defer r.allocStateLock.Unlock()

	// Nothing is saved once the runner has been cancelled.
	if r.ctx.Err() == context.Canceled {
		return nil
	}

	// Grab all the relevant data
	alloc := r.Alloc()

	r.allocLock.Lock()
	allocClientStatus := r.allocClientStatus
	allocClientDescription := r.allocClientDescription
	r.allocLock.Unlock()

	r.allocDirLock.Lock()
	allocDir := r.allocDir.Copy()
	r.allocDirLock.Unlock()

	// Start the transaction.
	return r.stateDB.Batch(func(tx *bolt.Tx) error {

		// Grab the allocation bucket
		allocBkt, err := getAllocationBucket(tx, r.allocID)
		if err != nil {
			return fmt.Errorf("failed to retrieve allocation bucket: %v", err)
		}

		// Write the allocation if the eval has changed
		r.persistedEvalLock.Lock()
		lastPersisted := r.persistedEval
		r.persistedEvalLock.Unlock()
		if alloc.EvalID != lastPersisted {
			allocState := &allocRunnerAllocState{
				Alloc: alloc,
			}

			// NOTE(review): allocState is already a pointer, so this passes a
			// pointer to a pointer (same for immutable and mutable below),
			// unlike allocDir which is passed directly. Confirm putObject
			// treats both forms the same.
			if err := putObject(allocBkt, allocRunnerStateAllocKey, &allocState); err != nil {
				return fmt.Errorf("failed to write alloc_runner alloc state: %v", err)
			}

			// Only remember the eval ID once the write has been committed.
			tx.OnCommit(func() {
				r.persistedEvalLock.Lock()
				r.persistedEval = alloc.EvalID
				r.persistedEvalLock.Unlock()
			})
		}

		// Write immutable data iff it hasn't been written yet
		if !r.immutablePersisted {
			immutable := &allocRunnerImmutableState{
				Version: r.config.Version.VersionNumber(),
			}

			if err := putObject(allocBkt, allocRunnerStateImmutableKey, &immutable); err != nil {
				return fmt.Errorf("failed to write alloc_runner immutable state: %v", err)
			}

			// Only mark as persisted once the write has been committed.
			tx.OnCommit(func() {
				r.immutablePersisted = true
			})
		}

		// Write the alloc dir data if it hasn't been written before and it exists.
		if !r.allocDirPersisted && allocDir != nil {
			if err := putObject(allocBkt, allocRunnerStateAllocDirKey, allocDir); err != nil {
				return fmt.Errorf("failed to write alloc_runner allocDir state: %v", err)
			}

			// Only mark as persisted once the write has been committed.
			tx.OnCommit(func() {
				r.allocDirPersisted = true
			})
		}

		// Write the mutable state every time
		mutable := &allocRunnerMutableState{
			AllocClientStatus:      allocClientStatus,
			AllocClientDescription: allocClientDescription,
			TaskStates:             alloc.TaskStates,
			DeploymentStatus:       alloc.DeploymentStatus,
		}

		if err := putObject(allocBkt, allocRunnerStateMutableKey, &mutable); err != nil {
			return fmt.Errorf("failed to write alloc_runner mutable state: %v", err)
		}

		return nil
	})
}
   487  
   488  // DestroyState is used to cleanup after ourselves
   489  func (r *AllocRunner) DestroyState() error {
   490  	r.allocStateLock.Lock()
   491  	defer r.allocStateLock.Unlock()
   492  
   493  	return r.stateDB.Update(func(tx *bolt.Tx) error {
   494  		if err := deleteAllocationBucket(tx, r.allocID); err != nil {
   495  			return fmt.Errorf("failed to delete allocation bucket: %v", err)
   496  		}
   497  		return nil
   498  	})
   499  }
   500  
// DestroyContext is used to destroy the context. It calls Destroy on the
// alloc runner's allocation directory and returns the resulting error.
// NOTE(review): allocDir is accessed here without holding allocDirLock.
func (r *AllocRunner) DestroyContext() error {
	return r.allocDir.Destroy()
}
   505  
// GetAllocDir returns the alloc dir for the alloc runner. The runner's own
// pointer is returned, not a copy, and no lock is taken.
func (r *AllocRunner) GetAllocDir() *allocdir.AllocDir {
	return r.allocDir
}
   510  
// GetListener returns a listener for updates broadcast by this alloc runner.
// Callers are responsible for calling Close on their Listener. Updates are
// published by sendBroadcast each time the status is synced.
func (r *AllocRunner) GetListener() *cstructs.AllocListener {
	return r.allocBroadcast.Listen()
}
   516  
   517  // copyTaskStates returns a copy of the passed task states.
   518  func copyTaskStates(states map[string]*structs.TaskState) map[string]*structs.TaskState {
   519  	copy := make(map[string]*structs.TaskState, len(states))
   520  	for task, state := range states {
   521  		copy[task] = state.Copy()
   522  	}
   523  	return copy
   524  }
   525  
   526  // finalizeTerminalAlloc sets any missing required fields like
   527  // finishedAt in the alloc runner's task States. finishedAt is used
   528  // to calculate reschedule time for failed allocs, so we make sure that
   529  // it is set
   530  func (r *AllocRunner) finalizeTerminalAlloc(alloc *structs.Allocation) {
   531  	if !alloc.ClientTerminalStatus() {
   532  		return
   533  	}
   534  	r.taskStatusLock.Lock()
   535  	defer r.taskStatusLock.Unlock()
   536  
   537  	group := alloc.Job.LookupTaskGroup(alloc.TaskGroup)
   538  	if r.taskStates == nil {
   539  		r.taskStates = make(map[string]*structs.TaskState)
   540  	}
   541  	now := time.Now()
   542  	for _, task := range group.Tasks {
   543  		ts, ok := r.taskStates[task.Name]
   544  		if !ok {
   545  			ts = &structs.TaskState{}
   546  			r.taskStates[task.Name] = ts
   547  		}
   548  		if ts.FinishedAt.IsZero() {
   549  			ts.FinishedAt = now
   550  		}
   551  	}
   552  	alloc.TaskStates = copyTaskStates(r.taskStates)
   553  }
   554  
// Alloc returns the associated allocation. The result is a copy of the
// runner's allocation (made without deep copying the job) with the client
// status, task states and deployment health filled in from the runner's
// current view:
//
//   - if a status or description has been set explicitly, those are used;
//   - otherwise the client status is computed from the task states, and a
//     failed alloc that is part of a deployment is marked unhealthy.
//
// In both cases the copy is passed through finalizeTerminalAlloc before it is
// returned.
func (r *AllocRunner) Alloc() *structs.Allocation {
	r.allocLock.Lock()

	// Don't do a deep copy of the job
	alloc := r.alloc.CopySkipJob()

	// The status has explicitly been set.
	if r.allocClientStatus != "" || r.allocClientDescription != "" {
		alloc.ClientStatus = r.allocClientStatus
		alloc.ClientDescription = r.allocClientDescription

		// Copy over the task states so we don't lose them
		r.taskStatusLock.RLock()
		alloc.TaskStates = copyTaskStates(r.taskStates)
		r.taskStatusLock.RUnlock()

		// allocLock is released before finalizeTerminalAlloc, which takes
		// taskStatusLock for writing.
		r.allocLock.Unlock()
		r.finalizeTerminalAlloc(alloc)
		return alloc
	}

	// The health has been set
	if r.allocHealth != nil {
		if alloc.DeploymentStatus == nil {
			alloc.DeploymentStatus = &structs.AllocDeploymentStatus{}
		}
		alloc.DeploymentStatus.Healthy = helper.BoolToPtr(*r.allocHealth)
	}
	r.allocLock.Unlock()

	// Scan the task states to determine the status of the alloc
	r.taskStatusLock.RLock()
	alloc.TaskStates = copyTaskStates(r.taskStates)
	alloc.ClientStatus = getClientStatus(r.taskStates)
	r.taskStatusLock.RUnlock()

	// If the client status is failed and we are part of a deployment, mark the
	// alloc as unhealthy. This guards against the watcher not being started.
	// NOTE(review): only the local copy is modified while allocLock is held
	// here; confirm whether the lock is actually needed.
	r.allocLock.Lock()
	if alloc.ClientStatus == structs.AllocClientStatusFailed &&
		alloc.DeploymentID != "" && !alloc.DeploymentStatus.IsUnhealthy() {
		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: helper.BoolToPtr(false),
		}
	}
	r.allocLock.Unlock()
	r.finalizeTerminalAlloc(alloc)
	return alloc
}
   605  
   606  // getClientStatus takes in the task states for a given allocation and computes
   607  // the client status
   608  func getClientStatus(taskStates map[string]*structs.TaskState) string {
   609  	var pending, running, dead, failed bool
   610  	for _, state := range taskStates {
   611  		switch state.State {
   612  		case structs.TaskStateRunning:
   613  			running = true
   614  		case structs.TaskStatePending:
   615  			pending = true
   616  		case structs.TaskStateDead:
   617  			if state.Failed {
   618  				failed = true
   619  			} else {
   620  				dead = true
   621  			}
   622  		}
   623  	}
   624  
   625  	// Determine the alloc status
   626  	if failed {
   627  		return structs.AllocClientStatusFailed
   628  	} else if running {
   629  		return structs.AllocClientStatusRunning
   630  	} else if pending {
   631  		return structs.AllocClientStatusPending
   632  	} else if dead {
   633  		return structs.AllocClientStatusComplete
   634  	}
   635  
   636  	return ""
   637  }
   638  
   639  // dirtySyncState is used to watch for state being marked dirty to sync
   640  func (r *AllocRunner) dirtySyncState() {
   641  	for {
   642  		select {
   643  		case <-r.dirtyCh:
   644  			if err := r.syncStatus(); err != nil {
   645  				// Only WARN instead of ERR because we continue on
   646  				r.logger.Printf("[WARN] client: error persisting alloc %q state: %v",
   647  					r.allocID, err)
   648  			}
   649  		case <-r.ctx.Done():
   650  			return
   651  		}
   652  	}
   653  }
   654  
   655  // syncStatus is used to run and sync the status when it changes
   656  func (r *AllocRunner) syncStatus() error {
   657  	// Get a copy of our alloc, update status server side and sync to disk
   658  	alloc := r.Alloc()
   659  	r.updater(alloc)
   660  	r.sendBroadcast(alloc)
   661  	return r.saveAllocRunnerState()
   662  }
   663  
   664  // sendBroadcast broadcasts an alloc update.
   665  func (r *AllocRunner) sendBroadcast(alloc *structs.Allocation) {
   666  	// Try to send the alloc up to three times with a delay to allow recovery.
   667  	sent := false
   668  	for i := 0; i < 3; i++ {
   669  		if sent = r.allocBroadcast.Send(alloc); sent {
   670  			break
   671  		}
   672  		time.Sleep(500 * time.Millisecond)
   673  	}
   674  	if !sent {
   675  		r.logger.Printf("[WARN] client: failed to broadcast update to allocation %q", r.allocID)
   676  	}
   677  }
   678  
   679  // setStatus is used to update the allocation status
   680  func (r *AllocRunner) setStatus(status, desc string) {
   681  	r.allocLock.Lock()
   682  	r.allocClientStatus = status
   683  	r.allocClientDescription = desc
   684  	r.allocLock.Unlock()
   685  	select {
   686  	case r.dirtyCh <- struct{}{}:
   687  	default:
   688  	}
   689  }
   690  
   691  // setTaskState is used to set the status of a task. If lazySync is set then the
   692  // event is appended but not synced with the server. If state is omitted, the
   693  // last known state is used.
   694  func (r *AllocRunner) setTaskState(taskName, state string, event *structs.TaskEvent, lazySync bool) {
   695  	r.taskStatusLock.Lock()
   696  	defer r.taskStatusLock.Unlock()
   697  	taskState, ok := r.taskStates[taskName]
   698  	if !ok {
   699  		taskState = &structs.TaskState{}
   700  		r.taskStates[taskName] = taskState
   701  	}
   702  
   703  	// Set the tasks state.
   704  	if event != nil {
   705  		if event.FailsTask {
   706  			taskState.Failed = true
   707  		}
   708  		if event.Type == structs.TaskRestarting {
   709  			if !r.config.DisableTaggedMetrics {
   710  				metrics.IncrCounterWithLabels([]string{"client", "allocs", "restart"},
   711  					1, r.baseLabels)
   712  			}
   713  			if r.config.BackwardsCompatibleMetrics {
   714  				metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, taskName, "restart"}, 1)
   715  			}
   716  			taskState.Restarts++
   717  			taskState.LastRestart = time.Unix(0, event.Time)
   718  		}
   719  		r.appendTaskEvent(taskState, event)
   720  	}
   721  
   722  	if lazySync {
   723  		return
   724  	}
   725  
   726  	// If the state hasn't been set use the existing state.
   727  	if state == "" {
   728  		state = taskState.State
   729  		if taskState.State == "" {
   730  			state = structs.TaskStatePending
   731  		}
   732  	}
   733  
   734  	switch state {
   735  	case structs.TaskStateRunning:
   736  		// Capture the start time if it is just starting
   737  		if taskState.State != structs.TaskStateRunning {
   738  			taskState.StartedAt = time.Now().UTC()
   739  			if !r.config.DisableTaggedMetrics {
   740  				metrics.IncrCounterWithLabels([]string{"client", "allocs", "running"},
   741  					1, r.baseLabels)
   742  			}
   743  			if r.config.BackwardsCompatibleMetrics {
   744  				metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, taskName, "running"}, 1)
   745  			}
   746  		}
   747  	case structs.TaskStateDead:
   748  		// Capture the finished time if not already set
   749  		if taskState.FinishedAt.IsZero() {
   750  			taskState.FinishedAt = time.Now().UTC()
   751  		}
   752  
   753  		// Find all tasks that are not the one that is dead and check if the one
   754  		// that is dead is a leader
   755  		var otherTaskRunners []*TaskRunner
   756  		var otherTaskNames []string
   757  		leader := false
   758  		for task, tr := range r.tasks {
   759  			if task != taskName {
   760  				otherTaskRunners = append(otherTaskRunners, tr)
   761  				otherTaskNames = append(otherTaskNames, task)
   762  			} else if tr.task.Leader {
   763  				leader = true
   764  			}
   765  		}
   766  
   767  		// Emitting metrics to indicate task complete and failures
   768  		if taskState.Failed {
   769  			if !r.config.DisableTaggedMetrics {
   770  				metrics.IncrCounterWithLabels([]string{"client", "allocs", "failed"},
   771  					1, r.baseLabels)
   772  			}
   773  			if r.config.BackwardsCompatibleMetrics {
   774  				metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, taskName, "failed"}, 1)
   775  			}
   776  		} else {
   777  			if !r.config.DisableTaggedMetrics {
   778  				metrics.IncrCounterWithLabels([]string{"client", "allocs", "complete"},
   779  					1, r.baseLabels)
   780  			}
   781  			if r.config.BackwardsCompatibleMetrics {
   782  				metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, taskName, "complete"}, 1)
   783  			}
   784  		}
   785  		// If the task failed, we should kill all the other tasks in the task group.
   786  		if taskState.Failed {
   787  			for _, tr := range otherTaskRunners {
   788  				tr.Destroy(structs.NewTaskEvent(structs.TaskSiblingFailed).SetFailedSibling(taskName))
   789  			}
   790  			if len(otherTaskRunners) > 0 {
   791  				r.logger.Printf("[DEBUG] client: task %q failed, destroying other tasks in task group: %v", taskName, otherTaskNames)
   792  			}
   793  		} else if leader {
   794  			// If the task was a leader task we should kill all the other tasks.
   795  			for _, tr := range otherTaskRunners {
   796  				tr.Destroy(structs.NewTaskEvent(structs.TaskLeaderDead))
   797  			}
   798  			if len(otherTaskRunners) > 0 {
   799  				r.logger.Printf("[DEBUG] client: leader task %q is dead, destroying other tasks in task group: %v", taskName, otherTaskNames)
   800  			}
   801  		}
   802  	}
   803  
   804  	// Store the new state
   805  	taskState.State = state
   806  
   807  	select {
   808  	case r.dirtyCh <- struct{}{}:
   809  	default:
   810  	}
   811  }
   812  
   813  // appendTaskEvent updates the task status by appending the new event.
   814  func (r *AllocRunner) appendTaskEvent(state *structs.TaskState, event *structs.TaskEvent) {
   815  	capacity := 10
   816  	if state.Events == nil {
   817  		state.Events = make([]*structs.TaskEvent, 0, capacity)
   818  	}
   819  
   820  	// If we hit capacity, then shift it.
   821  	if len(state.Events) == capacity {
   822  		old := state.Events
   823  		state.Events = make([]*structs.TaskEvent, 0, capacity)
   824  		state.Events = append(state.Events, old[1:]...)
   825  	}
   826  
   827  	state.Events = append(state.Events, event)
   828  }
   829  
   830  // Run is a long running goroutine used to manage an allocation
   831  func (r *AllocRunner) Run() {
   832  	defer close(r.waitCh)
   833  	r.setBaseLabels()
   834  	go r.dirtySyncState()
   835  
   836  	// Find the task group to run in the allocation
   837  	alloc := r.Alloc()
   838  	tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup)
   839  	if tg == nil {
   840  		r.logger.Printf("[ERR] client: alloc %q for missing task group %q", r.allocID, alloc.TaskGroup)
   841  		r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("missing task group '%s'", alloc.TaskGroup))
   842  		return
   843  	}
   844  
   845  	// Build allocation directory (idempotent)
   846  	r.allocDirLock.Lock()
   847  	err := r.allocDir.Build()
   848  	r.allocDirLock.Unlock()
   849  
   850  	if err != nil {
   851  		r.logger.Printf("[ERR] client: alloc %q failed to build task directories: %v", r.allocID, err)
   852  		r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("failed to build task dirs for '%s'", alloc.TaskGroup))
   853  		return
   854  	}
   855  
   856  	// Wait for a previous alloc - if any - to terminate
   857  	if err := r.prevAlloc.Wait(r.ctx); err != nil {
   858  		if err == context.Canceled {
   859  			return
   860  		}
   861  		r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("error while waiting for previous alloc to terminate: %v", err))
   862  		return
   863  	}
   864  
   865  	// Wait for data to be migrated from a previous alloc if applicable
   866  	if err := r.prevAlloc.Migrate(r.ctx, r.allocDir); err != nil {
   867  		if err == context.Canceled {
   868  			return
   869  		}
   870  
   871  		// Soft-fail on migration errors
   872  		r.logger.Printf("[WARN] client: alloc %q error while migrating data from previous alloc: %v", r.allocID, err)
   873  
   874  		// Recreate alloc dir to ensure a clean slate
   875  		r.allocDir.Destroy()
   876  		if err := r.allocDir.Build(); err != nil {
   877  			r.logger.Printf("[ERR] client: alloc %q failed to clean task directories after failed migration: %v", r.allocID, err)
   878  			r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("failed to rebuild task dirs for '%s'", alloc.TaskGroup))
   879  			return
   880  		}
   881  	}
   882  
   883  	// Check if the allocation is in a terminal status. In this case, we don't
   884  	// start any of the task runners and directly wait for the destroy signal to
   885  	// clean up the allocation.
   886  	if alloc.TerminalStatus() {
   887  		r.logger.Printf("[DEBUG] client: alloc %q in terminal status, waiting for destroy", r.allocID)
   888  		// mark this allocation as completed if it is not already in a
   889  		// terminal state
   890  		if !alloc.Terminated() {
   891  			r.setStatus(structs.AllocClientStatusComplete, "canceled running tasks for allocation in terminal state")
   892  		}
   893  		r.handleDestroy()
   894  		r.logger.Printf("[DEBUG] client: terminating runner for alloc '%s'", r.allocID)
   895  		return
   896  	}
   897  
   898  	// Increment alloc runner start counter. Incr'd even when restoring existing tasks so 1 start != 1 task execution
   899  	if !r.config.DisableTaggedMetrics {
   900  		metrics.IncrCounterWithLabels([]string{"client", "allocs", "start"},
   901  			1, r.baseLabels)
   902  	}
   903  	if r.config.BackwardsCompatibleMetrics {
   904  		metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, "start"}, 1)
   905  	}
   906  
   907  	// Start the watcher
   908  	wCtx, watcherCancel := context.WithCancel(r.ctx)
   909  	go r.watchHealth(wCtx)
   910  
   911  	// Start the task runners
   912  	r.logger.Printf("[DEBUG] client: starting task runners for alloc '%s'", r.allocID)
   913  	r.taskLock.Lock()
   914  	for _, task := range tg.Tasks {
   915  		if _, ok := r.restored[task.Name]; ok {
   916  			continue
   917  		}
   918  
   919  		r.allocDirLock.Lock()
   920  		taskdir := r.allocDir.NewTaskDir(task.Name)
   921  		r.allocDirLock.Unlock()
   922  
   923  		tr := NewTaskRunner(r.logger, r.config, r.stateDB, r.setTaskState, taskdir, r.Alloc(), task.Copy(), r.vaultClient, r.consulClient)
   924  		r.tasks[task.Name] = tr
   925  		tr.MarkReceived()
   926  
   927  		go tr.Run()
   928  	}
   929  	r.taskLock.Unlock()
   930  
   931  	// taskDestroyEvent contains an event that caused the destruction of a task
   932  	// in the allocation.
   933  	var taskDestroyEvent *structs.TaskEvent
   934  
   935  OUTER:
   936  	// Wait for updates
   937  	for {
   938  		select {
   939  		case update := <-r.updateCh:
   940  			// Store the updated allocation.
   941  			r.allocLock.Lock()
   942  
   943  			// If the deployment ids have changed clear the health
   944  			if r.alloc.DeploymentID != update.DeploymentID {
   945  				r.allocHealth = nil
   946  			}
   947  
   948  			r.alloc = update
   949  			r.allocLock.Unlock()
   950  
   951  			// Create a new watcher
   952  			watcherCancel()
   953  			wCtx, watcherCancel = context.WithCancel(r.ctx)
   954  			go r.watchHealth(wCtx)
   955  
   956  			// Check if we're in a terminal status
   957  			if update.TerminalStatus() {
   958  				taskDestroyEvent = structs.NewTaskEvent(structs.TaskKilled)
   959  				break OUTER
   960  			}
   961  
   962  			// Update the task groups
   963  			runners := r.getTaskRunners()
   964  			for _, tr := range runners {
   965  				tr.Update(update)
   966  			}
   967  
   968  			if err := r.syncStatus(); err != nil {
   969  				r.logger.Printf("[WARN] client: failed to sync alloc %q status upon receiving alloc update: %v",
   970  					r.allocID, err)
   971  			}
   972  
   973  		case <-r.ctx.Done():
   974  			taskDestroyEvent = structs.NewTaskEvent(structs.TaskKilled)
   975  			break OUTER
   976  		}
   977  	}
   978  
   979  	// Kill the task runners
   980  	r.destroyTaskRunners(taskDestroyEvent)
   981  
   982  	// Block until we should destroy the state of the alloc
   983  	r.handleDestroy()
   984  
   985  	// Free up the context. It has likely exited already
   986  	watcherCancel()
   987  
   988  	r.logger.Printf("[DEBUG] client: terminating runner for alloc '%s'", r.allocID)
   989  }
   990  
   991  // destroyTaskRunners destroys the task runners, waits for them to terminate and
   992  // then saves state.
   993  func (r *AllocRunner) destroyTaskRunners(destroyEvent *structs.TaskEvent) {
   994  	// First destroy the leader if one exists
   995  	tg := r.alloc.Job.LookupTaskGroup(r.alloc.TaskGroup)
   996  	leader := ""
   997  	for _, task := range tg.Tasks {
   998  		if task.Leader {
   999  			leader = task.Name
  1000  			break
  1001  		}
  1002  	}
  1003  	if leader != "" {
  1004  		r.taskLock.RLock()
  1005  		tr := r.tasks[leader]
  1006  		r.taskLock.RUnlock()
  1007  
  1008  		// Dead tasks don't have a task runner created so guard against
  1009  		// the leader being dead when this AR was saved.
  1010  		if tr == nil {
  1011  			r.logger.Printf("[DEBUG] client: alloc %q leader task %q of task group %q already stopped",
  1012  				r.allocID, leader, r.alloc.TaskGroup)
  1013  		} else {
  1014  			r.logger.Printf("[DEBUG] client: alloc %q destroying leader task %q of task group %q first",
  1015  				r.allocID, leader, r.alloc.TaskGroup)
  1016  			tr.Destroy(destroyEvent)
  1017  			<-tr.WaitCh()
  1018  		}
  1019  	}
  1020  
  1021  	// Then destroy non-leader tasks concurrently
  1022  	r.taskLock.RLock()
  1023  	for name, tr := range r.tasks {
  1024  		if name != leader {
  1025  			tr.Destroy(destroyEvent)
  1026  		}
  1027  	}
  1028  	r.taskLock.RUnlock()
  1029  
  1030  	// Wait for termination of the task runners
  1031  	for _, tr := range r.getTaskRunners() {
  1032  		<-tr.WaitCh()
  1033  	}
  1034  }
  1035  
  1036  // handleDestroy blocks till the AllocRunner should be destroyed and does the
  1037  // necessary cleanup.
  1038  func (r *AllocRunner) handleDestroy() {
  1039  	// Final state sync. We do this to ensure that the server has the correct
  1040  	// state as we wait for a destroy.
  1041  	alloc := r.Alloc()
  1042  
  1043  	// Increment the destroy count for this alloc runner since this allocation is being removed from this client.
  1044  	if !r.config.DisableTaggedMetrics {
  1045  		metrics.IncrCounterWithLabels([]string{"client", "allocs", "destroy"},
  1046  			1, r.baseLabels)
  1047  	}
  1048  	if r.config.BackwardsCompatibleMetrics {
  1049  		metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, "destroy"}, 1)
  1050  	}
  1051  
  1052  	// Broadcast and persist state synchronously
  1053  	r.sendBroadcast(alloc)
  1054  	if err := r.saveAllocRunnerState(); err != nil {
  1055  		r.logger.Printf("[WARN] client: alloc %q unable to persist state but should be GC'd soon anyway:%v",
  1056  			r.allocID, err)
  1057  	}
  1058  
  1059  	// Unmount any mounted directories as no tasks are running and makes
  1060  	// cleaning up Nomad's data directory simpler.
  1061  	if err := r.allocDir.UnmountAll(); err != nil {
  1062  		r.logger.Printf("[ERR] client: alloc %q unable unmount task directories: %v", r.allocID, err)
  1063  	}
  1064  
  1065  	// Update the server with the alloc's status -- also marks the alloc as
  1066  	// being eligible for GC, so from this point on the alloc can be gc'd
  1067  	// at any time.
  1068  	r.updater(alloc)
  1069  
  1070  	for {
  1071  		select {
  1072  		case <-r.ctx.Done():
  1073  			if err := r.DestroyContext(); err != nil {
  1074  				r.logger.Printf("[ERR] client: failed to destroy context for alloc '%s': %v",
  1075  					r.allocID, err)
  1076  			}
  1077  			if err := r.DestroyState(); err != nil {
  1078  				r.logger.Printf("[ERR] client: failed to destroy state for alloc '%s': %v",
  1079  					r.allocID, err)
  1080  			}
  1081  
  1082  			return
  1083  		case <-r.updateCh:
  1084  			r.logger.Printf("[DEBUG] client: dropping update to terminal alloc '%s'", r.allocID)
  1085  		}
  1086  	}
  1087  }
  1088  
  1089  // IsWaiting returns true if this alloc is waiting on a previous allocation to
  1090  // terminate.
  1091  func (r *AllocRunner) IsWaiting() bool {
  1092  	return r.prevAlloc.IsWaiting()
  1093  }
  1094  
  1095  // IsMigrating returns true if this alloc is migrating data from a previous
  1096  // allocation.
  1097  func (r *AllocRunner) IsMigrating() bool {
  1098  	return r.prevAlloc.IsMigrating()
  1099  }
  1100  
  1101  // Update is used to update the allocation of the context
  1102  func (r *AllocRunner) Update(update *structs.Allocation) {
  1103  	select {
  1104  	case r.updateCh <- update:
  1105  	default:
  1106  		r.logger.Printf("[ERR] client: dropping update to alloc '%s'", update.ID)
  1107  	}
  1108  }
  1109  
  1110  // StatsReporter returns an interface to query resource usage statistics of an
  1111  // allocation
  1112  func (r *AllocRunner) StatsReporter() AllocStatsReporter {
  1113  	return r
  1114  }
  1115  
  1116  // getTaskRunners is a helper that returns a copy of the task runners list using
  1117  // the taskLock.
  1118  func (r *AllocRunner) getTaskRunners() []*TaskRunner {
  1119  	// Get the task runners
  1120  	r.taskLock.RLock()
  1121  	defer r.taskLock.RUnlock()
  1122  	runners := make([]*TaskRunner, 0, len(r.tasks))
  1123  	for _, tr := range r.tasks {
  1124  		runners = append(runners, tr)
  1125  	}
  1126  	return runners
  1127  }
  1128  
  1129  // LatestAllocStats returns the latest allocation stats. If the optional taskFilter is set
  1130  // the allocation stats will only include the given task.
  1131  func (r *AllocRunner) LatestAllocStats(taskFilter string) (*cstructs.AllocResourceUsage, error) {
  1132  	astat := &cstructs.AllocResourceUsage{
  1133  		Tasks: make(map[string]*cstructs.TaskResourceUsage),
  1134  	}
  1135  
  1136  	var flat []*cstructs.TaskResourceUsage
  1137  	if taskFilter != "" {
  1138  		r.taskLock.RLock()
  1139  		tr, ok := r.tasks[taskFilter]
  1140  		r.taskLock.RUnlock()
  1141  		if !ok {
  1142  			return nil, fmt.Errorf("allocation %q has no task %q", r.allocID, taskFilter)
  1143  		}
  1144  		l := tr.LatestResourceUsage()
  1145  		if l != nil {
  1146  			astat.Tasks[taskFilter] = l
  1147  			flat = []*cstructs.TaskResourceUsage{l}
  1148  			astat.Timestamp = l.Timestamp
  1149  		}
  1150  	} else {
  1151  		// Get the task runners
  1152  		runners := r.getTaskRunners()
  1153  		for _, tr := range runners {
  1154  			l := tr.LatestResourceUsage()
  1155  			if l != nil {
  1156  				astat.Tasks[tr.task.Name] = l
  1157  				flat = append(flat, l)
  1158  				if l.Timestamp > astat.Timestamp {
  1159  					astat.Timestamp = l.Timestamp
  1160  				}
  1161  			}
  1162  		}
  1163  	}
  1164  
  1165  	astat.ResourceUsage = sumTaskResourceUsage(flat)
  1166  	return astat, nil
  1167  }
  1168  
  1169  // sumTaskResourceUsage takes a set of task resources and sums their resources
  1170  func sumTaskResourceUsage(usages []*cstructs.TaskResourceUsage) *cstructs.ResourceUsage {
  1171  	summed := &cstructs.ResourceUsage{
  1172  		MemoryStats: &cstructs.MemoryStats{},
  1173  		CpuStats:    &cstructs.CpuStats{},
  1174  	}
  1175  	for _, usage := range usages {
  1176  		summed.Add(usage.ResourceUsage)
  1177  	}
  1178  	return summed
  1179  }
  1180  
  1181  // shouldUpdate takes the AllocModifyIndex of an allocation sent from the server and
  1182  // checks if the current running allocation is behind and should be updated.
  1183  func (r *AllocRunner) shouldUpdate(serverIndex uint64) bool {
  1184  	r.allocLock.Lock()
  1185  	defer r.allocLock.Unlock()
  1186  	return r.alloc.AllocModifyIndex < serverIndex
  1187  }
  1188  
// Destroy is used to indicate that the allocation context should be destroyed.
// It invokes exitFn and closes the allocation broadcaster, both while holding
// allocStateLock.
func (r *AllocRunner) Destroy() {
	// Lock when closing the context as that gives the save state code
	// serialization.
	r.allocStateLock.Lock()
	defer r.allocStateLock.Unlock()

	// NOTE(review): exitFn presumably cancels r.ctx, which Run and
	// handleDestroy select on -- confirm where exitFn is assigned.
	r.exitFn()
	r.allocBroadcast.Close()
}
  1199  
  1200  // IsDestroyed returns true if the AllocRunner is not running and has been
  1201  // destroyed (GC'd).
  1202  func (r *AllocRunner) IsDestroyed() bool {
  1203  	select {
  1204  	case <-r.waitCh:
  1205  		return true
  1206  	default:
  1207  		return false
  1208  	}
  1209  }
  1210  
  1211  // WaitCh returns a channel to wait for termination
  1212  func (r *AllocRunner) WaitCh() <-chan struct{} {
  1213  	return r.waitCh
  1214  }