github.com/hhrutter/nomad@v0.6.0-rc2.0.20170723054333-80c4b03f0705/client/alloc_runner.go

package client

import (
	"context"
	"fmt"
	"log"
	"os"
	"path/filepath"
	"sync"
	"time"

	"github.com/boltdb/bolt"
	"github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/client/allocdir"
	"github.com/hashicorp/nomad/client/config"
	"github.com/hashicorp/nomad/client/vaultclient"
	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/nomad/structs"

	cstructs "github.com/hashicorp/nomad/client/structs"
)

const (
	// taskReceivedSyncLimit is how long the client will wait before
	// notifying the server that a task was received. The client does not
	// send this update immediately because another transition to running
	// or failed is likely to occur shortly after, and a single update can
	// carry all of the accumulated state. If no other transition has
	// occurred by this limit, we send the update to the server.
	taskReceivedSyncLimit = 30 * time.Second
)
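
// A minimal sketch (hypothetical, not part of the original file) of the
// coalescing described above: arm a timer when the "received" event fires
// and only send the dedicated sync if no other transition beats it.
func exampleCoalesceReceived(transitioned <-chan struct{}, sync func()) {
	t := time.NewTimer(taskReceivedSyncLimit)
	defer t.Stop()
	select {
	case <-transitioned:
		// Another transition occurred; its update carries the received
		// state as well, so the dedicated sync is unnecessary.
	case <-t.C:
		sync()
	}
}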

var (
	// The following are the key paths written to the state database
	allocRunnerStateAllocKey     = []byte("alloc")
	allocRunnerStateImmutableKey = []byte("immutable")
	allocRunnerStateMutableKey   = []byte("mutable")
	allocRunnerStateAllocDirKey  = []byte("alloc-dir")
)
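
// Each allocation owns one bucket in the state database, and the four keys
// above each map to an independently (de)serialized struct. A minimal read
// sketch, assuming the getAllocationBucket/getObject helpers defined
// elsewhere in this package:
func exampleReadMutableState(db *bolt.DB, allocID string) (*allocRunnerMutableState, error) {
	var mutable allocRunnerMutableState
	err := db.View(func(tx *bolt.Tx) error {
		bkt, err := getAllocationBucket(tx, allocID)
		if err != nil {
			return fmt.Errorf("failed to get allocation bucket: %v", err)
		}
		return getObject(bkt, allocRunnerStateMutableKey, &mutable)
	})
	if err != nil {
		return nil, err
	}
	return &mutable, nil
}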

// AllocStateUpdater is used to update the status of an allocation
type AllocStateUpdater func(alloc *structs.Allocation)

type AllocStatsReporter interface {
	LatestAllocStats(taskFilter string) (*cstructs.AllocResourceUsage, error)
}

// AllocRunner is used to wrap an allocation and provide the execution context.
type AllocRunner struct {
	config  *config.Config
	updater AllocStateUpdater
	logger  *log.Logger

	// allocID is the ID of this runner's allocation. Since it does not
	// change for the lifetime of the AllocRunner it is safe to read
	// without acquiring a lock (unlike alloc).
	allocID string

	alloc                  *structs.Allocation
	allocClientStatus      string // Explicit status of allocation. Set when there are failures
	allocClientDescription string
	allocHealth            *bool // Whether the allocation is healthy
	allocBroadcast         *cstructs.AllocBroadcaster
	allocLock              sync.Mutex

	dirtyCh chan struct{}

	allocDir     *allocdir.AllocDir
	allocDirLock sync.Mutex

	tasks      map[string]*TaskRunner
	taskStates map[string]*structs.TaskState
	restored   map[string]struct{}
	taskLock   sync.RWMutex

	taskStatusLock sync.RWMutex

	updateCh chan *structs.Allocation

	vaultClient  vaultclient.VaultClient
	consulClient ConsulServiceAPI

	otherAllocDir *allocdir.AllocDir

	ctx    context.Context
	exitFn context.CancelFunc
	waitCh chan struct{}

	// State related fields
	// stateDB is used to store the alloc runner's state
	stateDB        *bolt.DB
	allocStateLock sync.Mutex

	// persistedEval is the last persisted evaluation ID. Since evaluation
	// IDs change on every allocation update we only need to persist the
	// allocation when its eval ID != the last persisted eval ID.
	persistedEvalLock sync.Mutex
	persistedEval     string

	// immutablePersisted and allocDirPersisted are used to track whether the
	// immutable data and the alloc dir have been persisted. Once persisted we
	// can lower write volume by not re-writing these values
	immutablePersisted bool
	allocDirPersisted  bool
}

// COMPAT: Remove in 0.7.0
// allocRunnerState is used to snapshot the state of the alloc runner
type allocRunnerState struct {
	Version                string
	Alloc                  *structs.Allocation
	AllocDir               *allocdir.AllocDir
	AllocClientStatus      string
	AllocClientDescription string

	// COMPAT: Remove in 0.7.0: removing will break upgrading directly from
	//         0.5.2, so don't remove in the 0.6 series.
	// Context is deprecated and only used to migrate from older releases.
	// It will be removed in the future.
	Context *struct {
		AllocID  string // unused; included for completeness
		AllocDir struct {
			AllocDir  string
			SharedDir string // unused; included for completeness
			TaskDirs  map[string]string
		}
	} `json:"Context,omitempty"`
}

// allocRunnerAllocState is state that only has to be written when the alloc
// changes.
type allocRunnerAllocState struct {
	Alloc *structs.Allocation
}

// allocRunnerImmutableState is state that only has to be written once.
type allocRunnerImmutableState struct {
	Version string
}

// allocRunnerMutableState is state that has to be written on each save as it
// changes over the life-cycle of the alloc_runner.
type allocRunnerMutableState struct {
	AllocClientStatus      string
	AllocClientDescription string
	TaskStates             map[string]*structs.TaskState
	DeploymentStatus       *structs.AllocDeploymentStatus
}

// NewAllocRunner is used to create a new AllocRunner, the execution context
// for an allocation
func NewAllocRunner(logger *log.Logger, config *config.Config, stateDB *bolt.DB, updater AllocStateUpdater,
	alloc *structs.Allocation, vaultClient vaultclient.VaultClient,
	consulClient ConsulServiceAPI) *AllocRunner {

	ar := &AllocRunner{
		config:         config,
		stateDB:        stateDB,
		updater:        updater,
		logger:         logger,
		alloc:          alloc,
		allocID:        alloc.ID,
		allocBroadcast: cstructs.NewAllocBroadcaster(8),
		dirtyCh:        make(chan struct{}, 1),
		allocDir:       allocdir.NewAllocDir(logger, filepath.Join(config.AllocDir, alloc.ID)),
		tasks:          make(map[string]*TaskRunner),
		taskStates:     copyTaskStates(alloc.TaskStates),
		restored:       make(map[string]struct{}),
		updateCh:       make(chan *structs.Allocation, 64),
		waitCh:         make(chan struct{}),
		vaultClient:    vaultClient,
		consulClient:   consulClient,
	}

	// TODO Should be passed a context
	ar.ctx, ar.exitFn = context.WithCancel(context.TODO())
	return ar
}
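
// A minimal usage sketch, assuming a client *config.Config, an open
// *bolt.DB, and Vault/Consul clients are already available; the updater
// callback here just logs the status transition.
func exampleRunAlloc(logger *log.Logger, cfg *config.Config, db *bolt.DB,
	alloc *structs.Allocation, vc vaultclient.VaultClient, cc ConsulServiceAPI) {

	updater := func(a *structs.Allocation) {
		logger.Printf("[DEBUG] example: alloc %q is now %q", a.ID, a.ClientStatus)
	}
	ar := NewAllocRunner(logger, cfg, db, updater, alloc, vc, cc)
	go ar.Run()

	// ... later: tear the runner down and wait for it to exit.
	ar.Destroy()
	<-ar.WaitCh()
}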

// pre060StateFilePath returns the path to our state file that would have been
// written pre v0.6.0
// COMPAT: Remove in 0.7.0
func (r *AllocRunner) pre060StateFilePath() string {
	r.allocLock.Lock()
	defer r.allocLock.Unlock()
	path := filepath.Join(r.config.StateDir, "alloc", r.allocID, "state.json")
	return path
}

// RestoreState is used to restore the state of the alloc runner
func (r *AllocRunner) RestoreState() error {

	// COMPAT: Remove in 0.7.0
	// Check if the old snapshot is there
	oldPath := r.pre060StateFilePath()
	var snap allocRunnerState
	var upgrading bool
	if err := pre060RestoreState(oldPath, &snap); err == nil {
		// Restore fields
		r.logger.Printf("[INFO] client: restoring pre v0.6.0 alloc runner state for alloc %q", r.allocID)
		r.alloc = snap.Alloc
		r.allocDir = snap.AllocDir
		r.allocClientStatus = snap.AllocClientStatus
		r.allocClientDescription = snap.AllocClientDescription

		if r.alloc != nil {
			r.taskStates = snap.Alloc.TaskStates
		}

		// COMPAT: Remove in 0.7.0
		// #2132 Upgrade path: if snap.AllocDir is nil, try to convert old
		// Context struct to new AllocDir struct
		if snap.AllocDir == nil && snap.Context != nil {
			r.logger.Printf("[DEBUG] client: migrating state snapshot for alloc %q", r.allocID)
			r.allocDir = allocdir.NewAllocDir(r.logger, snap.Context.AllocDir.AllocDir)
			for taskName := range snap.Context.AllocDir.TaskDirs {
				r.allocDir.NewTaskDir(taskName)
			}
		}

		// Delete the old state
		os.RemoveAll(oldPath)
		upgrading = true
	} else if !os.IsNotExist(err) {
		// Something is corrupt in the old state file
		return err
	} else {
		// We are doing a normal restore
		err := r.stateDB.View(func(tx *bolt.Tx) error {
			bkt, err := getAllocationBucket(tx, r.allocID)
			if err != nil {
				return fmt.Errorf("failed to get allocation bucket: %v", err)
			}

			// Get the state objects
			var mutable allocRunnerMutableState
			var immutable allocRunnerImmutableState
			var allocState allocRunnerAllocState
			var allocDir allocdir.AllocDir

			if err := getObject(bkt, allocRunnerStateAllocKey, &allocState); err != nil {
				return fmt.Errorf("failed to read alloc runner alloc state: %v", err)
			}
			if err := getObject(bkt, allocRunnerStateImmutableKey, &immutable); err != nil {
				return fmt.Errorf("failed to read alloc runner immutable state: %v", err)
			}
			if err := getObject(bkt, allocRunnerStateMutableKey, &mutable); err != nil {
				return fmt.Errorf("failed to read alloc runner mutable state: %v", err)
			}
			if err := getObject(bkt, allocRunnerStateAllocDirKey, &allocDir); err != nil {
				return fmt.Errorf("failed to read alloc runner alloc_dir state: %v", err)
			}

			// Populate the fields
			r.alloc = allocState.Alloc
			r.allocDir = &allocDir
			r.allocClientStatus = mutable.AllocClientStatus
			r.allocClientDescription = mutable.AllocClientDescription
			r.taskStates = mutable.TaskStates
			r.alloc.ClientStatus = getClientStatus(r.taskStates)
			r.alloc.DeploymentStatus = mutable.DeploymentStatus
			return nil
		})

		if err != nil {
			return fmt.Errorf("failed to read allocation state: %v", err)
		}
	}

	var snapshotErrors multierror.Error
	if r.alloc == nil {
		snapshotErrors.Errors = append(snapshotErrors.Errors, fmt.Errorf("alloc_runner snapshot includes a nil allocation"))
	}
	if r.allocDir == nil {
		snapshotErrors.Errors = append(snapshotErrors.Errors, fmt.Errorf("alloc_runner snapshot includes a nil alloc dir"))
	}
	if e := snapshotErrors.ErrorOrNil(); e != nil {
		return e
	}

	tg := r.alloc.Job.LookupTaskGroup(r.alloc.TaskGroup)
	if tg == nil {
		return fmt.Errorf("restored allocation doesn't contain task group %q", r.alloc.TaskGroup)
	}

	// Restore the task runners
	taskDestroyEvent := structs.NewTaskEvent(structs.TaskKilled)
	var mErr multierror.Error
	for _, task := range tg.Tasks {
		name := task.Name
		state := r.taskStates[name]

		// Mark the task as restored.
		r.restored[name] = struct{}{}

		td, ok := r.allocDir.TaskDirs[name]
		if !ok {
			// Create the task dir metadata if it doesn't exist.
			// Since task dirs are created during r.Run() the
			// client may save state and exit before all task dirs
			// are created
			td = r.allocDir.NewTaskDir(name)
		}

		// Skip tasks in terminal states. The state may be nil if the
		// snapshot was taken before the task was received.
		if state != nil && state.State == structs.TaskStateDead {
			continue
		}

		tr := NewTaskRunner(r.logger, r.config, r.stateDB, r.setTaskState, td, r.Alloc(), task, r.vaultClient, r.consulClient)
		r.tasks[name] = tr

		if restartReason, err := tr.RestoreState(); err != nil {
			r.logger.Printf("[ERR] client: failed to restore state for alloc %s task %q: %v", r.allocID, name, err)
			mErr.Errors = append(mErr.Errors, err)
		} else if !r.alloc.TerminalStatus() {
			// Only start if the alloc isn't in a terminal status.
			go tr.Run()

			if upgrading {
				if err := tr.SaveState(); err != nil {
					r.logger.Printf("[WARN] client: initial save state for alloc %s task %s failed: %v", r.allocID, name, err)
				}
			}

			// Restart task runner if RestoreState gave a reason
			if restartReason != "" {
				r.logger.Printf("[INFO] client: restarting alloc %s task %s: %v", r.allocID, name, restartReason)
				tr.Restart("upgrade", restartReason)
			}
		} else {
			tr.Destroy(taskDestroyEvent)
		}
	}

	return mErr.ErrorOrNil()
}

// SaveState is used to snapshot the state of the alloc runner: it first
// saves the alloc runner's own state and then the state of every task
// runner associated with the alloc
func (r *AllocRunner) SaveState() error {
	if err := r.saveAllocRunnerState(); err != nil {
		return err
	}

	// Save state for each task
	runners := r.getTaskRunners()
	var mErr multierror.Error
	for _, tr := range runners {
		if err := tr.SaveState(); err != nil {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("failed to save state for alloc %s task %q: %v",
				r.allocID, tr.task.Name, err))
		}
	}
	return mErr.ErrorOrNil()
}

func (r *AllocRunner) saveAllocRunnerState() error {
	r.allocStateLock.Lock()
	defer r.allocStateLock.Unlock()

	if r.ctx.Err() == context.Canceled {
		return nil
	}

	// Grab all the relevant data
	alloc := r.Alloc()

	r.allocLock.Lock()
	allocClientStatus := r.allocClientStatus
	allocClientDescription := r.allocClientDescription
	r.allocLock.Unlock()

	r.allocDirLock.Lock()
	allocDir := r.allocDir.Copy()
	r.allocDirLock.Unlock()

	// Start the transaction.
	return r.stateDB.Batch(func(tx *bolt.Tx) error {

		// Grab the allocation bucket
		allocBkt, err := getAllocationBucket(tx, r.allocID)
		if err != nil {
			return fmt.Errorf("failed to retrieve allocation bucket: %v", err)
		}

		// Write the allocation if the eval has changed
		r.persistedEvalLock.Lock()
		lastPersisted := r.persistedEval
		r.persistedEvalLock.Unlock()
		if alloc.EvalID != lastPersisted {
			allocState := &allocRunnerAllocState{
				Alloc: alloc,
			}

			if err := putObject(allocBkt, allocRunnerStateAllocKey, &allocState); err != nil {
				return fmt.Errorf("failed to write alloc_runner alloc state: %v", err)
			}

			tx.OnCommit(func() {
				r.persistedEvalLock.Lock()
				r.persistedEval = alloc.EvalID
				r.persistedEvalLock.Unlock()
			})
		}

		// Write immutable data iff it hasn't been written yet
		if !r.immutablePersisted {
			immutable := &allocRunnerImmutableState{
				Version: r.config.Version,
			}

			if err := putObject(allocBkt, allocRunnerStateImmutableKey, &immutable); err != nil {
				return fmt.Errorf("failed to write alloc_runner immutable state: %v", err)
			}

			tx.OnCommit(func() {
				r.immutablePersisted = true
			})
		}

		// Write the alloc dir data if it hasn't been written before and it exists.
		if !r.allocDirPersisted && allocDir != nil {
			if err := putObject(allocBkt, allocRunnerStateAllocDirKey, allocDir); err != nil {
				return fmt.Errorf("failed to write alloc_runner allocDir state: %v", err)
			}

			tx.OnCommit(func() {
				r.allocDirPersisted = true
			})
		}

		// Write the mutable state every time
		mutable := &allocRunnerMutableState{
			AllocClientStatus:      allocClientStatus,
			AllocClientDescription: allocClientDescription,
			TaskStates:             alloc.TaskStates,
			DeploymentStatus:       alloc.DeploymentStatus,
		}

		if err := putObject(allocBkt, allocRunnerStateMutableKey, &mutable); err != nil {
			return fmt.Errorf("failed to write alloc_runner mutable state: %v", err)
		}

		return nil
	})
}
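
// The tx.OnCommit calls above are load-bearing: stateDB.Batch may run the
// closure more than once before a transaction commits, so "already
// persisted" flags must only flip after the commit actually lands. A
// distilled sketch of the pattern (hypothetical helper, not part of the
// original file):
func exampleWriteOnce(db *bolt.DB, written *bool, write func(tx *bolt.Tx) error) error {
	return db.Batch(func(tx *bolt.Tx) error {
		if *written {
			return nil // already persisted; skip the write
		}
		if err := write(tx); err != nil {
			return err
		}
		// Defer the flag flip until the transaction has committed.
		tx.OnCommit(func() { *written = true })
		return nil
	})
}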

// DestroyState is used to clean up after ourselves
func (r *AllocRunner) DestroyState() error {
	r.allocStateLock.Lock()
	defer r.allocStateLock.Unlock()

	return r.stateDB.Update(func(tx *bolt.Tx) error {
		if err := deleteAllocationBucket(tx, r.allocID); err != nil {
			return fmt.Errorf("failed to delete allocation bucket: %v", err)
		}
		return nil
	})
}

// DestroyContext is used to destroy the context, removing the allocation
// directory from disk
func (r *AllocRunner) DestroyContext() error {
	return r.allocDir.Destroy()
}

// GetAllocDir returns the alloc dir for the alloc runner
func (r *AllocRunner) GetAllocDir() *allocdir.AllocDir {
	return r.allocDir
}

// copyTaskStates returns a deep copy of the passed task states.
func copyTaskStates(states map[string]*structs.TaskState) map[string]*structs.TaskState {
	c := make(map[string]*structs.TaskState, len(states))
	for task, state := range states {
		c[task] = state.Copy()
	}
	return c
}
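
// A small demonstration of why the deep copy matters: callers may freely
// mutate the returned states without corrupting the runner's own map.
func exampleCopyIsolation(orig map[string]*structs.TaskState) {
	copied := copyTaskStates(orig)
	for _, s := range copied {
		s.State = structs.TaskStateDead // does not touch orig's entries
	}
}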

// Alloc returns the associated allocation
func (r *AllocRunner) Alloc() *structs.Allocation {
	r.allocLock.Lock()

	// Don't do a deep copy of the job
	alloc := r.alloc.CopySkipJob()

	// The status has explicitly been set.
	if r.allocClientStatus != "" || r.allocClientDescription != "" {
		alloc.ClientStatus = r.allocClientStatus
		alloc.ClientDescription = r.allocClientDescription

		// Copy over the task states so we don't lose them
		r.taskStatusLock.RLock()
		alloc.TaskStates = copyTaskStates(r.taskStates)
		r.taskStatusLock.RUnlock()

		r.allocLock.Unlock()
		return alloc
	}

	// The health has been set
	if r.allocHealth != nil {
		if alloc.DeploymentStatus == nil {
			alloc.DeploymentStatus = &structs.AllocDeploymentStatus{}
		}
		alloc.DeploymentStatus.Healthy = helper.BoolToPtr(*r.allocHealth)
	}
	r.allocLock.Unlock()

	// Scan the task states to determine the status of the alloc
	r.taskStatusLock.RLock()
	alloc.TaskStates = copyTaskStates(r.taskStates)
	alloc.ClientStatus = getClientStatus(r.taskStates)
	r.taskStatusLock.RUnlock()

	// If the client status is failed and we are part of a deployment, mark the
	// alloc as unhealthy. This guards against the watcher not being started.
	r.allocLock.Lock()
	if alloc.ClientStatus == structs.AllocClientStatusFailed &&
		alloc.DeploymentID != "" && !alloc.DeploymentStatus.IsUnhealthy() {
		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: helper.BoolToPtr(false),
		}
	}
	r.allocLock.Unlock()

	return alloc
}

// getClientStatus takes in the task states for a given allocation and computes
// the client status
func getClientStatus(taskStates map[string]*structs.TaskState) string {
	var pending, running, dead, failed bool
	for _, state := range taskStates {
		switch state.State {
		case structs.TaskStateRunning:
			running = true
		case structs.TaskStatePending:
			pending = true
		case structs.TaskStateDead:
			if state.Failed {
				failed = true
			} else {
				dead = true
			}
		}
	}

	// Determine the alloc status
	if failed {
		return structs.AllocClientStatusFailed
	} else if running {
		return structs.AllocClientStatusRunning
	} else if pending {
		return structs.AllocClientStatusPending
	} else if dead {
		return structs.AllocClientStatusComplete
	}

	return ""
}
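
// A small worked example of the precedence above: a single failed task
// outranks a still-running sibling, so the allocation as a whole reports
// failed.
func exampleClientStatus() string {
	states := map[string]*structs.TaskState{
		"web": {State: structs.TaskStateRunning},
		"log": {State: structs.TaskStateDead, Failed: true},
	}
	return getClientStatus(states) // structs.AllocClientStatusFailed
}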

// dirtySyncState is used to watch for state being marked dirty to sync
func (r *AllocRunner) dirtySyncState() {
	for {
		select {
		case <-r.dirtyCh:
			if err := r.syncStatus(); err != nil {
				// Only WARN instead of ERR because we continue on
				r.logger.Printf("[WARN] client: error persisting alloc %q state: %v",
					r.allocID, err)
			}
		case <-r.ctx.Done():
			return
		}
	}
}

// syncStatus is used to sync the status when it changes
func (r *AllocRunner) syncStatus() error {
	// Get a copy of our alloc, update status server side and sync to disk
	alloc := r.Alloc()
	r.updater(alloc)
	r.sendBroadcast(alloc)
	return r.saveAllocRunnerState()
}

// sendBroadcast broadcasts an alloc update.
func (r *AllocRunner) sendBroadcast(alloc *structs.Allocation) {
	// Try to send the alloc up to three times with a delay to allow recovery.
	sent := false
	for i := 0; i < 3; i++ {
		if sent = r.allocBroadcast.Send(alloc); sent {
			break
		}
		time.Sleep(500 * time.Millisecond)
	}
	if !sent {
		r.logger.Printf("[WARN] client: failed to broadcast update to allocation %q", r.allocID)
	}
}

// setStatus is used to update the allocation status
func (r *AllocRunner) setStatus(status, desc string) {
	r.allocLock.Lock()
	r.allocClientStatus = status
	r.allocClientDescription = desc
	r.allocLock.Unlock()
	select {
	case r.dirtyCh <- struct{}{}:
	default:
	}
}
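
// The dirtyCh send above is the standard non-blocking "mark dirty" idiom: a
// buffered channel of capacity one coalesces any number of marks into at
// most one pending wakeup for dirtySyncState. A standalone sketch:
func exampleMarkDirty(dirty chan struct{}) {
	select {
	case dirty <- struct{}{}:
		// First mark since the last sync; the watcher will wake up.
	default:
		// A sync is already pending; this mark is coalesced into it.
	}
}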

// setTaskState is used to set the status of a task. If state is empty then
// only the event is appended and the change is not synced with the server.
// The event may be nil, in which case the state is updated without
// recording a new event.
func (r *AllocRunner) setTaskState(taskName, state string, event *structs.TaskEvent) {
	r.taskStatusLock.Lock()
	defer r.taskStatusLock.Unlock()
	taskState, ok := r.taskStates[taskName]
	if !ok {
		taskState = &structs.TaskState{}
		r.taskStates[taskName] = taskState
	}

	// Record the event if one was given.
	if event != nil {
		if event.FailsTask {
			taskState.Failed = true
		}
		if event.Type == structs.TaskRestarting {
			taskState.Restarts++
			taskState.LastRestart = time.Unix(0, event.Time)
		}
		r.appendTaskEvent(taskState, event)
	}

	if state == "" {
		return
	}

	switch state {
	case structs.TaskStateRunning:
		// Capture the start time if it is just starting
		if taskState.State != structs.TaskStateRunning {
			taskState.StartedAt = time.Now().UTC()
		}
	case structs.TaskStateDead:
		// Capture the finished time. If it has never started there is no
		// finish time
		if !taskState.StartedAt.IsZero() {
			taskState.FinishedAt = time.Now().UTC()
		}

		// Find all tasks that are not the one that is dead and check if the one
		// that is dead is a leader
		var otherTaskRunners []*TaskRunner
		var otherTaskNames []string
		leader := false
		for task, tr := range r.tasks {
			if task != taskName {
				otherTaskRunners = append(otherTaskRunners, tr)
				otherTaskNames = append(otherTaskNames, task)
			} else if tr.task.Leader {
				leader = true
			}
		}

		// If the task failed, we should kill all the other tasks in the task group.
		if taskState.Failed {
			for _, tr := range otherTaskRunners {
				tr.Destroy(structs.NewTaskEvent(structs.TaskSiblingFailed).SetFailedSibling(taskName))
			}
			if len(otherTaskRunners) > 0 {
				r.logger.Printf("[DEBUG] client: task %q failed, destroying other tasks in task group: %v", taskName, otherTaskNames)
			}
		} else if leader {
			// If the task was a leader task we should kill all the other tasks.
			for _, tr := range otherTaskRunners {
				tr.Destroy(structs.NewTaskEvent(structs.TaskLeaderDead))
			}
			if len(otherTaskRunners) > 0 {
				r.logger.Printf("[DEBUG] client: leader task %q is dead, destroying other tasks in task group: %v", taskName, otherTaskNames)
			}
		}
	}

	// Store the new state
	taskState.State = state

	select {
	case r.dirtyCh <- struct{}{}:
	default:
	}
}

// appendTaskEvent updates the task status by appending the new event.
func (r *AllocRunner) appendTaskEvent(state *structs.TaskState, event *structs.TaskEvent) {
	capacity := 10
	if state.Events == nil {
		state.Events = make([]*structs.TaskEvent, 0, capacity)
	}

	// If we hit capacity, then shift it.
	if len(state.Events) == capacity {
		old := state.Events
		state.Events = make([]*structs.TaskEvent, 0, capacity)
		state.Events = append(state.Events, old[1:]...)
	}

	state.Events = append(state.Events, event)
}
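
// A quick illustration of the bounded history: after an eleventh append the
// oldest event has been shifted out and only the ten most recent remain.
func exampleEventWindow(r *AllocRunner) *structs.TaskState {
	state := &structs.TaskState{}
	for i := 0; i < 11; i++ {
		r.appendTaskEvent(state, structs.NewTaskEvent(structs.TaskStarted))
	}
	// len(state.Events) == 10; the first event was dropped.
	return state
}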

// Run is a long running goroutine used to manage an allocation
func (r *AllocRunner) Run() {
	defer close(r.waitCh)
	go r.dirtySyncState()

	// Find the task group to run in the allocation
	alloc := r.Alloc()
	tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup)
	if tg == nil {
		r.logger.Printf("[ERR] client: alloc %q for missing task group %q", r.allocID, alloc.TaskGroup)
		r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("missing task group '%s'", alloc.TaskGroup))
		return
	}

	// Create the execution context
	r.allocDirLock.Lock()
	// Build allocation directory (idempotent)
	if err := r.allocDir.Build(); err != nil {
		r.logger.Printf("[ERR] client: failed to build task directories: %v", err)
		r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("failed to build task dirs for '%s'", alloc.TaskGroup))
		r.allocDirLock.Unlock()
		return
	}

	if r.otherAllocDir != nil {
		if err := r.allocDir.Move(r.otherAllocDir, tg.Tasks); err != nil {
			r.logger.Printf("[ERR] client: failed to move alloc dir into alloc %q: %v", r.allocID, err)
		}
		if err := r.otherAllocDir.Destroy(); err != nil {
			r.logger.Printf("[ERR] client: error destroying allocdir %v: %v", r.otherAllocDir.AllocDir, err)
		}
	}
	r.allocDirLock.Unlock()

	// Check if the allocation is in a terminal status. In this case, we don't
	// start any of the task runners and directly wait for the destroy signal to
	// clean up the allocation.
	if alloc.TerminalStatus() {
		r.logger.Printf("[DEBUG] client: alloc %q in terminal status, waiting for destroy", r.allocID)
		r.handleDestroy()
		r.logger.Printf("[DEBUG] client: terminating runner for alloc '%s'", r.allocID)
		return
	}

	// Start the watcher
	wCtx, watcherCancel := context.WithCancel(r.ctx)
	go r.watchHealth(wCtx)

	// Start the task runners
	r.logger.Printf("[DEBUG] client: starting task runners for alloc '%s'", r.allocID)
	r.taskLock.Lock()
	for _, task := range tg.Tasks {
		if _, ok := r.restored[task.Name]; ok {
			continue
		}

		r.allocDirLock.Lock()
		taskdir := r.allocDir.NewTaskDir(task.Name)
		r.allocDirLock.Unlock()

		tr := NewTaskRunner(r.logger, r.config, r.stateDB, r.setTaskState, taskdir, r.Alloc(), task.Copy(), r.vaultClient, r.consulClient)
		r.tasks[task.Name] = tr
		tr.MarkReceived()

		go tr.Run()
	}
	r.taskLock.Unlock()

	// taskDestroyEvent contains an event that caused the destruction of a task
	// in the allocation.
	var taskDestroyEvent *structs.TaskEvent

OUTER:
	// Wait for updates
	for {
		select {
		case update := <-r.updateCh:
			// Store the updated allocation.
			r.allocLock.Lock()

			// If the deployment IDs have changed, clear the health
			if r.alloc.DeploymentID != update.DeploymentID {
				r.allocHealth = nil
			}

			r.alloc = update
			r.allocLock.Unlock()

			// Create a new watcher
			watcherCancel()
			wCtx, watcherCancel = context.WithCancel(r.ctx)
			go r.watchHealth(wCtx)

			// Check if we're in a terminal status
			if update.TerminalStatus() {
				taskDestroyEvent = structs.NewTaskEvent(structs.TaskKilled)
				break OUTER
			}

			// Update the task groups
			runners := r.getTaskRunners()
			for _, tr := range runners {
				tr.Update(update)
			}

			if err := r.syncStatus(); err != nil {
				r.logger.Printf("[WARN] client: failed to sync alloc %q status upon receiving alloc update: %v",
					r.allocID, err)
			}
		case <-r.ctx.Done():
			taskDestroyEvent = structs.NewTaskEvent(structs.TaskKilled)
			break OUTER
		}
	}

	// Kill the task runners
	r.destroyTaskRunners(taskDestroyEvent)

	// Block until we should destroy the state of the alloc
	r.handleDestroy()

	// Free up the context. It has likely exited already
	watcherCancel()

	r.logger.Printf("[DEBUG] client: terminating runner for alloc '%s'", r.allocID)
}

// SetPreviousAllocDir sets the previous allocation directory of the current
// allocation
func (r *AllocRunner) SetPreviousAllocDir(allocDir *allocdir.AllocDir) {
	r.otherAllocDir = allocDir
}

// destroyTaskRunners destroys the task runners and waits for them to
// terminate, destroying the leader task's runner first if one exists.
func (r *AllocRunner) destroyTaskRunners(destroyEvent *structs.TaskEvent) {
	// First destroy the leader if one exists
	tg := r.alloc.Job.LookupTaskGroup(r.alloc.TaskGroup)
	leader := ""
	for _, task := range tg.Tasks {
		if task.Leader {
			leader = task.Name
			break
		}
	}
	if leader != "" {
		r.taskLock.RLock()
		tr := r.tasks[leader]
		r.taskLock.RUnlock()

		r.logger.Printf("[DEBUG] client: alloc %q destroying leader task %q of task group %q first",
			r.allocID, leader, r.alloc.TaskGroup)
		tr.Destroy(destroyEvent)
		<-tr.WaitCh()
	}

	// Then destroy non-leader tasks concurrently
	r.taskLock.RLock()
	for name, tr := range r.tasks {
		if name != leader {
			tr.Destroy(destroyEvent)
		}
	}
	r.taskLock.RUnlock()

	// Wait for termination of the task runners
	for _, tr := range r.getTaskRunners() {
		<-tr.WaitCh()
	}
}

// handleDestroy blocks until the AllocRunner should be destroyed and does
// the necessary cleanup.
func (r *AllocRunner) handleDestroy() {
	// Final state sync. We do this to ensure that the server has the correct
	// state as we wait for a destroy.
	alloc := r.Alloc()

	//TODO(schmichael) updater can cause a GC which can block on this alloc
	// runner shutting down. Since handleDestroy can be called by Run() we
	// can't block shutdown here as it would cause a deadlock.
	go r.updater(alloc)

	// Broadcast and persist state synchronously
	r.sendBroadcast(alloc)
	if err := r.saveAllocRunnerState(); err != nil {
		r.logger.Printf("[WARN] client: alloc %q unable to persist state but should be GC'd soon anyway: %v",
			r.allocID, err)
	}

	for {
		select {
		case <-r.ctx.Done():
			if err := r.DestroyContext(); err != nil {
				r.logger.Printf("[ERR] client: failed to destroy context for alloc '%s': %v",
					r.allocID, err)
			}
			if err := r.DestroyState(); err != nil {
				r.logger.Printf("[ERR] client: failed to destroy state for alloc '%s': %v",
					r.allocID, err)
			}

			return
		case <-r.updateCh:
			r.logger.Printf("[DEBUG] client: dropping update to terminal alloc '%s'", r.allocID)
		}
	}
}

// Update is used to deliver an updated allocation to the runner. The update
// is dropped if the runner's buffer is full.
func (r *AllocRunner) Update(update *structs.Allocation) {
	select {
	case r.updateCh <- update:
	default:
		r.logger.Printf("[ERR] client: dropping update to alloc '%s'", update.ID)
	}
}

// StatsReporter returns an interface to query resource usage statistics of an
// allocation
func (r *AllocRunner) StatsReporter() AllocStatsReporter {
	return r
}

// getTaskRunners is a helper that returns a copy of the task runners list using
// the taskLock.
func (r *AllocRunner) getTaskRunners() []*TaskRunner {
	// Get the task runners
	r.taskLock.RLock()
	defer r.taskLock.RUnlock()
	runners := make([]*TaskRunner, 0, len(r.tasks))
	for _, tr := range r.tasks {
		runners = append(runners, tr)
	}
	return runners
}

// LatestAllocStats returns the latest allocation stats. If the optional taskFilter is set
// the allocation stats will only include the given task.
func (r *AllocRunner) LatestAllocStats(taskFilter string) (*cstructs.AllocResourceUsage, error) {
	astat := &cstructs.AllocResourceUsage{
		Tasks: make(map[string]*cstructs.TaskResourceUsage),
	}

	var flat []*cstructs.TaskResourceUsage
	if taskFilter != "" {
		r.taskLock.RLock()
		tr, ok := r.tasks[taskFilter]
		r.taskLock.RUnlock()
		if !ok {
			return nil, fmt.Errorf("allocation %q has no task %q", r.allocID, taskFilter)
		}
		l := tr.LatestResourceUsage()
		if l != nil {
			astat.Tasks[taskFilter] = l
			flat = []*cstructs.TaskResourceUsage{l}
			astat.Timestamp = l.Timestamp
		}
	} else {
		// Get the task runners
		runners := r.getTaskRunners()
		for _, tr := range runners {
			l := tr.LatestResourceUsage()
			if l != nil {
				astat.Tasks[tr.task.Name] = l
				flat = append(flat, l)
				if l.Timestamp > astat.Timestamp {
					astat.Timestamp = l.Timestamp
				}
			}
		}
	}

	astat.ResourceUsage = sumTaskResourceUsage(flat)
	return astat, nil
}
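
// A usage sketch for the stats interface: an empty filter aggregates over
// every task in the allocation.
func examplePrintStats(ar *AllocRunner) error {
	usage, err := ar.StatsReporter().LatestAllocStats("")
	if err != nil {
		return err
	}
	fmt.Printf("alloc %s: %d tasks sampled at %d\n",
		ar.allocID, len(usage.Tasks), usage.Timestamp)
	return nil
}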

// sumTaskResourceUsage takes a set of task resource usages and sums them
func sumTaskResourceUsage(usages []*cstructs.TaskResourceUsage) *cstructs.ResourceUsage {
	summed := &cstructs.ResourceUsage{
		MemoryStats: &cstructs.MemoryStats{},
		CpuStats:    &cstructs.CpuStats{},
	}
	for _, usage := range usages {
		summed.Add(usage.ResourceUsage)
	}
	return summed
}

// shouldUpdate takes the AllocModifyIndex of an allocation sent from the server and
// checks if the current running allocation is behind and should be updated.
func (r *AllocRunner) shouldUpdate(serverIndex uint64) bool {
	r.allocLock.Lock()
	defer r.allocLock.Unlock()
	return r.alloc.AllocModifyIndex < serverIndex
}
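
// A minimal sketch of how a caller (hypothetically, the client's alloc sync
// loop) would combine shouldUpdate with Update to skip stale server
// payloads.
func exampleMaybeUpdate(ar *AllocRunner, updated *structs.Allocation) {
	if !ar.shouldUpdate(updated.AllocModifyIndex) {
		return // the runner already has this version or a newer one
	}
	ar.Update(updated)
}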

// Destroy is used to indicate that the allocation context should be destroyed
func (r *AllocRunner) Destroy() {
	// Lock when closing the context as that serializes with the code that
	// saves state.
	r.allocStateLock.Lock()
	defer r.allocStateLock.Unlock()

	r.exitFn()
	r.allocBroadcast.Close()
}

// WaitCh returns a channel to wait for termination
func (r *AllocRunner) WaitCh() <-chan struct{} {
	return r.waitCh
}