github.com/zhizhiboom/nomad@v0.8.5-0.20180907175415-f28fd3a1a056/client/allocrunner/alloc_runner.go

     1  package allocrunner
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"log"
     7  	"path/filepath"
     8  	"sync"
     9  	"time"
    10  
    11  	metrics "github.com/armon/go-metrics"
    12  	"github.com/boltdb/bolt"
    13  	"github.com/hashicorp/go-multierror"
    14  	"github.com/hashicorp/nomad/client/allocdir"
    15  	"github.com/hashicorp/nomad/client/allocrunner/taskrunner"
    16  	"github.com/hashicorp/nomad/client/config"
    17  	consulApi "github.com/hashicorp/nomad/client/consul"
    18  	"github.com/hashicorp/nomad/client/state"
    19  	"github.com/hashicorp/nomad/client/vaultclient"
    20  	"github.com/hashicorp/nomad/helper"
    21  	"github.com/hashicorp/nomad/nomad/structs"
    22  
    23  	cstructs "github.com/hashicorp/nomad/client/structs"
    24  )
    25  
    26  var (
    27  	// The following are the key paths written to the state database
    28  	allocRunnerStateAllocKey     = []byte("alloc")
    29  	allocRunnerStateImmutableKey = []byte("immutable")
    30  	allocRunnerStateMutableKey   = []byte("mutable")
    31  	allocRunnerStateAllocDirKey  = []byte("alloc-dir")
    32  )
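
        // A rough sketch (an assumption based on the state helpers used in
        // this file, not a verbatim schema) of how these keys land in the
        // bolt database: each allocation gets its own bucket, and each key
        // above maps to one encoded object:
        //
        //	<allocations bucket>/
        //	  <alloc-id>/
        //	    alloc      -> allocRunnerAllocState
        //	    immutable  -> allocRunnerImmutableState
        //	    mutable    -> allocRunnerMutableState
        //	    alloc-dir  -> allocdir.AllocDir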
    33  
    34  // AllocStateUpdater is used to update the status of an allocation
    35  type AllocStateUpdater func(alloc *structs.Allocation)
    36  
    37  type AllocStatsReporter interface {
    38  	LatestAllocStats(taskFilter string) (*cstructs.AllocResourceUsage, error)
    39  }
    40  
    41  // AllocRunner is used to wrap an allocation and provide the execution context.
    42  type AllocRunner struct {
    43  	config  *config.Config
    44  	updater AllocStateUpdater
    45  	logger  *log.Logger
    46  
    47  	// allocID is the ID of this runner's allocation. Since it does not
    48  	// change for the lifetime of the AllocRunner it is safe to read
    49  	// without acquiring a lock (unlike alloc).
    50  	allocID string
    51  
    52  	alloc                  *structs.Allocation
    53  	allocClientStatus      string // Explicit status of allocation. Set when there are failures
    54  	allocClientDescription string
    55  	allocHealth            *bool     // Whether the allocation is healthy
    56  	allocHealthTime        time.Time // Time at which the allocation's health was last set
    57  	allocBroadcast         *cstructs.AllocBroadcaster
    58  	allocLock              sync.Mutex
    59  
    60  	dirtyCh chan struct{}
    61  
    62  	allocDir     *allocdir.AllocDir
    63  	allocDirLock sync.Mutex
    64  
    65  	tasks      map[string]*taskrunner.TaskRunner
    66  	taskStates map[string]*structs.TaskState
    67  	restored   map[string]struct{}
    68  	taskLock   sync.RWMutex
    69  
    70  	taskStatusLock sync.RWMutex
    71  
    72  	updateCh chan *structs.Allocation
    73  
    74  	vaultClient  vaultclient.VaultClient
    75  	consulClient consulApi.ConsulServiceAPI
    76  
    77  	// prevAlloc allows waiting until a previous allocation exits and
    78  	// then migrates its data. If sticky volumes aren't used and there's
    79  	// no previous allocation, a noop implementation is used so it is
    80  	// always safe to call.
    81  	prevAlloc prevAllocWatcher
    82  
    83  	// ctx is cancelled with exitFn to cause the alloc to be destroyed
    84  	// (stopped and GC'd).
    85  	ctx    context.Context
    86  	exitFn context.CancelFunc
    87  
    88  	// waitCh is closed when the Run method exits. At that point the alloc
    89  	// has stopped and been GC'd.
    90  	waitCh chan struct{}
    91  
    92  	// State related fields
    93  	// stateDB is used to store the alloc runner's state
    94  	stateDB        *bolt.DB
    95  	allocStateLock sync.Mutex
    96  
    97  	// persistedEval is the last persisted evaluation ID. Since evaluation
    98  	// IDs change on every allocation update we only need to persist the
    99  	// allocation when its eval ID != the last persisted eval ID.
   100  	persistedEvalLock sync.Mutex
   101  	persistedEval     string
   102  
   103  	// immutablePersisted and allocDirPersisted are used to track whether the
   104  	// immutable data and the alloc dir have been persisted. Once persisted we
   105  	// can lower write volume by not re-writing these values
   106  	immutablePersisted bool
   107  	allocDirPersisted  bool
   108  
   109  	// baseLabels are used when emitting tagged metrics. All alloc runner metrics
   110  	// will have these tags, and optionally more.
   111  	baseLabels []metrics.Label
   112  }
   113  
   114  // allocRunnerAllocState is state that only has to be written when the alloc
   115  // changes.
   116  type allocRunnerAllocState struct {
   117  	Alloc *structs.Allocation
   118  }
   119  
   120  // allocRunnerImmutableState is state that only has to be written once.
   121  type allocRunnerImmutableState struct {
   122  	Version string
   123  }
   124  
   125  // allocRunnerMutableState is state that has to be written on each save as it
   126  // changes over the life-cycle of the alloc_runner.
   127  type allocRunnerMutableState struct {
   128  	AllocClientStatus      string
   129  	AllocClientDescription string
   130  	TaskStates             map[string]*structs.TaskState
   131  	DeploymentStatus       *structs.AllocDeploymentStatus
   132  }
   133  
   134  // NewAllocRunner is used to create a new allocation context
   135  func NewAllocRunner(logger *log.Logger, config *config.Config, stateDB *bolt.DB, updater AllocStateUpdater,
   136  	alloc *structs.Allocation, vaultClient vaultclient.VaultClient, consulClient consulApi.ConsulServiceAPI,
   137  	prevAlloc prevAllocWatcher) *AllocRunner {
   138  
   139  	ar := &AllocRunner{
   140  		config:         config,
   141  		stateDB:        stateDB,
   142  		updater:        updater,
   143  		logger:         logger,
   144  		alloc:          alloc,
   145  		allocID:        alloc.ID,
   146  		allocBroadcast: cstructs.NewAllocBroadcaster(8),
   147  		prevAlloc:      prevAlloc,
   148  		dirtyCh:        make(chan struct{}, 1),
   149  		allocDir:       allocdir.NewAllocDir(logger, filepath.Join(config.AllocDir, alloc.ID)),
   150  		tasks:          make(map[string]*taskrunner.TaskRunner),
   151  		taskStates:     copyTaskStates(alloc.TaskStates),
   152  		restored:       make(map[string]struct{}),
   153  		updateCh:       make(chan *structs.Allocation, 64),
   154  		waitCh:         make(chan struct{}),
   155  		vaultClient:    vaultClient,
   156  		consulClient:   consulClient,
   157  	}
   158  
   159  	// TODO Should be passed a context
   160  	ar.ctx, ar.exitFn = context.WithCancel(context.TODO())
   161  
   162  	return ar
   163  }
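
        // A minimal usage sketch (hypothetical wiring; in practice the Nomad
        // client owns these dependencies and channels):
        //
        //	ar := NewAllocRunner(logger, cfg, stateDB, updateFn, alloc,
        //		vaultClient, consulClient, prevWatcher)
        //	go ar.Run()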
   164  
   165  // setBaseLabels creates the set of base labels. It should be called after
   166  // RestoreState has run so the allocation is guaranteed to be loaded.
   167  func (r *AllocRunner) setBaseLabels() {
   168  	r.baseLabels = make([]metrics.Label, 0, 3)
   169  
   170  	if r.alloc.Job != nil {
   171  		r.baseLabels = append(r.baseLabels, metrics.Label{
   172  			Name:  "job",
   173  			Value: r.alloc.Job.Name,
   174  		})
   175  	}
   176  	if r.alloc.TaskGroup != "" {
   177  		r.baseLabels = append(r.baseLabels, metrics.Label{
   178  			Name:  "task_group",
   179  			Value: r.alloc.TaskGroup,
   180  		})
   181  	}
   182  	if r.config != nil && r.config.Node != nil {
   183  		r.baseLabels = append(r.baseLabels, metrics.Label{
   184  			Name:  "node_id",
   185  			Value: r.config.Node.ID,
   186  		})
   187  	}
   188  }
   189  
   190  // pre060StateFilePath returns the path to our state file that would have been
   191  // written pre v0.6.0
   192  // COMPAT: Remove in 0.7.0
   193  func (r *AllocRunner) pre060StateFilePath() string {
   194  	r.allocLock.Lock()
   195  	defer r.allocLock.Unlock()
   196  	path := filepath.Join(r.config.StateDir, "alloc", r.allocID, "state.json")
   197  	return path
   198  }
   199  
   200  // RestoreState is used to restore the state of the alloc runner
   201  func (r *AllocRunner) RestoreState() error {
   202  	err := r.stateDB.View(func(tx *bolt.Tx) error {
   203  		bkt, err := state.GetAllocationBucket(tx, r.allocID)
   204  		if err != nil {
   205  			return fmt.Errorf("failed to get allocation bucket: %v", err)
   206  		}
   207  
   208  		// Get the state objects
   209  		var mutable allocRunnerMutableState
   210  		var immutable allocRunnerImmutableState
   211  		var allocState allocRunnerAllocState
   212  		var allocDir allocdir.AllocDir
   213  
   214  		if err := state.GetObject(bkt, allocRunnerStateAllocKey, &allocState); err != nil {
   215  			return fmt.Errorf("failed to read alloc runner alloc state: %v", err)
   216  		}
   217  		if err := state.GetObject(bkt, allocRunnerStateImmutableKey, &immutable); err != nil {
   218  			return fmt.Errorf("failed to read alloc runner immutable state: %v", err)
   219  		}
   220  		if err := state.GetObject(bkt, allocRunnerStateMutableKey, &mutable); err != nil {
   221  			return fmt.Errorf("failed to read alloc runner mutable state: %v", err)
   222  		}
   223  		if err := state.GetObject(bkt, allocRunnerStateAllocDirKey, &allocDir); err != nil {
   224  			return fmt.Errorf("failed to read alloc runner alloc_dir state: %v", err)
   225  		}
   226  
    227  		// Populate the fields
    228  		r.alloc = allocState.Alloc
    229  		r.allocDir = &allocDir
    230  		r.allocClientStatus = mutable.AllocClientStatus
    231  		r.allocClientDescription = mutable.AllocClientDescription
    232  		r.taskStates = mutable.TaskStates
        		// Guard against dereferencing a nil alloc: the snapshot is
        		// validated and an error returned just below.
        		if r.alloc != nil {
    233  			r.alloc.ClientStatus = getClientStatus(r.taskStates)
    234  			r.alloc.DeploymentStatus = mutable.DeploymentStatus
        		}
    235  		return nil
   236  	})
   237  
   238  	if err != nil {
   239  		return fmt.Errorf("failed to read allocation state: %v", err)
   240  	}
   241  
   242  	var snapshotErrors multierror.Error
   243  	if r.alloc == nil {
   244  		snapshotErrors.Errors = append(snapshotErrors.Errors, fmt.Errorf("alloc_runner snapshot includes a nil allocation"))
   245  	}
   246  	if r.allocDir == nil {
   247  		snapshotErrors.Errors = append(snapshotErrors.Errors, fmt.Errorf("alloc_runner snapshot includes a nil alloc dir"))
   248  	}
   249  	if e := snapshotErrors.ErrorOrNil(); e != nil {
   250  		return e
   251  	}
   252  
   253  	tg := r.alloc.Job.LookupTaskGroup(r.alloc.TaskGroup)
   254  	if tg == nil {
   255  		return fmt.Errorf("restored allocation doesn't contain task group %q", r.alloc.TaskGroup)
   256  	}
   257  
   258  	// Restore the task runners
   259  	taskDestroyEvent := structs.NewTaskEvent(structs.TaskKilled)
   260  	var mErr multierror.Error
   261  	for _, task := range tg.Tasks {
   262  		name := task.Name
   263  		state := r.taskStates[name]
   264  
    265  		// Nomad exited before the task could start, so there is nothing to restore.
   266  		// AllocRunner.Run will start a new TaskRunner for this task
   267  		if state == nil {
   268  			continue
   269  		}
   270  
   271  		// Mark the task as restored.
   272  		r.restored[name] = struct{}{}
   273  
   274  		td, ok := r.allocDir.TaskDirs[name]
   275  		if !ok {
   276  			// Create the task dir metadata if it doesn't exist.
   277  			// Since task dirs are created during r.Run() the
   278  			// client may save state and exit before all task dirs
   279  			// are created
   280  			td = r.allocDir.NewTaskDir(name)
   281  		}
   282  
   283  		// Skip tasks in terminal states.
   284  		if state.State == structs.TaskStateDead {
   285  			continue
   286  		}
   287  
   288  		tr := taskrunner.NewTaskRunner(r.logger, r.config, r.stateDB, r.setTaskState, td, r.Alloc(), task, r.vaultClient, r.consulClient)
   289  		r.tasks[name] = tr
   290  
   291  		if restartReason, err := tr.RestoreState(); err != nil {
   292  			r.logger.Printf("[ERR] client: failed to restore state for alloc %s task %q: %v", r.allocID, name, err)
   293  			mErr.Errors = append(mErr.Errors, err)
   294  		} else if !r.alloc.TerminalStatus() {
   295  			// Only start if the alloc isn't in a terminal status.
   296  			go tr.Run()
   297  
   298  			// Restart task runner if RestoreState gave a reason
   299  			if restartReason != "" {
   300  				r.logger.Printf("[INFO] client: restarting alloc %s task %s: %v", r.allocID, name, restartReason)
   301  				const failure = false
   302  				tr.Restart("upgrade", restartReason, failure)
   303  			}
   304  		} else {
   305  			// XXX This does nothing and is broken since the task runner is not
   306  			// running yet, and there is nothing listening to the destroy ch.
   307  			// XXX When a single task is dead in the allocation we should kill
    308  		// all the tasks. This currently does NOT happen. Re-enable test:
   309  			// TestAllocRunner_TaskLeader_StopRestoredTG
   310  			tr.Destroy(taskDestroyEvent)
   311  		}
   312  	}
   313  
   314  	return mErr.ErrorOrNil()
   315  }
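
        // On client restart the expected call order is, roughly (a sketch of
        // the caller's restore path, not code from this package):
        //
        //	ar := NewAllocRunner(...)          // rebuild the runner
        //	if err := ar.RestoreState(); err != nil {
        //		// handle or log the error
        //	}
        //	go ar.Run()                        // restored tasks resume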
   316  
    317  // SaveState is used to snapshot the state of the alloc runner. It
    318  // first persists the alloc runner's own state and then the state of
    319  // each task runner associated with the alloc, collecting any task
    320  // runner save errors into a single multierror.
   321  func (r *AllocRunner) SaveState() error {
   322  	if err := r.saveAllocRunnerState(); err != nil {
   323  		return err
   324  	}
   325  
   326  	// Save state for each task
   327  	runners := r.getTaskRunners()
   328  	var mErr multierror.Error
   329  	for _, tr := range runners {
   330  		if err := tr.SaveState(); err != nil {
   331  			mErr.Errors = append(mErr.Errors, fmt.Errorf("failed to save state for alloc %s task %q: %v",
   332  				r.allocID, tr.Name(), err))
   333  		}
   334  	}
   335  	return mErr.ErrorOrNil()
   336  }
   337  
   338  func (r *AllocRunner) saveAllocRunnerState() error {
   339  	r.allocStateLock.Lock()
   340  	defer r.allocStateLock.Unlock()
   341  
   342  	if r.ctx.Err() == context.Canceled {
   343  		return nil
   344  	}
   345  
   346  	// Grab all the relevant data
   347  	alloc := r.Alloc()
   348  
   349  	r.allocLock.Lock()
   350  	allocClientStatus := r.allocClientStatus
   351  	allocClientDescription := r.allocClientDescription
   352  	r.allocLock.Unlock()
   353  
   354  	r.allocDirLock.Lock()
   355  	allocDir := r.allocDir.Copy()
   356  	r.allocDirLock.Unlock()
   357  
   358  	// Start the transaction.
   359  	return r.stateDB.Batch(func(tx *bolt.Tx) error {
   360  
   361  		// Grab the allocation bucket
   362  		allocBkt, err := state.GetAllocationBucket(tx, r.allocID)
   363  		if err != nil {
   364  			return fmt.Errorf("failed to retrieve allocation bucket: %v", err)
   365  		}
   366  
   367  		// Write the allocation if the eval has changed
   368  		r.persistedEvalLock.Lock()
   369  		lastPersisted := r.persistedEval
   370  		r.persistedEvalLock.Unlock()
   371  		if alloc.EvalID != lastPersisted {
   372  			allocState := &allocRunnerAllocState{
   373  				Alloc: alloc,
   374  			}
   375  
   376  			if err := state.PutObject(allocBkt, allocRunnerStateAllocKey, &allocState); err != nil {
   377  				return fmt.Errorf("failed to write alloc_runner alloc state: %v", err)
   378  			}
   379  
   380  			tx.OnCommit(func() {
   381  				r.persistedEvalLock.Lock()
   382  				r.persistedEval = alloc.EvalID
   383  				r.persistedEvalLock.Unlock()
   384  			})
   385  		}
   386  
   387  		// Write immutable data iff it hasn't been written yet
   388  		if !r.immutablePersisted {
   389  			immutable := &allocRunnerImmutableState{
   390  				Version: r.config.Version.VersionNumber(),
   391  			}
   392  
   393  			if err := state.PutObject(allocBkt, allocRunnerStateImmutableKey, &immutable); err != nil {
   394  				return fmt.Errorf("failed to write alloc_runner immutable state: %v", err)
   395  			}
   396  
   397  			tx.OnCommit(func() {
   398  				r.immutablePersisted = true
   399  			})
   400  		}
   401  
   402  		// Write the alloc dir data if it hasn't been written before and it exists.
   403  		if !r.allocDirPersisted && allocDir != nil {
   404  			if err := state.PutObject(allocBkt, allocRunnerStateAllocDirKey, allocDir); err != nil {
   405  				return fmt.Errorf("failed to write alloc_runner allocDir state: %v", err)
   406  			}
   407  
   408  			tx.OnCommit(func() {
   409  				r.allocDirPersisted = true
   410  			})
   411  		}
   412  
   413  		// Write the mutable state every time
   414  		mutable := &allocRunnerMutableState{
   415  			AllocClientStatus:      allocClientStatus,
   416  			AllocClientDescription: allocClientDescription,
   417  			TaskStates:             alloc.TaskStates,
   418  			DeploymentStatus:       alloc.DeploymentStatus,
   419  		}
   420  
   421  		if err := state.PutObject(allocBkt, allocRunnerStateMutableKey, &mutable); err != nil {
   422  			return fmt.Errorf("failed to write alloc_runner mutable state: %v", err)
   423  		}
   424  
   425  		return nil
   426  	})
   427  }
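
        // The tx.OnCommit pattern above is what keeps the write-reduction
        // flags honest: a flag only flips after bolt reports the transaction
        // committed, so a failed batch never suppresses a future write. A
        // distilled sketch of the idiom (illustrative, not code used here):
        //
        //	err := db.Batch(func(tx *bolt.Tx) error {
        //		if err := writeState(tx); err != nil {
        //			return err // flag stays false; retried on next save
        //		}
        //		tx.OnCommit(func() { persisted = true })
        //		return nil
        //	})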
   428  
    429  // DestroyState is used to clean up after ourselves
   430  func (r *AllocRunner) DestroyState() error {
   431  	r.allocStateLock.Lock()
   432  	defer r.allocStateLock.Unlock()
   433  
   434  	return r.stateDB.Update(func(tx *bolt.Tx) error {
   435  		if err := state.DeleteAllocationBucket(tx, r.allocID); err != nil {
   436  			return fmt.Errorf("failed to delete allocation bucket: %v", err)
   437  		}
   438  		return nil
   439  	})
   440  }
   441  
    442  // DestroyContext is used to destroy the alloc dir on disk
   443  func (r *AllocRunner) DestroyContext() error {
   444  	return r.allocDir.Destroy()
   445  }
   446  
   447  // GetAllocDir returns the alloc dir for the alloc runner
   448  func (r *AllocRunner) GetAllocDir() *allocdir.AllocDir {
   449  	return r.allocDir
   450  }
   451  
   452  // GetListener returns a listener for updates broadcast by this alloc runner.
   453  // Callers are responsible for calling Close on their Listener.
   454  func (r *AllocRunner) GetListener() *cstructs.AllocListener {
   455  	return r.allocBroadcast.Listen()
   456  }
   457  
   458  // copyTaskStates returns a copy of the passed task states.
   459  func copyTaskStates(states map[string]*structs.TaskState) map[string]*structs.TaskState {
   460  	copy := make(map[string]*structs.TaskState, len(states))
   461  	for task, state := range states {
   462  		copy[task] = state.Copy()
   463  	}
   464  	return copy
   465  }
   466  
    467  // finalizeTerminalAlloc sets any missing required fields, like
    468  // FinishedAt, in the alloc runner's task states. FinishedAt is used
    469  // to calculate the reschedule time for failed allocs, so we make
    470  // sure it is set.
   471  func (r *AllocRunner) finalizeTerminalAlloc(alloc *structs.Allocation) {
   472  	if !alloc.ClientTerminalStatus() {
   473  		return
   474  	}
   475  	r.taskStatusLock.Lock()
   476  	defer r.taskStatusLock.Unlock()
   477  
   478  	group := alloc.Job.LookupTaskGroup(alloc.TaskGroup)
   479  	if r.taskStates == nil {
   480  		r.taskStates = make(map[string]*structs.TaskState)
   481  	}
   482  	now := time.Now()
   483  	for _, task := range group.Tasks {
   484  		ts, ok := r.taskStates[task.Name]
   485  		if !ok {
   486  			ts = &structs.TaskState{}
   487  			r.taskStates[task.Name] = ts
   488  		}
   489  		if ts.FinishedAt.IsZero() {
   490  			ts.FinishedAt = now
   491  		}
   492  	}
   493  	alloc.TaskStates = copyTaskStates(r.taskStates)
   494  }
   495  
   496  // Alloc returns the associated allocation
   497  func (r *AllocRunner) Alloc() *structs.Allocation {
   498  	r.allocLock.Lock()
   499  
   500  	// Don't do a deep copy of the job
   501  	alloc := r.alloc.CopySkipJob()
   502  
   503  	// The status has explicitly been set.
   504  	if r.allocClientStatus != "" || r.allocClientDescription != "" {
   505  		alloc.ClientStatus = r.allocClientStatus
   506  		alloc.ClientDescription = r.allocClientDescription
   507  
   508  		// Copy over the task states so we don't lose them
   509  		r.taskStatusLock.RLock()
   510  		alloc.TaskStates = copyTaskStates(r.taskStates)
   511  		r.taskStatusLock.RUnlock()
   512  
   513  		r.allocLock.Unlock()
   514  		r.finalizeTerminalAlloc(alloc)
   515  		return alloc
   516  	}
   517  
   518  	// The health has been set
   519  	if r.allocHealth != nil {
   520  		if alloc.DeploymentStatus == nil {
   521  			alloc.DeploymentStatus = &structs.AllocDeploymentStatus{}
   522  		}
   523  		alloc.DeploymentStatus.Healthy = helper.BoolToPtr(*r.allocHealth)
   524  		alloc.DeploymentStatus.Timestamp = r.allocHealthTime
   525  	}
   526  	r.allocLock.Unlock()
   527  
   528  	// Scan the task states to determine the status of the alloc
   529  	r.taskStatusLock.RLock()
   530  	alloc.TaskStates = copyTaskStates(r.taskStates)
   531  	alloc.ClientStatus = getClientStatus(r.taskStates)
   532  	r.taskStatusLock.RUnlock()
   533  
   534  	// If the client status is failed and we are part of a deployment, mark the
    535  	// alloc as unhealthy. This guards against the watcher not being started.
   536  	r.allocLock.Lock()
   537  	if alloc.ClientStatus == structs.AllocClientStatusFailed &&
   538  		alloc.DeploymentID != "" && !alloc.DeploymentStatus.IsUnhealthy() {
   539  		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
   540  			Healthy: helper.BoolToPtr(false),
   541  		}
   542  	}
   543  	r.allocLock.Unlock()
   544  	r.finalizeTerminalAlloc(alloc)
   545  	return alloc
   546  }
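
        // Status precedence in Alloc(): an explicitly set client status wins
        // outright; otherwise any recorded health is copied into the
        // deployment status and the client status is derived from the task
        // states, with a final guard marking a failed alloc in a deployment
        // as unhealthy.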
   547  
   548  // getClientStatus takes in the task states for a given allocation and computes
   549  // the client status
   550  func getClientStatus(taskStates map[string]*structs.TaskState) string {
   551  	var pending, running, dead, failed bool
   552  	for _, state := range taskStates {
   553  		switch state.State {
   554  		case structs.TaskStateRunning:
   555  			running = true
   556  		case structs.TaskStatePending:
   557  			pending = true
   558  		case structs.TaskStateDead:
   559  			if state.Failed {
   560  				failed = true
   561  			} else {
   562  				dead = true
   563  			}
   564  		}
   565  	}
   566  
   567  	// Determine the alloc status
   568  	if failed {
   569  		return structs.AllocClientStatusFailed
   570  	} else if running {
   571  		return structs.AllocClientStatusRunning
   572  	} else if pending {
   573  		return structs.AllocClientStatusPending
   574  	} else if dead {
   575  		return structs.AllocClientStatusComplete
   576  	}
   577  
   578  	return ""
   579  }
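
        // The branches above encode a strict precedence: failed > running >
        // pending > complete. For example, task states {running, dead+failed}
        // yield AllocClientStatusFailed, while {dead, dead} with no failures
        // yields AllocClientStatusComplete.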
   580  
    581  // dirtySyncState watches for state being marked dirty and persists it
   582  func (r *AllocRunner) dirtySyncState() {
   583  	for {
   584  		select {
   585  		case <-r.dirtyCh:
   586  			if err := r.syncStatus(); err != nil {
   587  				// Only WARN instead of ERR because we continue on
   588  				r.logger.Printf("[WARN] client: error persisting alloc %q state: %v",
   589  					r.allocID, err)
   590  			}
   591  		case <-r.ctx.Done():
   592  			return
   593  		}
   594  	}
   595  }
   596  
    597  // syncStatus pushes the current alloc to the server, broadcasts it, and persists alloc runner state
   598  func (r *AllocRunner) syncStatus() error {
   599  	// Get a copy of our alloc, update status server side and sync to disk
   600  	alloc := r.Alloc()
   601  	r.updater(alloc)
   602  	r.sendBroadcast(alloc)
   603  	return r.saveAllocRunnerState()
   604  }
   605  
   606  // sendBroadcast broadcasts an alloc update.
   607  func (r *AllocRunner) sendBroadcast(alloc *structs.Allocation) {
   608  	// Try to send the alloc up to three times with a delay to allow recovery.
   609  	sent := false
   610  	for i := 0; i < 3; i++ {
   611  		if sent = r.allocBroadcast.Send(alloc); sent {
   612  			break
   613  		}
   614  		time.Sleep(500 * time.Millisecond)
   615  	}
   616  	if !sent {
   617  		r.logger.Printf("[WARN] client: failed to broadcast update to allocation %q", r.allocID)
   618  	}
   619  }
   620  
   621  // setStatus is used to update the allocation status
   622  func (r *AllocRunner) setStatus(status, desc string) {
   623  	r.allocLock.Lock()
   624  	r.allocClientStatus = status
   625  	r.allocClientDescription = desc
   626  	r.allocLock.Unlock()
   627  	select {
   628  	case r.dirtyCh <- struct{}{}:
   629  	default:
   630  	}
   631  }
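
        // The non-blocking send above coalesces notifications: dirtyCh has a
        // buffer of one, so any number of status changes between syncs wake
        // dirtySyncState exactly once. A distilled sketch of the idiom:
        //
        //	dirty := make(chan struct{}, 1)
        //	markDirty := func() {
        //		select {
        //		case dirty <- struct{}{}: // first notification wins
        //		default: // a sync is already pending; drop this one
        //		}
        //	}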
   632  
   633  // setTaskState is used to set the status of a task. If lazySync is set then the
   634  // event is appended but not synced with the server. If state is omitted, the
   635  // last known state is used.
   636  func (r *AllocRunner) setTaskState(taskName, state string, event *structs.TaskEvent, lazySync bool) {
   637  	r.taskStatusLock.Lock()
   638  	defer r.taskStatusLock.Unlock()
   639  	taskState, ok := r.taskStates[taskName]
   640  	if !ok {
   641  		taskState = &structs.TaskState{}
   642  		r.taskStates[taskName] = taskState
   643  	}
   644  
    645  	// Set the task's state.
   646  	if event != nil {
   647  		if event.FailsTask {
   648  			taskState.Failed = true
   649  		}
   650  		if event.Type == structs.TaskRestarting {
   651  			if !r.config.DisableTaggedMetrics {
   652  				metrics.IncrCounterWithLabels([]string{"client", "allocs", "restart"},
   653  					1, r.baseLabels)
   654  			}
   655  			if r.config.BackwardsCompatibleMetrics {
   656  				metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, taskName, "restart"}, 1)
   657  			}
   658  			taskState.Restarts++
   659  			taskState.LastRestart = time.Unix(0, event.Time)
   660  		}
   661  		r.appendTaskEvent(taskState, event)
   662  	}
   663  
   664  	if lazySync {
   665  		return
   666  	}
   667  
   668  	// If the state hasn't been set use the existing state.
   669  	if state == "" {
   670  		state = taskState.State
   671  		if taskState.State == "" {
   672  			state = structs.TaskStatePending
   673  		}
   674  	}
   675  
   676  	switch state {
   677  	case structs.TaskStateRunning:
   678  		// Capture the start time if it is just starting
   679  		if taskState.State != structs.TaskStateRunning {
   680  			taskState.StartedAt = time.Now().UTC()
   681  			if !r.config.DisableTaggedMetrics {
   682  				metrics.IncrCounterWithLabels([]string{"client", "allocs", "running"},
   683  					1, r.baseLabels)
   684  			}
   685  			if r.config.BackwardsCompatibleMetrics {
   686  				metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, taskName, "running"}, 1)
   687  			}
   688  		}
   689  	case structs.TaskStateDead:
   690  		// Capture the finished time if not already set
   691  		if taskState.FinishedAt.IsZero() {
   692  			taskState.FinishedAt = time.Now().UTC()
   693  		}
   694  
   695  		// Find all tasks that are not the one that is dead and check if the one
   696  		// that is dead is a leader
   697  		var otherTaskRunners []*taskrunner.TaskRunner
   698  		var otherTaskNames []string
   699  		leader := false
   700  		for task, tr := range r.tasks {
   701  			if task != taskName {
   702  				otherTaskRunners = append(otherTaskRunners, tr)
   703  				otherTaskNames = append(otherTaskNames, task)
   704  			} else if tr.IsLeader() {
   705  				leader = true
   706  			}
   707  		}
   708  
    709  		// Emit metrics indicating task completion and failures
   710  		if taskState.Failed {
   711  			if !r.config.DisableTaggedMetrics {
   712  				metrics.IncrCounterWithLabels([]string{"client", "allocs", "failed"},
   713  					1, r.baseLabels)
   714  			}
   715  			if r.config.BackwardsCompatibleMetrics {
   716  				metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, taskName, "failed"}, 1)
   717  			}
   718  		} else {
   719  			if !r.config.DisableTaggedMetrics {
   720  				metrics.IncrCounterWithLabels([]string{"client", "allocs", "complete"},
   721  					1, r.baseLabels)
   722  			}
   723  			if r.config.BackwardsCompatibleMetrics {
   724  				metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, taskName, "complete"}, 1)
   725  			}
   726  		}
   727  
   728  		// If the task failed, we should kill all the other tasks in the task group.
   729  		if taskState.Failed {
   730  			for _, tr := range otherTaskRunners {
   731  				tr.Destroy(structs.NewTaskEvent(structs.TaskSiblingFailed).SetFailedSibling(taskName))
   732  			}
   733  			if len(otherTaskRunners) > 0 {
   734  				r.logger.Printf("[DEBUG] client: task %q failed, destroying other tasks in task group: %v", taskName, otherTaskNames)
   735  			}
   736  		} else if leader {
   737  			// If the task was a leader task we should kill all the other tasks.
   738  			for _, tr := range otherTaskRunners {
   739  				tr.Destroy(structs.NewTaskEvent(structs.TaskLeaderDead))
   740  			}
   741  			if len(otherTaskRunners) > 0 {
   742  				r.logger.Printf("[DEBUG] client: leader task %q is dead, destroying other tasks in task group: %v", taskName, otherTaskNames)
   743  			}
   744  		}
   745  	}
   746  
   747  	// Store the new state
   748  	taskState.State = state
   749  
   750  	select {
   751  	case r.dirtyCh <- struct{}{}:
   752  	default:
   753  	}
   754  }
   755  
   756  // appendTaskEvent updates the task status by appending the new event.
   757  func (r *AllocRunner) appendTaskEvent(state *structs.TaskState, event *structs.TaskEvent) {
   758  	capacity := 10
   759  	if state.Events == nil {
   760  		state.Events = make([]*structs.TaskEvent, 0, capacity)
   761  	}
   762  
   763  	// If we hit capacity, then shift it.
   764  	if len(state.Events) == capacity {
   765  		old := state.Events
   766  		state.Events = make([]*structs.TaskEvent, 0, capacity)
   767  		state.Events = append(state.Events, old[1:]...)
   768  	}
   769  
   770  	state.Events = append(state.Events, event)
   771  }
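
        // Example of the cap in action: with capacity 10, appending an 11th
        // event drops the oldest, so state.Events always holds at most the
        // ten most recent events in arrival order.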
   772  
    773  // Run is a long-running goroutine used to manage an allocation
   774  func (r *AllocRunner) Run() {
   775  	defer close(r.waitCh)
   776  	r.setBaseLabels()
   777  	go r.dirtySyncState()
   778  
   779  	// Find the task group to run in the allocation
   780  	alloc := r.Alloc()
   781  	tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup)
   782  	if tg == nil {
   783  		r.logger.Printf("[ERR] client: alloc %q for missing task group %q", r.allocID, alloc.TaskGroup)
   784  		r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("missing task group '%s'", alloc.TaskGroup))
   785  		return
   786  	}
   787  
   788  	// Build allocation directory (idempotent)
   789  	r.allocDirLock.Lock()
   790  	err := r.allocDir.Build()
   791  	r.allocDirLock.Unlock()
   792  
   793  	if err != nil {
   794  		r.logger.Printf("[ERR] client: alloc %q failed to build task directories: %v", r.allocID, err)
   795  		r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("failed to build task dirs for '%s'", alloc.TaskGroup))
   796  		return
   797  	}
   798  
   799  	// Wait for a previous alloc - if any - to terminate
   800  	if err := r.prevAlloc.Wait(r.ctx); err != nil {
   801  		if err == context.Canceled {
   802  			return
   803  		}
   804  		r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("error while waiting for previous alloc to terminate: %v", err))
   805  		return
   806  	}
   807  
   808  	// Wait for data to be migrated from a previous alloc if applicable
   809  	if err := r.prevAlloc.Migrate(r.ctx, r.allocDir); err != nil {
   810  		if err == context.Canceled {
   811  			return
   812  		}
   813  
   814  		// Soft-fail on migration errors
   815  		r.logger.Printf("[WARN] client: alloc %q error while migrating data from previous alloc: %v", r.allocID, err)
   816  
   817  		// Recreate alloc dir to ensure a clean slate
   818  		r.allocDir.Destroy()
   819  		if err := r.allocDir.Build(); err != nil {
   820  			r.logger.Printf("[ERR] client: alloc %q failed to clean task directories after failed migration: %v", r.allocID, err)
   821  			r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("failed to rebuild task dirs for '%s'", alloc.TaskGroup))
   822  			return
   823  		}
   824  	}
   825  
   826  	// Check if the allocation is in a terminal status. In this case, we don't
   827  	// start any of the task runners and directly wait for the destroy signal to
   828  	// clean up the allocation.
   829  	if alloc.TerminalStatus() {
   830  		r.logger.Printf("[DEBUG] client: alloc %q in terminal status, waiting for destroy", r.allocID)
   831  		// mark this allocation as completed if it is not already in a
   832  		// terminal state
   833  		if !alloc.Terminated() {
   834  			r.setStatus(structs.AllocClientStatusComplete, "canceled running tasks for allocation in terminal state")
   835  		}
   836  		r.handleDestroy()
   837  		r.logger.Printf("[DEBUG] client: terminating runner for alloc '%s'", r.allocID)
   838  		return
   839  	}
   840  
   841  	// Increment alloc runner start counter. Incr'd even when restoring existing tasks so 1 start != 1 task execution
   842  	if !r.config.DisableTaggedMetrics {
   843  		metrics.IncrCounterWithLabels([]string{"client", "allocs", "start"},
   844  			1, r.baseLabels)
   845  	}
   846  	if r.config.BackwardsCompatibleMetrics {
   847  		metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, "start"}, 1)
   848  	}
   849  
   850  	// Start the watcher
   851  	wCtx, watcherCancel := context.WithCancel(r.ctx)
   852  	go r.watchHealth(wCtx)
   853  
   854  	// Start the task runners
   855  	r.logger.Printf("[DEBUG] client: starting task runners for alloc '%s'", r.allocID)
   856  	r.taskLock.Lock()
   857  	for _, task := range tg.Tasks {
   858  		if _, ok := r.restored[task.Name]; ok {
   859  			continue
   860  		}
   861  
   862  		r.allocDirLock.Lock()
   863  		taskdir := r.allocDir.NewTaskDir(task.Name)
   864  		r.allocDirLock.Unlock()
   865  
   866  		tr := taskrunner.NewTaskRunner(r.logger, r.config, r.stateDB, r.setTaskState, taskdir, r.Alloc(), task.Copy(), r.vaultClient, r.consulClient)
   867  		r.tasks[task.Name] = tr
   868  		tr.MarkReceived()
   869  
   870  		go tr.Run()
   871  	}
   872  	r.taskLock.Unlock()
   873  
   874  	// taskDestroyEvent contains an event that caused the destruction of a task
   875  	// in the allocation.
   876  	var taskDestroyEvent *structs.TaskEvent
   877  
   878  OUTER:
   879  	// Wait for updates
   880  	for {
   881  		select {
   882  		case update := <-r.updateCh:
   883  			// Store the updated allocation.
   884  			r.allocLock.Lock()
   885  
    886  			// If the deployment IDs have changed, clear the health
   887  			if r.alloc.DeploymentID != update.DeploymentID {
   888  				r.allocHealth = nil
   889  				r.allocHealthTime = time.Time{}
   890  			}
   891  
   892  			r.alloc = update
   893  			r.allocLock.Unlock()
   894  
   895  			// Create a new watcher
   896  			watcherCancel()
   897  			wCtx, watcherCancel = context.WithCancel(r.ctx)
   898  			go r.watchHealth(wCtx)
   899  
   900  			// Check if we're in a terminal status
   901  			if update.TerminalStatus() {
   902  				taskDestroyEvent = structs.NewTaskEvent(structs.TaskKilled)
   903  				break OUTER
   904  			}
   905  
   906  			// Update the task groups
   907  			runners := r.getTaskRunners()
   908  			for _, tr := range runners {
   909  				tr.Update(update)
   910  			}
   911  
   912  			if err := r.syncStatus(); err != nil {
   913  				r.logger.Printf("[WARN] client: failed to sync alloc %q status upon receiving alloc update: %v",
   914  					r.allocID, err)
   915  			}
   916  
   917  		case <-r.ctx.Done():
   918  			taskDestroyEvent = structs.NewTaskEvent(structs.TaskKilled)
   919  			break OUTER
   920  		}
   921  	}
   922  
   923  	// Kill the task runners
   924  	r.destroyTaskRunners(taskDestroyEvent)
   925  
   926  	// Block until we should destroy the state of the alloc
   927  	r.handleDestroy()
   928  
   929  	// Free up the context. It has likely exited already
   930  	watcherCancel()
   931  
   932  	r.logger.Printf("[DEBUG] client: terminating runner for alloc '%s'", r.allocID)
   933  }
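
        // Run's overall shape, as a summary of the code above: build the
        // alloc dir, wait on (and migrate from) any previous alloc, start a
        // task runner per task, then loop on updates until the alloc turns
        // terminal or Destroy cancels the context, at which point the task
        // runners are destroyed and handleDestroy blocks until GC.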
   934  
    935  // destroyTaskRunners destroys the task runners, destroying any leader
    936  // task first, and waits for them all to terminate.
   937  func (r *AllocRunner) destroyTaskRunners(destroyEvent *structs.TaskEvent) {
   938  	// First destroy the leader if one exists
   939  	tg := r.alloc.Job.LookupTaskGroup(r.alloc.TaskGroup)
   940  	leader := ""
   941  	for _, task := range tg.Tasks {
   942  		if task.Leader {
   943  			leader = task.Name
   944  			break
   945  		}
   946  	}
   947  	if leader != "" {
   948  		r.taskLock.RLock()
   949  		tr := r.tasks[leader]
   950  		r.taskLock.RUnlock()
   951  
   952  		// Dead tasks don't have a task runner created so guard against
   953  		// the leader being dead when this AR was saved.
   954  		if tr == nil {
   955  			r.logger.Printf("[DEBUG] client: alloc %q leader task %q of task group %q already stopped",
   956  				r.allocID, leader, r.alloc.TaskGroup)
   957  		} else {
   958  			r.logger.Printf("[DEBUG] client: alloc %q destroying leader task %q of task group %q first",
   959  				r.allocID, leader, r.alloc.TaskGroup)
   960  			tr.Destroy(destroyEvent)
   961  			<-tr.WaitCh()
   962  		}
   963  	}
   964  
   965  	// Then destroy non-leader tasks concurrently
   966  	r.taskLock.RLock()
   967  	for name, tr := range r.tasks {
   968  		if name != leader {
   969  			tr.Destroy(destroyEvent)
   970  		}
   971  	}
   972  	r.taskLock.RUnlock()
   973  
   974  	// Wait for termination of the task runners
   975  	for _, tr := range r.getTaskRunners() {
   976  		<-tr.WaitCh()
   977  	}
   978  }
   979  
    980  // handleDestroy blocks until the AllocRunner should be destroyed and does the
   981  // necessary cleanup.
   982  func (r *AllocRunner) handleDestroy() {
   983  	// Final state sync. We do this to ensure that the server has the correct
   984  	// state as we wait for a destroy.
   985  	alloc := r.Alloc()
   986  
   987  	// Increment the destroy count for this alloc runner since this allocation is being removed from this client.
   988  	if !r.config.DisableTaggedMetrics {
   989  		metrics.IncrCounterWithLabels([]string{"client", "allocs", "destroy"},
   990  			1, r.baseLabels)
   991  	}
   992  	if r.config.BackwardsCompatibleMetrics {
   993  		metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, "destroy"}, 1)
   994  	}
   995  
   996  	// Broadcast and persist state synchronously
   997  	r.sendBroadcast(alloc)
   998  	if err := r.saveAllocRunnerState(); err != nil {
    999  		r.logger.Printf("[WARN] client: alloc %q unable to persist state but should be GC'd soon anyway: %v",
  1000  			r.allocID, err)
  1001  	}
  1002  
   1003  	// Unmount any mounted directories; no tasks are running, and this
   1004  	// makes cleaning up Nomad's data directory simpler.
  1005  	if err := r.allocDir.UnmountAll(); err != nil {
   1006  		r.logger.Printf("[ERR] client: alloc %q unable to unmount task directories: %v", r.allocID, err)
  1007  	}
  1008  
  1009  	// Update the server with the alloc's status -- also marks the alloc as
  1010  	// being eligible for GC, so from this point on the alloc can be gc'd
  1011  	// at any time.
  1012  	r.updater(alloc)
  1013  
  1014  	for {
  1015  		select {
  1016  		case <-r.ctx.Done():
  1017  			if err := r.DestroyContext(); err != nil {
  1018  				r.logger.Printf("[ERR] client: failed to destroy context for alloc '%s': %v",
  1019  					r.allocID, err)
  1020  			}
  1021  			if err := r.DestroyState(); err != nil {
  1022  				r.logger.Printf("[ERR] client: failed to destroy state for alloc '%s': %v",
  1023  					r.allocID, err)
  1024  			}
  1025  
  1026  			return
  1027  		case <-r.updateCh:
  1028  			r.logger.Printf("[DEBUG] client: dropping update to terminal alloc '%s'", r.allocID)
  1029  		}
  1030  	}
  1031  }
  1032  
  1033  // IsWaiting returns true if this alloc is waiting on a previous allocation to
  1034  // terminate.
  1035  func (r *AllocRunner) IsWaiting() bool {
  1036  	return r.prevAlloc.IsWaiting()
  1037  }
  1038  
  1039  // IsMigrating returns true if this alloc is migrating data from a previous
  1040  // allocation.
  1041  func (r *AllocRunner) IsMigrating() bool {
  1042  	return r.prevAlloc.IsMigrating()
  1043  }
  1044  
   1045  // Update is used to deliver an updated allocation to the runner
  1046  func (r *AllocRunner) Update(update *structs.Allocation) {
  1047  	select {
  1048  	case r.updateCh <- update:
  1049  	default:
  1050  		r.logger.Printf("[ERR] client: dropping update to alloc '%s'", update.ID)
  1051  	}
  1052  }
  1053  
  1054  // StatsReporter returns an interface to query resource usage statistics of an
  1055  // allocation
  1056  func (r *AllocRunner) StatsReporter() AllocStatsReporter {
  1057  	return r
  1058  }
  1059  
  1060  // getTaskRunners is a helper that returns a copy of the task runners list using
  1061  // the taskLock.
  1062  func (r *AllocRunner) getTaskRunners() []*taskrunner.TaskRunner {
  1063  	// Get the task runners
  1064  	r.taskLock.RLock()
  1065  	defer r.taskLock.RUnlock()
  1066  	runners := make([]*taskrunner.TaskRunner, 0, len(r.tasks))
  1067  	for _, tr := range r.tasks {
  1068  		runners = append(runners, tr)
  1069  	}
  1070  	return runners
  1071  }
  1072  
  1073  // LatestAllocStats returns the latest allocation stats. If the optional taskFilter is set
  1074  // the allocation stats will only include the given task.
  1075  func (r *AllocRunner) LatestAllocStats(taskFilter string) (*cstructs.AllocResourceUsage, error) {
  1076  	astat := &cstructs.AllocResourceUsage{
  1077  		Tasks: make(map[string]*cstructs.TaskResourceUsage),
  1078  	}
  1079  
  1080  	var flat []*cstructs.TaskResourceUsage
  1081  	if taskFilter != "" {
  1082  		r.taskLock.RLock()
  1083  		tr, ok := r.tasks[taskFilter]
  1084  		r.taskLock.RUnlock()
  1085  		if !ok {
  1086  			return nil, fmt.Errorf("allocation %q has no task %q", r.allocID, taskFilter)
  1087  		}
  1088  		l := tr.LatestResourceUsage()
  1089  		if l != nil {
  1090  			astat.Tasks[taskFilter] = l
  1091  			flat = []*cstructs.TaskResourceUsage{l}
  1092  			astat.Timestamp = l.Timestamp
  1093  		}
  1094  	} else {
  1095  		// Get the task runners
  1096  		runners := r.getTaskRunners()
  1097  		for _, tr := range runners {
  1098  			l := tr.LatestResourceUsage()
  1099  			if l != nil {
  1100  				astat.Tasks[tr.Name()] = l
  1101  				flat = append(flat, l)
  1102  				if l.Timestamp > astat.Timestamp {
  1103  					astat.Timestamp = l.Timestamp
  1104  				}
  1105  			}
  1106  		}
  1107  	}
  1108  
  1109  	astat.ResourceUsage = sumTaskResourceUsage(flat)
  1110  	return astat, nil
  1111  }
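
        // Hypothetical usage sketch (error handling elided; an empty filter
        // sums usage across all tasks):
        //
        //	usage, err := ar.StatsReporter().LatestAllocStats("")
        //	if err == nil && usage.ResourceUsage != nil {
        //		// usage.ResourceUsage.MemoryStats / CpuStats hold the sums
        //	}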
  1112  
  1113  // sumTaskResourceUsage takes a set of task resources and sums their resources
  1114  func sumTaskResourceUsage(usages []*cstructs.TaskResourceUsage) *cstructs.ResourceUsage {
  1115  	summed := &cstructs.ResourceUsage{
  1116  		MemoryStats: &cstructs.MemoryStats{},
  1117  		CpuStats:    &cstructs.CpuStats{},
  1118  	}
  1119  	for _, usage := range usages {
  1120  		summed.Add(usage.ResourceUsage)
  1121  	}
  1122  	return summed
  1123  }
  1124  
  1125  // ShouldUpdate takes the AllocModifyIndex of an allocation sent from the server and
  1126  // checks if the current running allocation is behind and should be updated.
  1127  func (r *AllocRunner) ShouldUpdate(serverIndex uint64) bool {
  1128  	r.allocLock.Lock()
  1129  	defer r.allocLock.Unlock()
  1130  	return r.alloc.AllocModifyIndex < serverIndex
  1131  }
  1132  
  1133  // Destroy is used to indicate that the allocation context should be destroyed
  1134  func (r *AllocRunner) Destroy() {
   1135  	// Lock when cancelling the context so destruction is serialized with
   1136  	// the state-saving code.
  1137  	r.allocStateLock.Lock()
  1138  	defer r.allocStateLock.Unlock()
  1139  
  1140  	r.exitFn()
  1141  	r.allocBroadcast.Close()
  1142  }
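
        // Destroy only signals; the teardown itself happens inside Run via
        // handleDestroy. A caller that must block until cleanup completes
        // should pair the signal with a wait (sketch):
        //
        //	ar.Destroy()
        //	<-ar.WaitCh() // closed once the alloc is stopped and GC'd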
  1143  
  1144  // IsDestroyed returns true if the AllocRunner is not running and has been
  1145  // destroyed (GC'd).
  1146  func (r *AllocRunner) IsDestroyed() bool {
  1147  	select {
  1148  	case <-r.waitCh:
  1149  		return true
  1150  	default:
  1151  		return false
  1152  	}
  1153  }
  1154  
  1155  // WaitCh returns a channel to wait for termination
  1156  func (r *AllocRunner) WaitCh() <-chan struct{} {
  1157  	return r.waitCh
  1158  }
  1159  
  1160  // AllocID returns the allocation ID of the allocation being run
  1161  func (r *AllocRunner) AllocID() string {
  1162  	if r == nil {
  1163  		return ""
  1164  	}
  1165  	return r.allocID
  1166  }