github.com/aminovpavel/nomad@v0.11.8/nomad/deploymentwatcher/deployment_watcher.go

     1  package deploymentwatcher
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"sync"
     7  	"time"
     8  
     9  	log "github.com/hashicorp/go-hclog"
    10  	memdb "github.com/hashicorp/go-memdb"
    11  	"github.com/hashicorp/nomad/helper"
    12  	"github.com/hashicorp/nomad/helper/uuid"
    13  	"github.com/hashicorp/nomad/nomad/state"
    14  	"github.com/hashicorp/nomad/nomad/structs"
    15  	"golang.org/x/time/rate"
    16  )
    17  
    18  const (
    19  	// perJobEvalBatchPeriod is the batching length before creating an evaluation to
    20  	// trigger the scheduler when allocations are marked as healthy.
    21  	perJobEvalBatchPeriod = 1 * time.Second
    22  )
    23  
    24  var (
    25  	// allowRescheduleTransition is the transition that allows failed
    26  	// allocations that are part of a deployment to be rescheduled. We create a
    27  	// one-off variable to avoid creating a new object for every request.
    28  	allowRescheduleTransition = &structs.DesiredTransition{
    29  		Reschedule: helper.BoolToPtr(true),
    30  	}
    31  )
    32  
    33  // deploymentTriggers are the set of functions required to trigger changes on
    34  // behalf of a deployment
    35  type deploymentTriggers interface {
    36  	// createUpdate is used to create allocation desired transition updates and
    37  	// an evaluation.
    38  	createUpdate(allocs map[string]*structs.DesiredTransition, eval *structs.Evaluation) (uint64, error)
    39  
    40  	// upsertJob is used to roll back a job when autoreverting for a deployment
    41  	upsertJob(job *structs.Job) (uint64, error)
    42  
    43  	// upsertDeploymentStatusUpdate is used to upsert a deployment status update
    44  	// and an optional evaluation and job to upsert
    45  	upsertDeploymentStatusUpdate(u *structs.DeploymentStatusUpdate, eval *structs.Evaluation, job *structs.Job) (uint64, error)
    46  
    47  	// upsertDeploymentPromotion is used to promote canaries in a deployment
    48  	upsertDeploymentPromotion(req *structs.ApplyDeploymentPromoteRequest) (uint64, error)
    49  
    50  	// upsertDeploymentAllocHealth is used to set the health of allocations in a
    51  	// deployment
    52  	upsertDeploymentAllocHealth(req *structs.ApplyDeploymentAllocHealthRequest) (uint64, error)
    53  }
    54  
    55  // deploymentWatcher is used to watch a single deployment and trigger the
    56  // scheduler when allocation health transitions.
    57  type deploymentWatcher struct {
    58  	// queryLimiter is used to limit the rate of blocking queries
    59  	queryLimiter *rate.Limiter
    60  
    61  	// deploymentTriggers holds the methods required to trigger changes on behalf of the
    62  	// deployment
    63  	deploymentTriggers
    64  
    65  	// state is the state that is watched for state changes.
    66  	state *state.StateStore
    67  
    68  	// deploymentID is the deployment's ID being watched
    69  	deploymentID string
    70  
    71  	// deploymentUpdateCh is triggered when there is an updated deployment
    72  	deploymentUpdateCh chan struct{}
    73  
    74  	// d is the deployment being watched
    75  	d *structs.Deployment
    76  
    77  	// j is the job the deployment is for
    78  	j *structs.Job
    79  
    80  	// outstandingBatch marks whether an outstanding function exists to create
    81  	// the evaluation. Access should be done through the lock.
    82  	outstandingBatch bool
    83  
    84  	// outstandingAllowReplacements is the map of allocations that will be
    85  	// marked as allowing a replacement. Access should be done through the lock.
    86  	outstandingAllowReplacements map[string]*structs.DesiredTransition
    87  
    88  	// latestEval is the latest eval for the job. It is updated by the watch
    89  	// loop and any time an evaluation is created. The field should be accessed
    90  	// by holding the lock or using the setter and getter methods.
    91  	latestEval uint64
    92  
    93  	logger log.Logger
    94  	ctx    context.Context
    95  	exitFn context.CancelFunc
    96  	l      sync.RWMutex
    97  }
    98  
    99  // newDeploymentWatcher returns a deployment watcher that is used to watch
   100  // deployments and trigger the scheduler as needed.
   101  func newDeploymentWatcher(parent context.Context, queryLimiter *rate.Limiter,
   102  	logger log.Logger, state *state.StateStore, d *structs.Deployment,
   103  	j *structs.Job, triggers deploymentTriggers) *deploymentWatcher {
   104  
   105  	ctx, exitFn := context.WithCancel(parent)
   106  	w := &deploymentWatcher{
   107  		queryLimiter:       queryLimiter,
   108  		deploymentID:       d.ID,
   109  		deploymentUpdateCh: make(chan struct{}, 1),
   110  		d:                  d,
   111  		j:                  j,
   112  		state:              state,
   113  		deploymentTriggers: triggers,
   114  		logger:             logger.With("deployment_id", d.ID, "job", j.NamespacedID()),
   115  		ctx:                ctx,
   116  		exitFn:             exitFn,
   117  	}
   118  
   119  	// Start the long lived watcher that scans for allocation updates
   120  	go w.watch()
   121  
   122  	return w
   123  }
   124  
   125  // updateDeployment is used to update the tracked deployment.
   126  func (w *deploymentWatcher) updateDeployment(d *structs.Deployment) {
   127  	w.l.Lock()
   128  	defer w.l.Unlock()
   129  
   130  	// Update and trigger
   131  	w.d = d
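        	// Non-blocking send: deploymentUpdateCh has a buffer of one, so if a
        	// notification is already pending this update is coalesced with it
        	// instead of blocking the caller.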
   132  	select {
   133  	case w.deploymentUpdateCh <- struct{}{}:
   134  	default:
   135  	}
   136  }
   137  
   138  // getDeployment returns the tracked deployment.
   139  func (w *deploymentWatcher) getDeployment() *structs.Deployment {
   140  	w.l.RLock()
   141  	defer w.l.RUnlock()
   142  	return w.d
   143  }
   144  
   145  func (w *deploymentWatcher) SetAllocHealth(
   146  	req *structs.DeploymentAllocHealthRequest,
   147  	resp *structs.DeploymentUpdateResponse) error {
   148  
   149  	// If we are failing the deployment, update the status and potentially
   150  	// roll back.
   151  	var j *structs.Job
   152  	var u *structs.DeploymentStatusUpdate
   153  
   154  	// If there are unhealthy allocations we need to mark the deployment as
   155  	// failed and check if we should roll back to a stable job.
   156  	if l := len(req.UnhealthyAllocationIDs); l != 0 {
   157  		unhealthy := make(map[string]struct{}, l)
   158  		for _, alloc := range req.UnhealthyAllocationIDs {
   159  			unhealthy[alloc] = struct{}{}
   160  		}
   161  
   162  		// Get the allocations for the deployment
   163  		snap, err := w.state.Snapshot()
   164  		if err != nil {
   165  			return err
   166  		}
   167  
   168  		allocs, err := snap.AllocsByDeployment(nil, req.DeploymentID)
   169  		if err != nil {
   170  			return err
   171  		}
   172  
   173  		// Determine if we should autorevert to an older job
   174  		desc := structs.DeploymentStatusDescriptionFailedAllocations
   175  		for _, alloc := range allocs {
   176  			// Check that the alloc has been marked unhealthy
   177  			if _, ok := unhealthy[alloc.ID]; !ok {
   178  				continue
   179  			}
   180  
   181  			// Check if the group has autorevert set
   182  			group, ok := w.getDeployment().TaskGroups[alloc.TaskGroup]
   183  			if !ok || !group.AutoRevert {
   184  				continue
   185  			}
   186  
   187  			var err error
   188  			j, err = w.latestStableJob()
   189  			if err != nil {
   190  				return err
   191  			}
   192  
   193  			if j != nil {
   194  				j, desc = w.handleRollbackValidity(j, desc)
   195  			}
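        			// A rollback targets the whole job, so finding one group with
        			// auto_revert set is enough; stop scanning the remaining allocs.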
   196  			break
   197  		}
   198  
   199  		u = w.getDeploymentStatusUpdate(structs.DeploymentStatusFailed, desc)
   200  	}
   201  
   202  	// Canonicalize the job in case it doesn't have namespace set
   203  	j.Canonicalize()
   204  
   205  	// Create the request
   206  	areq := &structs.ApplyDeploymentAllocHealthRequest{
   207  		DeploymentAllocHealthRequest: *req,
   208  		Timestamp:                    time.Now(),
   209  		Eval:                         w.getEval(),
   210  		DeploymentUpdate:             u,
   211  		Job:                          j,
   212  	}
   213  
   214  	index, err := w.upsertDeploymentAllocHealth(areq)
   215  	if err != nil {
   216  		return err
   217  	}
   218  
   219  	// Build the response
   220  	resp.EvalID = areq.Eval.ID
   221  	resp.EvalCreateIndex = index
   222  	resp.DeploymentModifyIndex = index
   223  	resp.Index = index
   224  	if j != nil {
   225  		resp.RevertedJobVersion = helper.Uint64ToPtr(j.Version)
   226  	}
   227  	return nil
   228  }
   229  
   230  // handleRollbackValidity checks whether the job being rolled back to has the same spec as the existing job.
   231  // It returns a modified description and job accordingly.
   232  func (w *deploymentWatcher) handleRollbackValidity(rollbackJob *structs.Job, desc string) (*structs.Job, string) {
   233  	// Only roll back if the job being changed has a different spec.
   234  	// This prevents an infinite revert cycle when a previously stable version of the job fails to start up during a rollback.
   235  	// If the job we are trying to roll back to is identical to the current job, we stop because the rollback will not succeed.
   236  	if w.j.SpecChanged(rollbackJob) {
   237  		desc = structs.DeploymentStatusDescriptionRollback(desc, rollbackJob.Version)
   238  	} else {
   239  		desc = structs.DeploymentStatusDescriptionRollbackNoop(desc, rollbackJob.Version)
   240  		rollbackJob = nil
   241  	}
   242  	return rollbackJob, desc
   243  }
   244  
   245  func (w *deploymentWatcher) PromoteDeployment(
   246  	req *structs.DeploymentPromoteRequest,
   247  	resp *structs.DeploymentUpdateResponse) error {
   248  
   249  	// Create the request
   250  	areq := &structs.ApplyDeploymentPromoteRequest{
   251  		DeploymentPromoteRequest: *req,
   252  		Eval:                     w.getEval(),
   253  	}
   254  
   255  	index, err := w.upsertDeploymentPromotion(areq)
   256  	if err != nil {
   257  		return err
   258  	}
   259  
   260  	// Build the response
   261  	resp.EvalID = areq.Eval.ID
   262  	resp.EvalCreateIndex = index
   263  	resp.DeploymentModifyIndex = index
   264  	resp.Index = index
   265  	return nil
   266  }
   267  
   268  // autoPromoteDeployment creates a synthetic promotion request, and upserts it for processing
   269  func (w *deploymentWatcher) autoPromoteDeployment(allocs []*structs.AllocListStub) error {
   270  	d := w.getDeployment()
   271  	if !d.HasPlacedCanaries() || !d.RequiresPromotion() {
   272  		return nil
   273  	}
   274  
   275  	// AutoPromote iff every task group is marked auto_promote and is healthy. The whole
   276  	// job version has been incremented, so we promote together. See also AutoRevert
   277  	for _, tv := range d.TaskGroups {
   278  		if !tv.AutoPromote || tv.DesiredCanaries != len(tv.PlacedCanaries) {
   279  			return nil
   280  		}
   281  
   282  		// Find the health status of each canary
   283  		for _, c := range tv.PlacedCanaries {
   284  			for _, a := range allocs {
   285  				if c == a.ID && !a.DeploymentStatus.IsHealthy() {
   286  					return nil
   287  				}
   288  			}
   289  		}
   290  	}
   291  
   292  	// Send the request
   293  	_, err := w.upsertDeploymentPromotion(&structs.ApplyDeploymentPromoteRequest{
   294  		DeploymentPromoteRequest: structs.DeploymentPromoteRequest{DeploymentID: d.GetID(), All: true},
   295  		Eval:                     w.getEval(),
   296  	})
   297  	return err
   298  }
   299  
   300  func (w *deploymentWatcher) PauseDeployment(
   301  	req *structs.DeploymentPauseRequest,
   302  	resp *structs.DeploymentUpdateResponse) error {
   303  	// Determine the status we should transition to and if we need to create an
   304  	// evaluation
   305  	status, desc := structs.DeploymentStatusPaused, structs.DeploymentStatusDescriptionPaused
   306  	var eval *structs.Evaluation
   307  	evalID := ""
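        	// Resuming the deployment requires an evaluation so the scheduler picks
        	// the rollout back up; pausing does not need one.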
   308  	if !req.Pause {
   309  		status, desc = structs.DeploymentStatusRunning, structs.DeploymentStatusDescriptionRunning
   310  		eval = w.getEval()
   311  		evalID = eval.ID
   312  	}
   313  	update := w.getDeploymentStatusUpdate(status, desc)
   314  
   315  	// Commit the change
   316  	i, err := w.upsertDeploymentStatusUpdate(update, eval, nil)
   317  	if err != nil {
   318  		return err
   319  	}
   320  
   321  	// Build the response
   322  	if evalID != "" {
   323  		resp.EvalID = evalID
   324  		resp.EvalCreateIndex = i
   325  	}
   326  	resp.DeploymentModifyIndex = i
   327  	resp.Index = i
   328  	return nil
   329  }
   330  
   331  func (w *deploymentWatcher) FailDeployment(
   332  	req *structs.DeploymentFailRequest,
   333  	resp *structs.DeploymentUpdateResponse) error {
   334  
   335  	status, desc := structs.DeploymentStatusFailed, structs.DeploymentStatusDescriptionFailedByUser
   336  
   337  	// Determine if we should rollback
   338  	rollback := false
   339  	for _, state := range w.getDeployment().TaskGroups {
   340  		if state.AutoRevert {
   341  			rollback = true
   342  			break
   343  		}
   344  	}
   345  
   346  	var rollbackJob *structs.Job
   347  	if rollback {
   348  		var err error
   349  		rollbackJob, err = w.latestStableJob()
   350  		if err != nil {
   351  			return err
   352  		}
   353  
   354  		if rollbackJob != nil {
   355  			rollbackJob, desc = w.handleRollbackValidity(rollbackJob, desc)
   356  		} else {
   357  			desc = structs.DeploymentStatusDescriptionNoRollbackTarget(desc)
   358  		}
   359  	}
   360  
   361  	// Commit the change
   362  	update := w.getDeploymentStatusUpdate(status, desc)
   363  	eval := w.getEval()
   364  	i, err := w.upsertDeploymentStatusUpdate(update, eval, rollbackJob)
   365  	if err != nil {
   366  		return err
   367  	}
   368  
   369  	// Build the response
   370  	resp.EvalID = eval.ID
   371  	resp.EvalCreateIndex = i
   372  	resp.DeploymentModifyIndex = i
   373  	resp.Index = i
   374  	if rollbackJob != nil {
   375  		resp.RevertedJobVersion = helper.Uint64ToPtr(rollbackJob.Version)
   376  	}
   377  	return nil
   378  }
   379  
   380  // StopWatch stops watching the deployment. This should be called whenever a
   381  // deployment is completed or the watcher is no longer needed.
   382  func (w *deploymentWatcher) StopWatch() {
   383  	w.exitFn()
   384  }
   385  
   386  // watch is the long running watcher that watches for both allocation and
   387  // deployment changes. Its function is to create evaluations to trigger the
   388  // scheduler when more progress can be made, and to fail the deployment,
   389  // potentially rolling back the job, when it has failed. Progress can be made
   390  // when an allocation transitions to healthy, so we create an eval.
   391  func (w *deploymentWatcher) watch() {
   392  	// Get the deadline. This is likely a zero time to begin with but we need to
   393  	// handle the case that the deployment has already progressed and we are now
   394  	// just starting to watch it. This would most likely occur if there was a
   395  	// leader transition and we are now starting our watcher.
   396  	currentDeadline := w.getDeploymentProgressCutoff(w.getDeployment())
   397  	var deadlineTimer *time.Timer
   398  	if currentDeadline.IsZero() {
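        		// No progress deadline is being tracked yet. Create the timer but
        		// immediately stop and drain it so its channel never fires until a
        		// real deadline is set below.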
   399  		deadlineTimer = time.NewTimer(0)
   400  		if !deadlineTimer.Stop() {
   401  			<-deadlineTimer.C
   402  		}
   403  	} else {
   404  		deadlineTimer = time.NewTimer(currentDeadline.Sub(time.Now()))
   405  	}
   406  
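        	// Start the blocking allocation query at index 1 so the first query
        	// effectively returns the deployment's current allocations right away
        	// rather than blocking.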
   407  	allocIndex := uint64(1)
   408  	var updates *allocUpdates
   409  
   410  	rollback, deadlineHit := false, false
   411  
   412  FAIL:
   413  	for {
   414  		select {
   415  		case <-w.ctx.Done():
   416  			// This is the successful case, and we stop the loop
   417  			return
   418  		case <-deadlineTimer.C:
   419  			// We have hit the progress deadline so fail the deployment. We need
   420  			// to determine whether we should roll back the job by inspecting
   421  			// which allocs that are part of the deployment are healthy and which
   422  			// aren't.
   423  			deadlineHit = true
   424  			fail, rback, err := w.shouldFail()
   425  			if err != nil {
   426  				w.logger.Error("failed to determine whether to rollback job", "error", err)
   427  			}
   428  			if !fail {
   429  				w.logger.Debug("skipping deadline")
   430  				continue
   431  			}
   432  
   433  			w.logger.Debug("deadline hit", "rollback", rback)
   434  			rollback = rback
   435  			break FAIL
   436  		case <-w.deploymentUpdateCh:
   437  			// Get the updated deployment and check if we should change the
   438  			// deadline timer
   439  			next := w.getDeploymentProgressCutoff(w.getDeployment())
   440  			if !next.Equal(currentDeadline) {
   441  				prevDeadlineZero := currentDeadline.IsZero()
   442  				currentDeadline = next
   443  				// The most recent deadline can be zero if no allocs were created for this deployment.
   444  				// The deadline timer would have already been stopped once in that case. To prevent
   445  				// deadlocking on the already stopped deadline timer, we only drain the channel if
   446  				// the previous deadline was not zero.
   447  				if !prevDeadlineZero && !deadlineTimer.Stop() {
   448  					select {
   449  					case <-deadlineTimer.C:
   450  					default:
   451  					}
   452  				}
   453  
   454  				// If the next deadline is zero, we should not reset the timer
   455  				// as we aren't tracking towards a progress deadline yet. This
   456  				// can happen if you have multiple task groups with progress
   457  				// deadlines and one of the task groups hasn't made any
   458  				// placements. As soon as the other task group finishes its
   459  				// rollout, the next progress deadline becomes zero, so we want
   460  				// to avoid resetting, which would cause a deployment failure.
   461  				if !next.IsZero() {
   462  					deadlineTimer.Reset(next.Sub(time.Now()))
   463  				}
   464  			}
   465  
   466  		case updates = <-w.getAllocsCh(allocIndex):
   467  			if err := updates.err; err != nil {
   468  				if err == context.Canceled || w.ctx.Err() == context.Canceled {
   469  					return
   470  				}
   471  
   472  				w.logger.Error("failed to retrieve allocations", "error", err)
   473  				return
   474  			}
   475  			allocIndex = updates.index
   476  
   477  			// We have allocation changes for this deployment so determine the
   478  			// steps to take.
   479  			res, err := w.handleAllocUpdate(updates.allocs)
   480  			if err != nil {
   481  				if err == context.Canceled || w.ctx.Err() == context.Canceled {
   482  					return
   483  				}
   484  
   485  				w.logger.Error("failed handling allocation updates", "error", err)
   486  				return
   487  			}
   488  
   489  			// The deployment has failed, so break out of the watch loop and
   490  			// handle the failure
   491  			if res.failDeployment {
   492  				rollback = res.rollback
   493  				break FAIL
   494  			}
   495  
   496  			// If permitted, automatically promote this canary deployment
   497  			err = w.autoPromoteDeployment(updates.allocs)
   498  			if err != nil {
   499  				w.logger.Error("failed to auto promote deployment", "error", err)
   500  			}
   501  
   502  			// Create an eval to push the deployment along
   503  			if res.createEval || len(res.allowReplacements) != 0 {
   504  				w.createBatchedUpdate(res.allowReplacements, allocIndex)
   505  			}
   506  		}
   507  	}
   508  
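        	// Reaching this point means the loop exited via break FAIL: the
        	// deployment has failed, either because the progress deadline was hit
        	// or because an unhealthy allocation required failing it.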
   509  	// Change the deployment's status to failed
   510  	desc := structs.DeploymentStatusDescriptionFailedAllocations
   511  	if deadlineHit {
   512  		desc = structs.DeploymentStatusDescriptionProgressDeadline
   513  	}
   514  
   515  	// Rollback to the old job if necessary
   516  	var j *structs.Job
   517  	if rollback {
   518  		var err error
   519  		j, err = w.latestStableJob()
   520  		if err != nil {
   521  			w.logger.Error("failed to lookup latest stable job", "error", err)
   522  		}
   523  
   524  		// Description should include that the job is being rolled back to
   525  		// version N
   526  		if j != nil {
   527  			j, desc = w.handleRollbackValidity(j, desc)
   528  		} else {
   529  			desc = structs.DeploymentStatusDescriptionNoRollbackTarget(desc)
   530  		}
   531  	}
   532  
   533  	// Update the status of the deployment to failed and create an evaluation.
   534  	e := w.getEval()
   535  	u := w.getDeploymentStatusUpdate(structs.DeploymentStatusFailed, desc)
   536  	if _, err := w.upsertDeploymentStatusUpdate(u, e, j); err != nil {
   537  		w.logger.Error("failed to update deployment status", "error", err)
   538  	}
   539  }
   540  
   541  // allocUpdateResult is used to return the desired actions given the newest set
   542  // of allocations for the deployment.
   543  type allocUpdateResult struct {
   544  	createEval        bool
   545  	failDeployment    bool
   546  	rollback          bool
   547  	allowReplacements []string
   548  }
   549  
   550  // handleAllocUpdate is used to compute the set of actions to take based on the
   551  // updated allocations for the deployment.
   552  func (w *deploymentWatcher) handleAllocUpdate(allocs []*structs.AllocListStub) (allocUpdateResult, error) {
   553  	var res allocUpdateResult
   554  
   555  	// Get the latest evaluation index
   556  	latestEval, err := w.jobEvalStatus()
   557  	if err != nil {
   558  		if err == context.Canceled || w.ctx.Err() == context.Canceled {
   559  			return res, err
   560  		}
   561  
   562  		return res, fmt.Errorf("failed to determine last evaluation index for job %q: %v", w.j.ID, err)
   563  	}
   564  
   565  	deployment := w.getDeployment()
   566  	for _, alloc := range allocs {
   567  		dstate, ok := deployment.TaskGroups[alloc.TaskGroup]
   568  		if !ok {
   569  			continue
   570  		}
   571  
   572  		// Determine if the update stanza for this group is progress based
   573  		progressBased := dstate.ProgressDeadline != 0
   574  
   575  		// Check if the allocation has failed and we need to mark it for allow
   576  		// replacements
   577  		if progressBased && alloc.DeploymentStatus.IsUnhealthy() &&
   578  			deployment.Active() && !alloc.DesiredTransition.ShouldReschedule() {
   579  			res.allowReplacements = append(res.allowReplacements, alloc.ID)
   580  			continue
   581  		}
   582  
   583  		// We need to create an eval so the job can progress.
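        		// A healthy transition newer than the last evaluation the scheduler
        		// processed means there may be more placements to make, e.g. the next
        		// batch allowed by max_parallel.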
   584  		if alloc.DeploymentStatus.IsHealthy() && alloc.DeploymentStatus.ModifyIndex > latestEval {
   585  			res.createEval = true
   586  		}
   587  
   588  		// If the group is using a progress deadline, we don't have to do anything.
   589  		if progressBased {
   590  			continue
   591  		}
   592  
   593  		// Fail on the first bad allocation
   594  		if alloc.DeploymentStatus.IsUnhealthy() {
   595  			// Check if the group has autorevert set
   596  			if dstate.AutoRevert {
   597  				res.rollback = true
   598  			}
   599  
   600  			// Since we have an unhealthy allocation, fail the deployment
   601  			res.failDeployment = true
   602  		}
   603  
   604  		// All conditions have been hit so we can break
   605  		if res.createEval && res.failDeployment && res.rollback {
   606  			break
   607  		}
   608  	}
   609  
   610  	return res, nil
   611  }
   612  
   613  // shouldFail returns whether the job should be failed and whether it should be
   614  // rolled back to an earlier stable version by examining the allocations in the
   615  // deployment.
   616  func (w *deploymentWatcher) shouldFail() (fail, rollback bool, err error) {
   617  	snap, err := w.state.Snapshot()
   618  	if err != nil {
   619  		return false, false, err
   620  	}
   621  
   622  	d, err := snap.DeploymentByID(nil, w.deploymentID)
   623  	if err != nil {
   624  		return false, false, err
   625  	}
   626  	if d == nil {
   627  		// The deployment wasn't in the state store, possibly due to a system gc
   628  		return false, false, fmt.Errorf("deployment id not found: %q", w.deploymentID)
   629  	}
   630  
   631  	fail = false
   632  	for tg, state := range d.TaskGroups {
   633  		// If we are in a canary state we fail if there aren't enough healthy
   634  		// allocs to satisfy DesiredCanaries
   635  		if state.DesiredCanaries > 0 && !state.Promoted {
   636  			if state.HealthyAllocs >= state.DesiredCanaries {
   637  				continue
   638  			}
   639  		} else if state.HealthyAllocs >= state.DesiredTotal {
   640  			continue
   641  		}
   642  
   643  		// We have failed this TG
   644  		fail = true
   645  
   646  		// We don't need to autorevert this group
   647  		upd := w.j.LookupTaskGroup(tg).Update
   648  		if upd == nil || !upd.AutoRevert {
   649  			continue
   650  		}
   651  
   652  		// Unhealthy allocs and we need to autorevert
   653  		return true, true, nil
   654  	}
   655  
   656  	return fail, false, nil
   657  }
   658  
   659  // getDeploymentProgressCutoff returns the progress cutoff for the given
   660  // deployment
   661  func (w *deploymentWatcher) getDeploymentProgressCutoff(d *structs.Deployment) time.Time {
   662  	var next time.Time
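        	// next tracks the earliest RequireProgressBy across all groups that are
        	// not yet done; a zero value means no deadline is currently being tracked.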
   663  	doneTGs := w.doneGroups(d)
   664  	for name, state := range d.TaskGroups {
   665  		// This task group is done so we don't have to concern ourselves with
   666  		// its progress deadline.
   667  		if done, ok := doneTGs[name]; ok && done {
   668  			continue
   669  		}
   670  
   671  		if state.RequireProgressBy.IsZero() {
   672  			continue
   673  		}
   674  
   675  		if next.IsZero() || state.RequireProgressBy.Before(next) {
   676  			next = state.RequireProgressBy
   677  		}
   678  	}
   679  	return next
   680  }
   681  
   682  // doneGroups returns a map of task group to whether the deployment appears to
   683  // be done for the group. A true value doesn't mean no more action will be taken
   684  // in the lifetime of the deployment because there could always be node
   685  // failures or rescheduling events.
   686  func (w *deploymentWatcher) doneGroups(d *structs.Deployment) map[string]bool {
   687  	if d == nil {
   688  		return nil
   689  	}
   690  
   691  	// Collect the allocations by the task group
   692  	snap, err := w.state.Snapshot()
   693  	if err != nil {
   694  		return nil
   695  	}
   696  
   697  	allocs, err := snap.AllocsByDeployment(nil, d.ID)
   698  	if err != nil {
   699  		return nil
   700  	}
   701  
   702  	// Go through the allocs and count up how many healthy allocs we have
   703  	healthy := make(map[string]int, len(d.TaskGroups))
   704  	for _, a := range allocs {
   705  		if a.TerminalStatus() || !a.DeploymentStatus.IsHealthy() {
   706  			continue
   707  		}
   708  		healthy[a.TaskGroup]++
   709  	}
   710  
   711  	// Go through each group and check if it is done
   712  	groups := make(map[string]bool, len(d.TaskGroups))
   713  	for name, state := range d.TaskGroups {
   714  		// Requires promotion
   715  		if state.DesiredCanaries != 0 && !state.Promoted {
   716  			groups[name] = false
   717  			continue
   718  		}
   719  
   720  		// Check we have enough healthy currently running allocations
   721  		groups[name] = healthy[name] >= state.DesiredTotal
   722  	}
   723  
   724  	return groups
   725  }
   726  
   727  // latestStableJob returns the latest stable job. It may be nil if none exists
   728  func (w *deploymentWatcher) latestStableJob() (*structs.Job, error) {
   729  	snap, err := w.state.Snapshot()
   730  	if err != nil {
   731  		return nil, err
   732  	}
   733  
   734  	versions, err := snap.JobVersionsByID(nil, w.j.Namespace, w.j.ID)
   735  	if err != nil {
   736  		return nil, err
   737  	}
   738  
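        	// Versions are expected to be ordered newest first, so the first stable
        	// entry found is treated as the latest stable version.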
   739  	var stable *structs.Job
   740  	for _, job := range versions {
   741  		if job.Stable {
   742  			stable = job
   743  			break
   744  		}
   745  	}
   746  
   747  	return stable, nil
   748  }
   749  
   750  // createBatchedUpdate creates an eval for the given index and updates the
   751  // given allocations to allow them to reschedule.
   752  func (w *deploymentWatcher) createBatchedUpdate(allowReplacements []string, forIndex uint64) {
   753  	w.l.Lock()
   754  	defer w.l.Unlock()
   755  
   756  	// Store the allocations that can be replaced
   757  	for _, allocID := range allowReplacements {
   758  		if w.outstandingAllowReplacements == nil {
   759  			w.outstandingAllowReplacements = make(map[string]*structs.DesiredTransition, len(allowReplacements))
   760  		}
   761  		w.outstandingAllowReplacements[allocID] = allowRescheduleTransition
   762  	}
   763  
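        	// Skip scheduling another batch if one is already outstanding, or if the
        	// index we are reacting to predates the latest evaluation and there is
        	// nothing new to allow replacements for.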
   764  	if w.outstandingBatch || (forIndex < w.latestEval && len(allowReplacements) == 0) {
   765  		return
   766  	}
   767  
   768  	w.outstandingBatch = true
   769  
   770  	time.AfterFunc(perJobEvalBatchPeriod, func() {
   771  		// If the timer has been created and then we shut down, we need to no-op
   772  		// the evaluation creation.
   773  		select {
   774  		case <-w.ctx.Done():
   775  			return
   776  		default:
   777  		}
   778  
   779  		w.l.Lock()
   780  		replacements := w.outstandingAllowReplacements
   781  		w.outstandingAllowReplacements = nil
   782  		w.outstandingBatch = false
   783  		w.l.Unlock()
   784  
   785  		// Create the eval
   786  		if _, err := w.createUpdate(replacements, w.getEval()); err != nil {
   787  			w.logger.Error("failed to create evaluation for deployment", "deployment_id", w.deploymentID, "error", err)
   788  		}
   789  	})
   790  }
   791  
   792  // getEval returns an evaluation suitable for the deployment
   793  func (w *deploymentWatcher) getEval() *structs.Evaluation {
   794  	now := time.Now().UTC().UnixNano()
   795  	return &structs.Evaluation{
   796  		ID:           uuid.Generate(),
   797  		Namespace:    w.j.Namespace,
   798  		Priority:     w.j.Priority,
   799  		Type:         w.j.Type,
   800  		TriggeredBy:  structs.EvalTriggerDeploymentWatcher,
   801  		JobID:        w.j.ID,
   802  		DeploymentID: w.deploymentID,
   803  		Status:       structs.EvalStatusPending,
   804  		CreateTime:   now,
   805  		ModifyTime:   now,
   806  	}
   807  }
   808  
   809  // getDeploymentStatusUpdate returns a deployment status update
   810  func (w *deploymentWatcher) getDeploymentStatusUpdate(status, desc string) *structs.DeploymentStatusUpdate {
   811  	return &structs.DeploymentStatusUpdate{
   812  		DeploymentID:      w.deploymentID,
   813  		Status:            status,
   814  		StatusDescription: desc,
   815  	}
   816  }
   817  
   818  type allocUpdates struct {
   819  	allocs []*structs.AllocListStub
   820  	index  uint64
   821  	err    error
   822  }
   823  
   824  // getAllocsCh creates a channel and starts a goroutine that
   825  // 1. parks a blocking query for allocations against the state store
   826  // 2. reads the result and drops it on the channel
   827  // The query runs once per call, but watch invokes this in a loop.
   828  func (w *deploymentWatcher) getAllocsCh(index uint64) <-chan *allocUpdates {
   829  	out := make(chan *allocUpdates, 1)
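        	// A buffer of one lets the goroutine deliver its result and exit even if
        	// the watch loop has already returned and nobody receives.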
   830  	go func() {
   831  		allocs, index, err := w.getAllocs(index)
   832  		out <- &allocUpdates{
   833  			allocs: allocs,
   834  			index:  index,
   835  			err:    err,
   836  		}
   837  	}()
   838  
   839  	return out
   840  }
   841  
   842  // getAllocs retrieves the allocations that are part of the deployment blocking
   843  // at the given index.
   844  func (w *deploymentWatcher) getAllocs(index uint64) ([]*structs.AllocListStub, uint64, error) {
   845  	resp, index, err := w.state.BlockingQuery(w.getAllocsImpl, index, w.ctx)
   846  	if err != nil {
   847  		return nil, 0, err
   848  	}
   849  	if err := w.ctx.Err(); err != nil {
   850  		return nil, 0, err
   851  	}
   852  
   853  	return resp.([]*structs.AllocListStub), index, nil
   854  }
   855  
   856  // getAllocsImpl retrieves the allocations for the watched deployment from the passed state store.
   857  func (w *deploymentWatcher) getAllocsImpl(ws memdb.WatchSet, state *state.StateStore) (interface{}, uint64, error) {
   858  	if err := w.queryLimiter.Wait(w.ctx); err != nil {
   859  		return nil, 0, err
   860  	}
   861  
   862  	// Capture all the allocations
   863  	allocs, err := state.AllocsByDeployment(ws, w.deploymentID)
   864  	if err != nil {
   865  		return nil, 0, err
   866  	}
   867  
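        	// Track the highest ModifyIndex seen so the next blocking query only
        	// wakes up for changes past this point.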
   868  	maxIndex := uint64(0)
   869  	stubs := make([]*structs.AllocListStub, 0, len(allocs))
   870  	for _, alloc := range allocs {
   871  		stubs = append(stubs, alloc.Stub())
   872  
   873  		if maxIndex < alloc.ModifyIndex {
   874  			maxIndex = alloc.ModifyIndex
   875  		}
   876  	}
   877  
   878  	// Use the last index that affected the allocs table
   879  	if len(stubs) == 0 {
   880  		index, err := state.Index("allocs")
   881  		if err != nil {
   882  			return nil, index, err
   883  		}
   884  		maxIndex = index
   885  	}
   886  
   887  	return stubs, maxIndex, nil
   888  }
   889  
   890  // jobEvalStatus returns the latest eval index for a job. The index is used to
   891  // determine if an allocation update requires an evaluation to be triggered.
   892  func (w *deploymentWatcher) jobEvalStatus() (latestIndex uint64, err error) {
   893  	if err := w.queryLimiter.Wait(w.ctx); err != nil {
   894  		return 0, err
   895  	}
   896  
   897  	snap, err := w.state.Snapshot()
   898  	if err != nil {
   899  		return 0, err
   900  	}
   901  
   902  	evals, err := snap.EvalsByJob(nil, w.j.Namespace, w.j.ID)
   903  	if err != nil {
   904  		return 0, err
   905  	}
   906  
   907  	// If there are no evals for the job, return zero, since we want any
   908  	// allocation change to trigger an evaluation.
   909  	if len(evals) == 0 {
   910  		return 0, nil
   911  	}
   912  
   913  	var max uint64
   914  	for _, eval := range evals {
   915  		// A cancelled eval never impacts what the scheduler has seen, so do not
   916  		// use its indexes.
   917  		if eval.Status == structs.EvalStatusCancelled {
   918  			continue
   919  		}
   920  
   921  		// Prefer using the snapshot index. Otherwise use the create index
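        		// The snapshot index reflects the state the scheduler actually
        		// evaluated, which can be newer than the eval's create index.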
   922  		if eval.SnapshotIndex != 0 && max < eval.SnapshotIndex {
   923  			max = eval.SnapshotIndex
   924  		} else if max < eval.CreateIndex {
   925  			max = eval.CreateIndex
   926  		}
   927  	}
   928  
   929  	return max, nil
   930  }