github.com/hernad/nomad@v1.6.112/nomad/deploymentwatcher/deployment_watcher.go

     1  // Copyright (c) HashiCorp, Inc.
     2  // SPDX-License-Identifier: MPL-2.0
     3  
     4  package deploymentwatcher
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"sync"
    10  	"time"
    11  
    12  	log "github.com/hashicorp/go-hclog"
    13  	memdb "github.com/hashicorp/go-memdb"
    14  	"github.com/hernad/nomad/helper/pointer"
    15  	"github.com/hernad/nomad/helper/uuid"
    16  	"github.com/hernad/nomad/nomad/state"
    17  	"github.com/hernad/nomad/nomad/structs"
    18  	"golang.org/x/time/rate"
    19  )
    20  
    21  const (
    22  	// perJobEvalBatchPeriod is the batching length before creating an evaluation to
    23  	// trigger the scheduler when allocations are marked as healthy.
    24  	perJobEvalBatchPeriod = 1 * time.Second
    25  )
    26  
    27  var (
    28  	// allowRescheduleTransition is the transition that allows failed
    29  	// allocations that are part of a deployment to be rescheduled. We create a
    30  	// one-off variable to avoid creating a new object for every request.
    31  	allowRescheduleTransition = &structs.DesiredTransition{
    32  		Reschedule: pointer.Of(true),
    33  	}
    34  )
    35  
    36  // deploymentTriggers is the set of functions required to trigger changes on
    37  // behalf of a deployment.
    38  type deploymentTriggers interface {
    39  	// createUpdate is used to create allocation desired transition updates and
    40  	// an evaluation.
    41  	createUpdate(allocs map[string]*structs.DesiredTransition, eval *structs.Evaluation) (uint64, error)
    42  
    43  	// upsertJob is used to roll back a job when autoreverting for a deployment
    44  	upsertJob(job *structs.Job) (uint64, error)
    45  
    46  	// upsertDeploymentStatusUpdate is used to upsert a deployment status update
    47  	// and an optional evaluation and job to upsert
    48  	upsertDeploymentStatusUpdate(u *structs.DeploymentStatusUpdate, eval *structs.Evaluation, job *structs.Job) (uint64, error)
    49  
    50  	// upsertDeploymentPromotion is used to promote canaries in a deployment
    51  	upsertDeploymentPromotion(req *structs.ApplyDeploymentPromoteRequest) (uint64, error)
    52  
    53  	// upsertDeploymentAllocHealth is used to set the health of allocations in a
    54  	// deployment
    55  	upsertDeploymentAllocHealth(req *structs.ApplyDeploymentAllocHealthRequest) (uint64, error)
    56  }
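        // In this package the methods above are satisfied by the parent
        // deployments watcher, which commits the requests through the server's
        // Raft-backed state store; in tests a lightweight fake that records the
        // request and returns an index is typically enough.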
    57  
    58  // deploymentWatcher is used to watch a single deployment and trigger the
    59  // scheduler when allocation health transitions occur.
    60  type deploymentWatcher struct {
    61  	// queryLimiter is used to limit the rate of blocking queries
    62  	queryLimiter *rate.Limiter
    63  
    64  	// deploymentTriggers holds the methods required to trigger changes on behalf of the
    65  	// deployment
    66  	deploymentTriggers
    67  
    68  	// DeploymentRPC holds methods for interacting with peer regions
    69  	// in enterprise edition
    70  	DeploymentRPC
    71  
    72  	// JobRPC holds methods for interacting with peer regions
    73  	// in enterprise edition
    74  	JobRPC
    75  
    76  	// state is the state that is watched for state changes.
    77  	state *state.StateStore
    78  
    79  	// deploymentID is the deployment's ID being watched
    80  	deploymentID string
    81  
    82  	// deploymentUpdateCh is triggered when there is an updated deployment
    83  	deploymentUpdateCh chan struct{}
    84  
    85  	// d is the deployment being watched
    86  	d *structs.Deployment
    87  
    88  	// j is the job the deployment is for
    89  	j *structs.Job
    90  
    91  	// outstandingBatch marks whether an outstanding function exists to create
    92  	// the evaluation. Access should be done through the lock.
    93  	outstandingBatch bool
    94  
    95  	// outstandingAllowReplacements is the map of allocations that will be
    96  	// marked as allowing a replacement. Access should be done through the lock.
    97  	outstandingAllowReplacements map[string]*structs.DesiredTransition
    98  
    99  	// latestEval is the latest eval for the job. It is updated by the watch
   100  	// loop and any time an evaluation is created. The field should be accessed
   101  	// by holding the lock or using the setter and getter methods.
   102  	latestEval uint64
   103  
   104  	logger log.Logger
   105  	ctx    context.Context
   106  	exitFn context.CancelFunc
   107  	l      sync.RWMutex
   108  }
   109  
   110  // newDeploymentWatcher returns a deployment watcher that is used to watch
   111  // deployments and trigger the scheduler as needed.
   112  func newDeploymentWatcher(parent context.Context, queryLimiter *rate.Limiter,
   113  	logger log.Logger, state *state.StateStore, d *structs.Deployment,
   114  	j *structs.Job, triggers deploymentTriggers,
   115  	deploymentRPC DeploymentRPC, jobRPC JobRPC) *deploymentWatcher {
   116  
   117  	ctx, exitFn := context.WithCancel(parent)
   118  	w := &deploymentWatcher{
   119  		queryLimiter:       queryLimiter,
   120  		deploymentID:       d.ID,
   121  		deploymentUpdateCh: make(chan struct{}, 1),
   122  		d:                  d,
   123  		j:                  j,
   124  		state:              state,
   125  		deploymentTriggers: triggers,
   126  		DeploymentRPC:      deploymentRPC,
   127  		JobRPC:             jobRPC,
   128  		logger:             logger.With("deployment_id", d.ID, "job", j.NamespacedID()),
   129  		ctx:                ctx,
   130  		exitFn:             exitFn,
   131  	}
   132  
   133  	// Start the long lived watcher that scans for allocation updates
   134  	go w.watch()
   135  
   136  	return w
   137  }
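        // A minimal usage sketch (illustrative only; in this package the parent
        // watcher constructs and tracks these, and the trigger and RPC values
        // below are assumed to already exist):
        //
        //	w := newDeploymentWatcher(ctx, limiter, logger, stateStore, deployment,
        //		job, triggers, deploymentRPC, jobRPC)
        //	defer w.StopWatch() // cancels the watcher's context and ends watch()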
   138  
   139  // updateDeployment is used to update the tracked deployment.
   140  func (w *deploymentWatcher) updateDeployment(d *structs.Deployment) {
   141  	w.l.Lock()
   142  	defer w.l.Unlock()
   143  
   144  	// Update and trigger
   145  	w.d = d
   146  	select {
   147  	case w.deploymentUpdateCh <- struct{}{}:
   148  	default:
   149  	}
   150  }
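        // The non-blocking send in updateDeployment coalesces bursts of updates:
        // the channel has a buffer of one, so at most one wake-up is ever queued
        // for the watch loop, and any updates arriving before it drains are safely
        // dropped because the loop re-reads the deployment via getDeployment.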
   151  
   152  // getDeployment returns the tracked deployment.
   153  func (w *deploymentWatcher) getDeployment() *structs.Deployment {
   154  	w.l.RLock()
   155  	defer w.l.RUnlock()
   156  	return w.d
   157  }
   158  
   159  func (w *deploymentWatcher) SetAllocHealth(
   160  	req *structs.DeploymentAllocHealthRequest,
   161  	resp *structs.DeploymentUpdateResponse) error {
   162  
   163  	// If we are failing the deployment, update the status and potentially
   164  	// roll back
   165  	var j *structs.Job
   166  	var u *structs.DeploymentStatusUpdate
   167  
   168  	// If there are unhealthy allocations we need to mark the deployment as
   169  	// failed and check if we should roll back to a stable job.
   170  	if l := len(req.UnhealthyAllocationIDs); l != 0 {
   171  		unhealthy := make(map[string]struct{}, l)
   172  		for _, alloc := range req.UnhealthyAllocationIDs {
   173  			unhealthy[alloc] = struct{}{}
   174  		}
   175  
   176  		// Get the allocations for the deployment
   177  		snap, err := w.state.Snapshot()
   178  		if err != nil {
   179  			return err
   180  		}
   181  
   182  		allocs, err := snap.AllocsByDeployment(nil, req.DeploymentID)
   183  		if err != nil {
   184  			return err
   185  		}
   186  
   187  		// Determine if we should autorevert to an older job
   188  		desc := structs.DeploymentStatusDescriptionFailedAllocations
   189  		for _, alloc := range allocs {
   190  			// Check that the alloc has been marked unhealthy
   191  			if _, ok := unhealthy[alloc.ID]; !ok {
   192  				continue
   193  			}
   194  
   195  			// Check if the group has autorevert set
   196  			dstate, ok := w.getDeployment().TaskGroups[alloc.TaskGroup]
   197  			if !ok || !dstate.AutoRevert {
   198  				continue
   199  			}
   200  
   201  			var err error
   202  			j, err = w.latestStableJob()
   203  			if err != nil {
   204  				return err
   205  			}
   206  
   207  			if j != nil {
   208  				j, desc = w.handleRollbackValidity(j, desc)
   209  			}
   210  			break
   211  		}
   212  
   213  		u = w.getDeploymentStatusUpdate(structs.DeploymentStatusFailed, desc)
   214  	}
   215  
   216  	// Canonicalize the job in case it doesn't have namespace set
   217  	j.Canonicalize()
   218  
   219  	// Create the request
   220  	areq := &structs.ApplyDeploymentAllocHealthRequest{
   221  		DeploymentAllocHealthRequest: *req,
   222  		Timestamp:                    time.Now(),
   223  		Eval:                         w.getEval(),
   224  		DeploymentUpdate:             u,
   225  		Job:                          j,
   226  	}
   227  
   228  	index, err := w.upsertDeploymentAllocHealth(areq)
   229  	if err != nil {
   230  		return err
   231  	}
   232  
   233  	// Build the response
   234  	resp.EvalID = areq.Eval.ID
   235  	resp.EvalCreateIndex = index
   236  	resp.DeploymentModifyIndex = index
   237  	resp.Index = index
   238  	if j != nil {
   239  		resp.RevertedJobVersion = pointer.Of(j.Version)
   240  	}
   241  	return nil
   242  }
   243  
   244  // handleRollbackValidity checks if the job being rolled back to has the same spec as the existing job
   245  // Returns a modified description and job accordingly.
   246  func (w *deploymentWatcher) handleRollbackValidity(rollbackJob *structs.Job, desc string) (*structs.Job, string) {
   247  	// Only roll back if the job being changed has a different spec.
   248  	// This prevents an infinite revert cycle when a previously stable version of the job fails to start up during a rollback.
   249  	// If the job we are trying to roll back to is identical to the current job, we stop because the rollback will not succeed.
   250  	if w.j.SpecChanged(rollbackJob) {
   251  		desc = structs.DeploymentStatusDescriptionRollback(desc, rollbackJob.Version)
   252  	} else {
   253  		desc = structs.DeploymentStatusDescriptionRollbackNoop(desc, rollbackJob.Version)
   254  		rollbackJob = nil
   255  	}
   256  	return rollbackJob, desc
   257  }
   258  
   259  func (w *deploymentWatcher) PromoteDeployment(
   260  	req *structs.DeploymentPromoteRequest,
   261  	resp *structs.DeploymentUpdateResponse) error {
   262  
   263  	// Create the request
   264  	areq := &structs.ApplyDeploymentPromoteRequest{
   265  		DeploymentPromoteRequest: *req,
   266  		Eval:                     w.getEval(),
   267  	}
   268  
   269  	index, err := w.upsertDeploymentPromotion(areq)
   270  	if err != nil {
   271  		return err
   272  	}
   273  
   274  	// Build the response
   275  	resp.EvalID = areq.Eval.ID
   276  	resp.EvalCreateIndex = index
   277  	resp.DeploymentModifyIndex = index
   278  	resp.Index = index
   279  	return nil
   280  }
   281  
   282  // autoPromoteDeployment creates a synthetic promotion request and upserts it for processing.
   283  func (w *deploymentWatcher) autoPromoteDeployment(allocs []*structs.AllocListStub) error {
   284  	d := w.getDeployment()
   285  	if !d.HasPlacedCanaries() || !d.RequiresPromotion() {
   286  		return nil
   287  	}
   288  
   289  	// AutoPromote iff every task group with canaries is marked auto_promote and is healthy. The whole
   290  	// job version has been incremented, so we promote together. See also AutoRevert
   291  	for _, dstate := range d.TaskGroups {
   292  
   293  		// skip auto promote canary validation if the task group has no canaries
   294  		// to prevent auto promote hanging on mixed canary/non-canary taskgroup deploys
   295  		if dstate.DesiredCanaries < 1 {
   296  			continue
   297  		}
   298  
   299  		if !dstate.AutoPromote || len(dstate.PlacedCanaries) < dstate.DesiredCanaries {
   300  			return nil
   301  		}
   302  
   303  		healthyCanaries := 0
   304  		// Find the health status of each canary
   305  		for _, c := range dstate.PlacedCanaries {
   306  			for _, a := range allocs {
   307  				if c == a.ID && a.DeploymentStatus.IsHealthy() {
   308  					healthyCanaries += 1
   309  				}
   310  			}
   311  		}
   312  		if healthyCanaries != dstate.DesiredCanaries {
   313  			return nil
   314  		}
   315  	}
   316  
   317  	// Send the request
   318  	_, err := w.upsertDeploymentPromotion(&structs.ApplyDeploymentPromoteRequest{
   319  		DeploymentPromoteRequest: structs.DeploymentPromoteRequest{DeploymentID: d.GetID(), All: true},
   320  		Eval:                     w.getEval(),
   321  	})
   322  	return err
   323  }
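        // Auto-promotion is opted into per task group via the jobspec update
        // stanza, roughly as below (HCL sketch for context; it is not parsed by
        // this code):
        //
        //	update {
        //	  canary       = 1
        //	  auto_promote = true
        //	}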
   324  
   325  func (w *deploymentWatcher) PauseDeployment(
   326  	req *structs.DeploymentPauseRequest,
   327  	resp *structs.DeploymentUpdateResponse) error {
   328  	// Determine the status we should transition to and if we need to create an
   329  	// evaluation
   330  	status, desc := structs.DeploymentStatusPaused, structs.DeploymentStatusDescriptionPaused
   331  	var eval *structs.Evaluation
   332  	evalID := ""
   333  	if !req.Pause {
   334  		status, desc = structs.DeploymentStatusRunning, structs.DeploymentStatusDescriptionRunning
   335  		eval = w.getEval()
   336  		evalID = eval.ID
   337  	}
   338  	update := w.getDeploymentStatusUpdate(status, desc)
   339  
   340  	// Commit the change
   341  	i, err := w.upsertDeploymentStatusUpdate(update, eval, nil)
   342  	if err != nil {
   343  		return err
   344  	}
   345  
   346  	// Build the response
   347  	if evalID != "" {
   348  		resp.EvalID = evalID
   349  		resp.EvalCreateIndex = i
   350  	}
   351  	resp.DeploymentModifyIndex = i
   352  	resp.Index = i
   353  	return nil
   354  }
   355  
   356  func (w *deploymentWatcher) FailDeployment(
   357  	req *structs.DeploymentFailRequest,
   358  	resp *structs.DeploymentUpdateResponse) error {
   359  
   360  	status, desc := structs.DeploymentStatusFailed, structs.DeploymentStatusDescriptionFailedByUser
   361  
   362  	// Determine if we should roll back
   363  	rollback := false
   364  	for _, dstate := range w.getDeployment().TaskGroups {
   365  		if dstate.AutoRevert {
   366  			rollback = true
   367  			break
   368  		}
   369  	}
   370  
   371  	var rollbackJob *structs.Job
   372  	if rollback {
   373  		var err error
   374  		rollbackJob, err = w.latestStableJob()
   375  		if err != nil {
   376  			return err
   377  		}
   378  
   379  		if rollbackJob != nil {
   380  			rollbackJob, desc = w.handleRollbackValidity(rollbackJob, desc)
   381  		} else {
   382  			desc = structs.DeploymentStatusDescriptionNoRollbackTarget(desc)
   383  		}
   384  	}
   385  
   386  	// Commit the change
   387  	update := w.getDeploymentStatusUpdate(status, desc)
   388  	eval := w.getEval()
   389  	i, err := w.upsertDeploymentStatusUpdate(update, eval, rollbackJob)
   390  	if err != nil {
   391  		return err
   392  	}
   393  
   394  	// Build the response
   395  	resp.EvalID = eval.ID
   396  	resp.EvalCreateIndex = i
   397  	resp.DeploymentModifyIndex = i
   398  	resp.Index = i
   399  	if rollbackJob != nil {
   400  		resp.RevertedJobVersion = pointer.Of(rollbackJob.Version)
   401  	}
   402  	return nil
   403  }
   404  
   405  // StopWatch stops watching the deployment. This should be called whenever a
   406  // deployment is completed or the watcher is no longer needed.
   407  func (w *deploymentWatcher) StopWatch() {
   408  	w.exitFn()
   409  }
   410  
   411  // watch is the long-running watcher that watches for both allocation and
   412  // deployment changes. Its purpose is to create evaluations to trigger the
   413  // scheduler when more progress can be made, and to fail the deployment if it
   414  // has failed, potentially rolling back the job. Progress can be made when an
   415  // allocation transitions to healthy, so we create an eval.
   416  func (w *deploymentWatcher) watch() {
   417  	// Get the deadline. This is likely a zero time to begin with but we need to
   418  	// handle the case that the deployment has already progressed and we are now
   419  	// just starting to watch it. This most likely occurs if there was a
   420  	// leader transition and we are now starting our watcher.
   421  	currentDeadline := w.getDeploymentProgressCutoff(w.getDeployment())
   422  	var deadlineTimer *time.Timer
   423  	if currentDeadline.IsZero() {
   424  		deadlineTimer = time.NewTimer(0)
   425  		if !deadlineTimer.Stop() {
   426  			<-deadlineTimer.C
   427  		}
   428  	} else {
   429  		deadlineTimer = time.NewTimer(time.Until(currentDeadline))
   430  	}
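        	// Note on the timer handling above: time.NewTimer(0) fires almost
        	// immediately, so stopping it and draining its channel leaves a timer
        	// that stays silent until Reset is called. That keeps the deadline case
        	// in the select below inert while no progress deadline is tracked yet.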
   431  
   432  	allocIndex := uint64(1)
   433  	allocsCh := w.getAllocsCh(allocIndex)
   434  	var updates *allocUpdates
   435  
   436  	rollback, deadlineHit := false, false
   437  
   438  FAIL:
   439  	for {
   440  		select {
   441  		case <-w.ctx.Done():
   442  			// This is the successful case, and we stop the loop
   443  			return
   444  		case <-deadlineTimer.C:
   445  			// We have hit the progress deadline, so fail the deployment
   446  			// unless we're waiting for manual promotion. We need to determine
   447  			// whether we should roll back the job by inspecting which allocs
   448  			// that are part of the deployment are healthy and which aren't. The
   449  			// deadlineHit flag is never reset, so even in the case of a
   450  			// manual promotion, we'll describe any failure as a progress
   451  			// deadline failure at this point.
   452  			deadlineHit = true
   453  			fail, rback, err := w.shouldFail()
   454  			if err != nil {
   455  				w.logger.Error("failed to determine whether to rollback job", "error", err)
   456  			}
   457  			if !fail {
   458  				w.logger.Debug("skipping deadline")
   459  				continue
   460  			}
   461  
   462  			w.logger.Debug("deadline hit", "rollback", rback)
   463  			rollback = rback
   464  			err = w.nextRegion(structs.DeploymentStatusFailed)
   465  			if err != nil {
   466  				w.logger.Error("multiregion deployment error", "error", err)
   467  			}
   468  			break FAIL
   469  		case <-w.deploymentUpdateCh:
   470  			// Get the updated deployment and check if we should change the
   471  			// deadline timer
   472  			next := w.getDeploymentProgressCutoff(w.getDeployment())
   473  			if !next.Equal(currentDeadline) {
   474  				prevDeadlineZero := currentDeadline.IsZero()
   475  				currentDeadline = next
   476  				// The most recent deadline can be zero if no allocs were created for this deployment.
   477  				// The deadline timer would have already been stopped once in that case. To prevent
   478  				// deadlocking on the already stopped deadline timer, we only drain the channel if
   479  				// the previous deadline was not zero.
   480  				if !prevDeadlineZero && !deadlineTimer.Stop() {
   481  					select {
   482  					case <-deadlineTimer.C:
   483  					default:
   484  					}
   485  				}
   486  
   487  				// If the next deadline is zero, we should not reset the timer
   488  				// as we aren't tracking towards a progress deadline yet. This
   489  				// can happen if you have multiple task groups with progress
   490  				// deadlines and one of the task groups hasn't made any
   491  				// placements. As soon as the other task group finishes its
   492  				// rollout, the next progress deadline becomes zero, so we want
   493  				// to avoid resetting, causing a deployment failure.
   494  				if !next.IsZero() {
   495  					deadlineTimer.Reset(time.Until(next))
   496  					w.logger.Trace("resetting deadline")
   497  				}
   498  			}
   499  
   500  			err := w.nextRegion(w.getStatus())
   501  			if err != nil {
   502  				break FAIL
   503  			}
   504  
   505  		case updates = <-allocsCh:
   506  			if err := updates.err; err != nil {
   507  				if err == context.Canceled || w.ctx.Err() == context.Canceled {
   508  					return
   509  				}
   510  
   511  				w.logger.Error("failed to retrieve allocations", "error", err)
   512  				return
   513  			}
   514  			allocIndex = updates.index
   515  
   516  			// We have allocation changes for this deployment so determine the
   517  			// steps to take.
   518  			res, err := w.handleAllocUpdate(updates.allocs)
   519  			if err != nil {
   520  				if err == context.Canceled || w.ctx.Err() == context.Canceled {
   521  					return
   522  				}
   523  
   524  				w.logger.Error("failed handling allocation updates", "error", err)
   525  				return
   526  			}
   527  
   528  			// The deployment has failed, so break out of the watch loop and
   529  			// handle the failure
   530  			if res.failDeployment {
   531  				rollback = res.rollback
   532  				err := w.nextRegion(structs.DeploymentStatusFailed)
   533  				if err != nil {
   534  					w.logger.Error("multiregion deployment error", "error", err)
   535  				}
   536  				break FAIL
   537  			}
   538  
   539  			// If permitted, automatically promote this canary deployment
   540  			err = w.autoPromoteDeployment(updates.allocs)
   541  			if err != nil {
   542  				w.logger.Error("failed to auto promote deployment", "error", err)
   543  			}
   544  
   545  			// Create an eval to push the deployment along
   546  			if res.createEval || len(res.allowReplacements) != 0 {
   547  				w.createBatchedUpdate(res.allowReplacements, allocIndex)
   548  			}
   549  
   550  			// only start a new blocking query if we haven't returned early
   551  			allocsCh = w.getAllocsCh(allocIndex)
   552  		}
   553  	}
   554  
   555  	// Change the deployment's status to failed
   556  	desc := structs.DeploymentStatusDescriptionFailedAllocations
   557  	if deadlineHit {
   558  		desc = structs.DeploymentStatusDescriptionProgressDeadline
   559  	}
   560  
   561  	// Roll back to the old job if necessary
   562  	var j *structs.Job
   563  	if rollback {
   564  		var err error
   565  		j, err = w.latestStableJob()
   566  		if err != nil {
   567  			w.logger.Error("failed to lookup latest stable job", "error", err)
   568  		}
   569  
   570  		// Description should include that the job is being rolled back to
   571  		// version N
   572  		if j != nil {
   573  			j, desc = w.handleRollbackValidity(j, desc)
   574  		} else {
   575  			desc = structs.DeploymentStatusDescriptionNoRollbackTarget(desc)
   576  		}
   577  	}
   578  
   579  	// Update the status of the deployment to failed and create an evaluation.
   580  	e := w.getEval()
   581  	u := w.getDeploymentStatusUpdate(structs.DeploymentStatusFailed, desc)
   582  	if _, err := w.upsertDeploymentStatusUpdate(u, e, j); err != nil {
   583  		w.logger.Error("failed to update deployment status", "error", err)
   584  	}
   585  }
   586  
   587  // allocUpdateResult is used to return the desired actions given the newest set
   588  // of allocations for the deployment.
   589  type allocUpdateResult struct {
   590  	createEval        bool
   591  	failDeployment    bool
   592  	rollback          bool
   593  	allowReplacements []string
   594  }
   595  
   596  // handleAllocUpdate is used to compute the set of actions to take based on the
   597  // updated allocations for the deployment.
   598  func (w *deploymentWatcher) handleAllocUpdate(allocs []*structs.AllocListStub) (allocUpdateResult, error) {
   599  	var res allocUpdateResult
   600  
   601  	// Get the latest evaluation index
   602  	latestEval, err := w.jobEvalStatus()
   603  	if err != nil {
   604  		if err == context.Canceled || w.ctx.Err() == context.Canceled {
   605  			return res, err
   606  		}
   607  
   608  		return res, fmt.Errorf("failed to determine last evaluation index for job %q: %v", w.j.ID, err)
   609  	}
   610  
   611  	deployment := w.getDeployment()
   612  	for _, alloc := range allocs {
   613  		dstate, ok := deployment.TaskGroups[alloc.TaskGroup]
   614  		if !ok {
   615  			continue
   616  		}
   617  
   618  		// Check if we can already fail the deployment
   619  		failDeployment := w.shouldFailEarly(deployment, alloc, dstate)
   620  
   621  		// Check if the allocation has failed and we need to mark it to allow
   622  		// a replacement
   623  		if alloc.DeploymentStatus.IsUnhealthy() && !failDeployment &&
   624  			deployment.Active() && !alloc.DesiredTransition.ShouldReschedule() {
   625  			res.allowReplacements = append(res.allowReplacements, alloc.ID)
   626  			continue
   627  		}
   628  
   629  		// We need to create an eval so the job can progress.
   630  		if alloc.DeploymentStatus.IsHealthy() && alloc.DeploymentStatus.ModifyIndex > latestEval {
   631  			res.createEval = true
   632  		}
   633  
   634  		if failDeployment {
   635  			// Check if the group has autorevert set
   636  			if dstate.AutoRevert {
   637  				res.rollback = true
   638  			}
   639  
   640  			res.failDeployment = true
   641  		}
   642  
   643  		// All conditions have been hit so we can break
   644  		if res.createEval && res.failDeployment && res.rollback {
   645  			break
   646  		}
   647  	}
   648  
   649  	return res, nil
   650  }
   651  
   652  // shouldFail returns whether the job should be failed and whether it should be
   653  // rolled back to an earlier stable version by examining the allocations in the
   654  // deployment.
   655  func (w *deploymentWatcher) shouldFail() (fail, rollback bool, err error) {
   656  	snap, err := w.state.Snapshot()
   657  	if err != nil {
   658  		return false, false, err
   659  	}
   660  
   661  	d, err := snap.DeploymentByID(nil, w.deploymentID)
   662  	if err != nil {
   663  		return false, false, err
   664  	}
   665  	if d == nil {
   666  		// The deployment wasn't in the state store, possibly due to a system gc
   667  		return false, false, fmt.Errorf("deployment id not found: %q", w.deploymentID)
   668  	}
   669  
   670  	fail = false
   671  	for tg, dstate := range d.TaskGroups {
   672  		// If we are in a canary state we fail if there aren't enough healthy
   673  		// allocs to satisfy DesiredCanaries
   674  		if dstate.DesiredCanaries > 0 && !dstate.Promoted {
   675  			if dstate.HealthyAllocs >= dstate.DesiredCanaries {
   676  				continue
   677  			}
   678  		} else if dstate.HealthyAllocs >= dstate.DesiredTotal {
   679  			continue
   680  		}
   681  
   682  		// We have failed this TG
   683  		fail = true
   684  
   685  		// We don't need to autorevert this group
   686  		upd := w.j.LookupTaskGroup(tg).Update
   687  		if upd == nil || !upd.AutoRevert {
   688  			continue
   689  		}
   690  
   691  		// There are unhealthy allocs and we need to autorevert
   692  		return fail, true, nil
   693  	}
   694  
   695  	return fail, false, nil
   696  }
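        // For example, a group in an unpromoted canary state with DesiredCanaries=2
        // and HealthyAllocs=1 marks the deployment as failed, and it triggers a
        // rollback only if that group's update stanza sets auto_revert; a promoted
        // or canary-free group is judged against DesiredTotal instead.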
   697  
   698  func (w *deploymentWatcher) shouldFailEarly(deployment *structs.Deployment, alloc *structs.AllocListStub, dstate *structs.DeploymentState) bool {
   699  	if !alloc.DeploymentStatus.IsUnhealthy() {
   700  		return false
   701  	}
   702  
   703  	// Fail on the first unhealthy allocation if no progress deadline is specified.
   704  	if dstate.ProgressDeadline == 0 {
   705  		w.logger.Debug("failing deployment because an allocation failed and the deployment is not progress based", "alloc", alloc.ID)
   706  		return true
   707  	}
   708  
   709  	if deployment.Active() {
   710  		reschedulePolicy := w.j.LookupTaskGroup(alloc.TaskGroup).ReschedulePolicy
   711  		isRescheduleEligible := alloc.RescheduleEligible(reschedulePolicy, time.Now())
   712  		if !isRescheduleEligible {
   713  			// We have run out of reschedule attempts: do not wait for the progress deadline to expire because
   714  			// we know that we will not be able to get another allocation healthy
   715  			w.logger.Debug("failing deployment because an allocation has failed and the task group has run out of reschedule attempts", "alloc", alloc.ID)
   716  			return true
   717  		}
   718  	}
   719  
   720  	return false
   721  }
   722  
   723  // getDeploymentProgressCutoff returns the progress cutoff for the given
   724  // deployment
   725  func (w *deploymentWatcher) getDeploymentProgressCutoff(d *structs.Deployment) time.Time {
   726  	var next time.Time
   727  	doneTGs := w.doneGroups(d)
   728  	for name, dstate := range d.TaskGroups {
   729  		// This task group is done so we don't have to concern ourselves with
   730  		// its progress deadline.
   731  		if done, ok := doneTGs[name]; ok && done {
   732  			continue
   733  		}
   734  
   735  		if dstate.RequireProgressBy.IsZero() {
   736  			continue
   737  		}
   738  
   739  		if next.IsZero() || dstate.RequireProgressBy.Before(next) {
   740  			next = dstate.RequireProgressBy
   741  		}
   742  	}
   743  	return next
   744  }
   745  
   746  // doneGroups returns a map of task group to whether the deployment appears to
   747  // be done for the group. A true value doesn't mean no more action will be taken
   748  // in the lifetime of the deployment, because there could always be node
   749  // failures or rescheduling events.
   750  func (w *deploymentWatcher) doneGroups(d *structs.Deployment) map[string]bool {
   751  	if d == nil {
   752  		return nil
   753  	}
   754  
   755  	// Collect the allocations by the task group
   756  	snap, err := w.state.Snapshot()
   757  	if err != nil {
   758  		return nil
   759  	}
   760  
   761  	allocs, err := snap.AllocsByDeployment(nil, d.ID)
   762  	if err != nil {
   763  		return nil
   764  	}
   765  
   766  	// Go through the allocs and count up how many healthy allocs we have
   767  	healthy := make(map[string]int, len(d.TaskGroups))
   768  	for _, a := range allocs {
   769  		if a.TerminalStatus() || !a.DeploymentStatus.IsHealthy() {
   770  			continue
   771  		}
   772  		healthy[a.TaskGroup]++
   773  	}
   774  
   775  	// Go through each group and check if it is done
   776  	groups := make(map[string]bool, len(d.TaskGroups))
   777  	for name, dstate := range d.TaskGroups {
   778  		// Requires promotion
   779  		if dstate.DesiredCanaries != 0 && !dstate.Promoted {
   780  			groups[name] = false
   781  			continue
   782  		}
   783  
   784  		// Check we have enough healthy currently running allocations
   785  		groups[name] = healthy[name] >= dstate.DesiredTotal
   786  	}
   787  
   788  	return groups
   789  }
   790  
   791  // latestStableJob returns the latest stable job. It may be nil if none exist
   792  func (w *deploymentWatcher) latestStableJob() (*structs.Job, error) {
   793  	snap, err := w.state.Snapshot()
   794  	if err != nil {
   795  		return nil, err
   796  	}
   797  
   798  	versions, err := snap.JobVersionsByID(nil, w.j.Namespace, w.j.ID)
   799  	if err != nil {
   800  		return nil, err
   801  	}
   802  
   803  	var stable *structs.Job
   804  	for _, job := range versions {
   805  		if job.Stable {
   806  			stable = job
   807  			break
   808  		}
   809  	}
   810  
   811  	return stable, nil
   812  }
   813  
   814  // createBatchedUpdate creates an eval for the given index and updates the
   815  // given allocations to allow them to reschedule.
   816  func (w *deploymentWatcher) createBatchedUpdate(allowReplacements []string, forIndex uint64) {
   817  	w.l.Lock()
   818  	defer w.l.Unlock()
   819  
   820  	// Store the allocations that can be replaced
   821  	for _, allocID := range allowReplacements {
   822  		if w.outstandingAllowReplacements == nil {
   823  			w.outstandingAllowReplacements = make(map[string]*structs.DesiredTransition, len(allowReplacements))
   824  		}
   825  		w.outstandingAllowReplacements[allocID] = allowRescheduleTransition
   826  	}
   827  
   828  	if w.outstandingBatch || (forIndex < w.latestEval && len(allowReplacements) == 0) {
   829  		return
   830  	}
   831  
   832  	w.outstandingBatch = true
   833  
   834  	time.AfterFunc(perJobEvalBatchPeriod, func() {
   835  		// If the timer has been created and then we shut down, we need to no-op
   836  		// the evaluation creation.
   837  		select {
   838  		case <-w.ctx.Done():
   839  			return
   840  		default:
   841  		}
   842  
   843  		w.l.Lock()
   844  		replacements := w.outstandingAllowReplacements
   845  		w.outstandingAllowReplacements = nil
   846  		w.outstandingBatch = false
   847  		w.l.Unlock()
   848  
   849  		// Create the eval
   850  		if _, err := w.createUpdate(replacements, w.getEval()); err != nil {
   851  			w.logger.Error("failed to create evaluation for deployment", "deployment_id", w.deploymentID, "error", err)
   852  		}
   853  	})
   854  }
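        // The effect is a debounce: the first caller in a window arms a single
        // time.AfterFunc for perJobEvalBatchPeriod, later callers only add to
        // outstandingAllowReplacements, and one evaluation plus one batched
        // desired-transition update are written when the timer fires.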
   855  
   856  // getEval returns an evaluation suitable for the deployment
   857  func (w *deploymentWatcher) getEval() *structs.Evaluation {
   858  	now := time.Now().UTC().UnixNano()
   859  
   860  	// During a server upgrade it's possible we end up with deployments created
   861  	// on the previous version that are then "watched" on a leader that's on
   862  	// the new version. This would result in an eval with its priority set to
   863  	// zero which would be bad. This therefore protects against that.
   864  	w.l.Lock()
   865  	priority := w.d.EvalPriority
   866  	if priority == 0 {
   867  		priority = w.j.Priority
   868  	}
   869  	w.l.Unlock()
   870  
   871  	return &structs.Evaluation{
   872  		ID:           uuid.Generate(),
   873  		Namespace:    w.j.Namespace,
   874  		Priority:     priority,
   875  		Type:         w.j.Type,
   876  		TriggeredBy:  structs.EvalTriggerDeploymentWatcher,
   877  		JobID:        w.j.ID,
   878  		DeploymentID: w.deploymentID,
   879  		Status:       structs.EvalStatusPending,
   880  		CreateTime:   now,
   881  		ModifyTime:   now,
   882  	}
   883  }
   884  
   885  // getDeploymentStatusUpdate returns a deployment status update
   886  func (w *deploymentWatcher) getDeploymentStatusUpdate(status, desc string) *structs.DeploymentStatusUpdate {
   887  	return &structs.DeploymentStatusUpdate{
   888  		DeploymentID:      w.deploymentID,
   889  		Status:            status,
   890  		StatusDescription: desc,
   891  	}
   892  }
   893  
   894  // getStatus returns the current status of the deployment
   895  func (w *deploymentWatcher) getStatus() string {
   896  	w.l.RLock()
   897  	defer w.l.RUnlock()
   898  	return w.d.Status
   899  }
   900  
   901  type allocUpdates struct {
   902  	allocs []*structs.AllocListStub
   903  	index  uint64
   904  	err    error
   905  }
   906  
   907  // getAllocsCh creates a channel and starts a goroutine that
   908  // 1. runs a blocking query for the deployment's allocations against the state store
   909  // 2. sends the result on the channel
   910  // The query runs once per call, but watch calls getAllocsCh in a loop.
   911  func (w *deploymentWatcher) getAllocsCh(index uint64) <-chan *allocUpdates {
   912  	out := make(chan *allocUpdates, 1)
   913  	go func() {
   914  		allocs, index, err := w.getAllocs(index)
   915  		out <- &allocUpdates{
   916  			allocs: allocs,
   917  			index:  index,
   918  			err:    err,
   919  		}
   920  	}()
   921  
   922  	return out
   923  }
   924  
   925  // getAllocs retrieves the allocations that are part of the deployment blocking
   926  // at the given index.
   927  func (w *deploymentWatcher) getAllocs(index uint64) ([]*structs.AllocListStub, uint64, error) {
   928  	resp, index, err := w.state.BlockingQuery(w.getAllocsImpl, index, w.ctx)
   929  	if err != nil {
   930  		return nil, 0, err
   931  	}
   932  	if err := w.ctx.Err(); err != nil {
   933  		return nil, 0, err
   934  	}
   935  
   936  	return resp.([]*structs.AllocListStub), index, nil
   937  }
   938  
   939  // getAllocsImpl retrieves the allocations for the watched deployment from the passed state store.
   940  func (w *deploymentWatcher) getAllocsImpl(ws memdb.WatchSet, state *state.StateStore) (interface{}, uint64, error) {
   941  	if err := w.queryLimiter.Wait(w.ctx); err != nil {
   942  		return nil, 0, err
   943  	}
   944  
   945  	// Capture all the allocations
   946  	allocs, err := state.AllocsByDeployment(ws, w.deploymentID)
   947  	if err != nil {
   948  		return nil, 0, err
   949  	}
   950  
   951  	maxIndex := uint64(0)
   952  	stubs := make([]*structs.AllocListStub, 0, len(allocs))
   953  	for _, alloc := range allocs {
   954  		stubs = append(stubs, alloc.Stub(nil))
   955  
   956  		if maxIndex < alloc.ModifyIndex {
   957  			maxIndex = alloc.ModifyIndex
   958  		}
   959  	}
   960  
   961  	// Use the last index that affected the allocs table
   962  	if len(stubs) == 0 {
   963  		index, err := state.Index("allocs")
   964  		if err != nil {
   965  			return nil, index, err
   966  		}
   967  		maxIndex = index
   968  	}
   969  
   970  	return stubs, maxIndex, nil
   971  }
   972  
   973  // jobEvalStatus returns the latest eval index for a job. The index is used to
   974  // determine if an allocation update requires an evaluation to be triggered.
   975  func (w *deploymentWatcher) jobEvalStatus() (latestIndex uint64, err error) {
   976  	if err := w.queryLimiter.Wait(w.ctx); err != nil {
   977  		return 0, err
   978  	}
   979  
   980  	snap, err := w.state.Snapshot()
   981  	if err != nil {
   982  		return 0, err
   983  	}
   984  
   985  	evals, err := snap.EvalsByJob(nil, w.j.Namespace, w.j.ID)
   986  	if err != nil {
   987  		return 0, err
   988  	}
   989  
   990  	// If there are no evals for the job, return zero, since we want any
   991  	// allocation change to trigger an evaluation.
   992  	if len(evals) == 0 {
   993  		return 0, nil
   994  	}
   995  
   996  	var max uint64
   997  	for _, eval := range evals {
   998  		// A cancelled eval never impacts what the scheduler has seen, so do not
   999  		// use its indexes.
  1000  		if eval.Status == structs.EvalStatusCancelled {
  1001  			continue
  1002  		}
  1003  
  1004  		// Prefer using the snapshot index. Otherwise use the create index
  1005  		if eval.SnapshotIndex != 0 && max < eval.SnapshotIndex {
  1006  			max = eval.SnapshotIndex
  1007  		} else if max < eval.CreateIndex {
  1008  			max = eval.CreateIndex
  1009  		}
  1010  	}
  1011  
  1012  	return max, nil
  1013  }