github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/nomad/deploymentwatcher/deployment_watcher.go

     1  package deploymentwatcher
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"sync"
     7  	"time"
     8  
     9  	log "github.com/hashicorp/go-hclog"
    10  	memdb "github.com/hashicorp/go-memdb"
    11  	"github.com/hashicorp/nomad/helper"
    12  	"github.com/hashicorp/nomad/helper/uuid"
    13  	"github.com/hashicorp/nomad/nomad/state"
    14  	"github.com/hashicorp/nomad/nomad/structs"
    15  	"golang.org/x/time/rate"
    16  )
    17  
    18  const (
    19  	// perJobEvalBatchPeriod is the batching length before creating an evaluation to
    20  	// trigger the scheduler when allocations are marked as healthy.
    21  	perJobEvalBatchPeriod = 1 * time.Second
    22  )
    23  
    24  var (
    25  	// allowRescheduleTransition is the transition that allows failed
    26  	// allocations that are part of a deployment to be rescheduled. We create
    27  	// a one-off variable to avoid creating a new object for every request.
    28  	allowRescheduleTransition = &structs.DesiredTransition{
    29  		Reschedule: helper.BoolToPtr(true),
    30  	}
    31  )
    32  
    33  // deploymentTriggers is the set of functions required to trigger changes on
    34  // behalf of a deployment.
    35  type deploymentTriggers interface {
    36  	// createUpdate is used to create allocation desired transition updates and
    37  	// an evaluation.
    38  	createUpdate(allocs map[string]*structs.DesiredTransition, eval *structs.Evaluation) (uint64, error)
    39  
    40  	// upsertJob is used to roll back a job when autoreverting for a deployment
    41  	upsertJob(job *structs.Job) (uint64, error)
    42  
    43  	// upsertDeploymentStatusUpdate is used to upsert a deployment status update
    44  	// along with an optional evaluation and job
    45  	upsertDeploymentStatusUpdate(u *structs.DeploymentStatusUpdate, eval *structs.Evaluation, job *structs.Job) (uint64, error)
    46  
    47  	// upsertDeploymentPromotion is used to promote canaries in a deployment
    48  	upsertDeploymentPromotion(req *structs.ApplyDeploymentPromoteRequest) (uint64, error)
    49  
    50  	// upsertDeploymentAllocHealth is used to set the health of allocations in a
    51  	// deployment
    52  	upsertDeploymentAllocHealth(req *structs.ApplyDeploymentAllocHealthRequest) (uint64, error)
    53  }
    54  
    55  // deploymentWatcher is used to watch a single deployment and trigger the
    56  // scheduler when allocation health transitions occur.
    57  type deploymentWatcher struct {
    58  	// queryLimiter is used to limit the rate of blocking queries
    59  	queryLimiter *rate.Limiter
    60  
    61  	// deploymentTriggers holds the methods required to trigger changes on behalf of the
    62  	// deployment
    63  	deploymentTriggers
    64  
    65  	// DeploymentRPC holds methods for interacting with peer regions
    66  	// in enterprise edition
    67  	DeploymentRPC
    68  
    69  	// JobRPC holds methods for interacting with peer regions
    70  	// in enterprise edition
    71  	JobRPC
    72  
    73  	// state is the state that is watched for state changes.
    74  	state *state.StateStore
    75  
    76  	// deploymentID is the deployment's ID being watched
    77  	deploymentID string
    78  
    79  	// deploymentUpdateCh is triggered when there is an updated deployment
    80  	deploymentUpdateCh chan struct{}
    81  
    82  	// d is the deployment being watched
    83  	d *structs.Deployment
    84  
    85  	// j is the job the deployment is for
    86  	j *structs.Job
    87  
    88  	// outstandingBatch marks whether an outstanding function exists to create
    89  	// the evaluation. Access should be done through the lock.
    90  	outstandingBatch bool
    91  
    92  	// outstandingAllowReplacements is the map of allocations that will be
    93  	// marked as allowing a replacement. Access should be done through the lock.
    94  	outstandingAllowReplacements map[string]*structs.DesiredTransition
    95  
    96  	// latestEval is the latest eval for the job. It is updated by the watch
    97  	// loop and any time an evaluation is created. The field should be accessed
    98  	// by holding the lock or using the setter and getter methods.
    99  	latestEval uint64
   100  
   101  	logger log.Logger
   102  	ctx    context.Context
   103  	exitFn context.CancelFunc
   104  	l      sync.RWMutex
   105  }
   106  
   107  // newDeploymentWatcher returns a deployment watcher that is used to watch
   108  // deployments and trigger the scheduler as needed.
   109  func newDeploymentWatcher(parent context.Context, queryLimiter *rate.Limiter,
   110  	logger log.Logger, state *state.StateStore, d *structs.Deployment,
   111  	j *structs.Job, triggers deploymentTriggers,
   112  	deploymentRPC DeploymentRPC, jobRPC JobRPC) *deploymentWatcher {
   113  
   114  	ctx, exitFn := context.WithCancel(parent)
   115  	w := &deploymentWatcher{
   116  		queryLimiter:       queryLimiter,
   117  		deploymentID:       d.ID,
   118  		deploymentUpdateCh: make(chan struct{}, 1),
   119  		d:                  d,
   120  		j:                  j,
   121  		state:              state,
   122  		deploymentTriggers: triggers,
   123  		DeploymentRPC:      deploymentRPC,
   124  		JobRPC:             jobRPC,
   125  		logger:             logger.With("deployment_id", d.ID, "job", j.NamespacedID()),
   126  		ctx:                ctx,
   127  		exitFn:             exitFn,
   128  	}
   129  
   130  	// Start the long lived watcher that scans for allocation updates
   131  	go w.watch()
   132  
   133  	return w
   134  }
   135  
   136  // updateDeployment is used to update the tracked deployment.
   137  func (w *deploymentWatcher) updateDeployment(d *structs.Deployment) {
   138  	w.l.Lock()
   139  	defer w.l.Unlock()
   140  
   141  	// Update and trigger
   142  	w.d = d
   143  	select {
   144  	case w.deploymentUpdateCh <- struct{}{}:
   145  	default:
   146  	}
   147  }
   148  
   149  // getDeployment returns the tracked deployment.
   150  func (w *deploymentWatcher) getDeployment() *structs.Deployment {
   151  	w.l.RLock()
   152  	defer w.l.RUnlock()
   153  	return w.d
   154  }
   155  
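        // SetAllocHealth marks the given allocations as healthy or unhealthy for
        // the deployment. If any allocations are reported unhealthy, the
        // deployment is failed and, when auto_revert is set for the affected task
        // group, the latest stable job is looked up so it can be rolled back to.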
   156  func (w *deploymentWatcher) SetAllocHealth(
   157  	req *structs.DeploymentAllocHealthRequest,
   158  	resp *structs.DeploymentUpdateResponse) error {
   159  
   160  	// If we are failing the deployment, update the status and potentially
    161  	// roll back
   162  	var j *structs.Job
   163  	var u *structs.DeploymentStatusUpdate
   164  
   165  	// If there are unhealthy allocations we need to mark the deployment as
   166  	// failed and check if we should roll back to a stable job.
   167  	if l := len(req.UnhealthyAllocationIDs); l != 0 {
   168  		unhealthy := make(map[string]struct{}, l)
   169  		for _, alloc := range req.UnhealthyAllocationIDs {
   170  			unhealthy[alloc] = struct{}{}
   171  		}
   172  
   173  		// Get the allocations for the deployment
   174  		snap, err := w.state.Snapshot()
   175  		if err != nil {
   176  			return err
   177  		}
   178  
   179  		allocs, err := snap.AllocsByDeployment(nil, req.DeploymentID)
   180  		if err != nil {
   181  			return err
   182  		}
   183  
   184  		// Determine if we should autorevert to an older job
   185  		desc := structs.DeploymentStatusDescriptionFailedAllocations
   186  		for _, alloc := range allocs {
   187  			// Check that the alloc has been marked unhealthy
   188  			if _, ok := unhealthy[alloc.ID]; !ok {
   189  				continue
   190  			}
   191  
   192  			// Check if the group has autorevert set
   193  			dstate, ok := w.getDeployment().TaskGroups[alloc.TaskGroup]
   194  			if !ok || !dstate.AutoRevert {
   195  				continue
   196  			}
   197  
   198  			var err error
   199  			j, err = w.latestStableJob()
   200  			if err != nil {
   201  				return err
   202  			}
   203  
   204  			if j != nil {
   205  				j, desc = w.handleRollbackValidity(j, desc)
   206  			}
   207  			break
   208  		}
   209  
   210  		u = w.getDeploymentStatusUpdate(structs.DeploymentStatusFailed, desc)
   211  	}
   212  
   213  	// Canonicalize the job in case it doesn't have namespace set
   214  	j.Canonicalize()
   215  
   216  	// Create the request
   217  	areq := &structs.ApplyDeploymentAllocHealthRequest{
   218  		DeploymentAllocHealthRequest: *req,
   219  		Timestamp:                    time.Now(),
   220  		Eval:                         w.getEval(),
   221  		DeploymentUpdate:             u,
   222  		Job:                          j,
   223  	}
   224  
   225  	index, err := w.upsertDeploymentAllocHealth(areq)
   226  	if err != nil {
   227  		return err
   228  	}
   229  
   230  	// Build the response
   231  	resp.EvalID = areq.Eval.ID
   232  	resp.EvalCreateIndex = index
   233  	resp.DeploymentModifyIndex = index
   234  	resp.Index = index
   235  	if j != nil {
   236  		resp.RevertedJobVersion = helper.Uint64ToPtr(j.Version)
   237  	}
   238  	return nil
   239  }
   240  
    241  // handleRollbackValidity checks if the job being rolled back to has the same spec as the existing job.
    242  // It returns the (possibly nil) rollback job and an updated description accordingly.
   243  func (w *deploymentWatcher) handleRollbackValidity(rollbackJob *structs.Job, desc string) (*structs.Job, string) {
    244  	// Only roll back if the job being rolled back to has a different spec.
    245  	// This prevents an infinite revert cycle when a previously stable version of the job fails to start up during a rollback.
    246  	// If the job we are trying to roll back to is identical to the current job, we stop because the rollback will not succeed.
   247  	if w.j.SpecChanged(rollbackJob) {
   248  		desc = structs.DeploymentStatusDescriptionRollback(desc, rollbackJob.Version)
   249  	} else {
   250  		desc = structs.DeploymentStatusDescriptionRollbackNoop(desc, rollbackJob.Version)
   251  		rollbackJob = nil
   252  	}
   253  	return rollbackJob, desc
   254  }
   255  
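        // PromoteDeployment promotes the requested canaries for the deployment
        // and creates an evaluation to trigger the scheduler.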
   256  func (w *deploymentWatcher) PromoteDeployment(
   257  	req *structs.DeploymentPromoteRequest,
   258  	resp *structs.DeploymentUpdateResponse) error {
   259  
   260  	// Create the request
   261  	areq := &structs.ApplyDeploymentPromoteRequest{
   262  		DeploymentPromoteRequest: *req,
   263  		Eval:                     w.getEval(),
   264  	}
   265  
   266  	index, err := w.upsertDeploymentPromotion(areq)
   267  	if err != nil {
   268  		return err
   269  	}
   270  
   271  	// Build the response
   272  	resp.EvalID = areq.Eval.ID
   273  	resp.EvalCreateIndex = index
   274  	resp.DeploymentModifyIndex = index
   275  	resp.Index = index
   276  	return nil
   277  }
   278  
   279  // autoPromoteDeployment creates a synthetic promotion request, and upserts it for processing
   280  func (w *deploymentWatcher) autoPromoteDeployment(allocs []*structs.AllocListStub) error {
   281  	d := w.getDeployment()
   282  	if !d.HasPlacedCanaries() || !d.RequiresPromotion() {
   283  		return nil
   284  	}
   285  
   286  	// AutoPromote iff every task group is marked auto_promote and is healthy. The whole
   287  	// job version has been incremented, so we promote together. See also AutoRevert
   288  	for _, dstate := range d.TaskGroups {
   289  		if !dstate.AutoPromote || dstate.DesiredCanaries != len(dstate.PlacedCanaries) {
   290  			return nil
   291  		}
   292  
   293  		// Find the health status of each canary
   294  		for _, c := range dstate.PlacedCanaries {
   295  			for _, a := range allocs {
   296  				if c == a.ID && !a.DeploymentStatus.IsHealthy() {
   297  					return nil
   298  				}
   299  			}
   300  		}
   301  	}
   302  
   303  	// Send the request
   304  	_, err := w.upsertDeploymentPromotion(&structs.ApplyDeploymentPromoteRequest{
   305  		DeploymentPromoteRequest: structs.DeploymentPromoteRequest{DeploymentID: d.GetID(), All: true},
   306  		Eval:                     w.getEval(),
   307  	})
   308  	return err
   309  }
   310  
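        // PauseDeployment pauses or resumes the deployment as requested and
        // records the corresponding status update.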
   311  func (w *deploymentWatcher) PauseDeployment(
   312  	req *structs.DeploymentPauseRequest,
   313  	resp *structs.DeploymentUpdateResponse) error {
   314  	// Determine the status we should transition to and if we need to create an
   315  	// evaluation
   316  	status, desc := structs.DeploymentStatusPaused, structs.DeploymentStatusDescriptionPaused
   317  	var eval *structs.Evaluation
   318  	evalID := ""
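        	// Resuming the deployment also creates an evaluation so the scheduler
        	// immediately continues making placements; pausing only records the
        	// new status.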
   319  	if !req.Pause {
   320  		status, desc = structs.DeploymentStatusRunning, structs.DeploymentStatusDescriptionRunning
   321  		eval = w.getEval()
   322  		evalID = eval.ID
   323  	}
   324  	update := w.getDeploymentStatusUpdate(status, desc)
   325  
   326  	// Commit the change
   327  	i, err := w.upsertDeploymentStatusUpdate(update, eval, nil)
   328  	if err != nil {
   329  		return err
   330  	}
   331  
   332  	// Build the response
   333  	if evalID != "" {
   334  		resp.EvalID = evalID
   335  		resp.EvalCreateIndex = i
   336  	}
   337  	resp.DeploymentModifyIndex = i
   338  	resp.Index = i
   339  	return nil
   340  }
   341  
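        // FailDeployment marks the deployment as failed by the user and, if any
        // task group has auto_revert set, attempts to roll back to the latest
        // stable version of the job.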
   342  func (w *deploymentWatcher) FailDeployment(
   343  	req *structs.DeploymentFailRequest,
   344  	resp *structs.DeploymentUpdateResponse) error {
   345  
   346  	status, desc := structs.DeploymentStatusFailed, structs.DeploymentStatusDescriptionFailedByUser
   347  
   348  	// Determine if we should rollback
   349  	rollback := false
   350  	for _, dstate := range w.getDeployment().TaskGroups {
   351  		if dstate.AutoRevert {
   352  			rollback = true
   353  			break
   354  		}
   355  	}
   356  
   357  	var rollbackJob *structs.Job
   358  	if rollback {
   359  		var err error
   360  		rollbackJob, err = w.latestStableJob()
   361  		if err != nil {
   362  			return err
   363  		}
   364  
   365  		if rollbackJob != nil {
   366  			rollbackJob, desc = w.handleRollbackValidity(rollbackJob, desc)
   367  		} else {
   368  			desc = structs.DeploymentStatusDescriptionNoRollbackTarget(desc)
   369  		}
   370  	}
   371  
   372  	// Commit the change
   373  	update := w.getDeploymentStatusUpdate(status, desc)
   374  	eval := w.getEval()
   375  	i, err := w.upsertDeploymentStatusUpdate(update, eval, rollbackJob)
   376  	if err != nil {
   377  		return err
   378  	}
   379  
   380  	// Build the response
   381  	resp.EvalID = eval.ID
   382  	resp.EvalCreateIndex = i
   383  	resp.DeploymentModifyIndex = i
   384  	resp.Index = i
   385  	if rollbackJob != nil {
   386  		resp.RevertedJobVersion = helper.Uint64ToPtr(rollbackJob.Version)
   387  	}
   388  	return nil
   389  }
   390  
   391  // StopWatch stops watching the deployment. This should be called whenever a
   392  // deployment is completed or the watcher is no longer needed.
   393  func (w *deploymentWatcher) StopWatch() {
   394  	w.exitFn()
   395  }
   396  
    397  // watch is the long-running watcher that watches for both allocation and
    398  // deployment changes. Its function is to create evaluations to trigger the
    399  // scheduler when more progress can be made, and to fail the deployment,
    400  // potentially rolling back the job, when it has failed. Progress can be made
    401  // when an allocation transitions to healthy, so we create an eval in that case.
   402  func (w *deploymentWatcher) watch() {
   403  	// Get the deadline. This is likely a zero time to begin with but we need to
   404  	// handle the case that the deployment has already progressed and we are now
    405  // just starting to watch it. This would most likely occur if there was a
   406  	// leader transition and we are now starting our watcher.
   407  	currentDeadline := w.getDeploymentProgressCutoff(w.getDeployment())
   408  	var deadlineTimer *time.Timer
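        	// A zero cutoff means there is no progress deadline to track yet, so
        	// create the timer in a stopped, drained state; it is reset once an
        	// updated deployment provides a real deadline.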
   409  	if currentDeadline.IsZero() {
   410  		deadlineTimer = time.NewTimer(0)
   411  		if !deadlineTimer.Stop() {
   412  			<-deadlineTimer.C
   413  		}
   414  	} else {
   415  		deadlineTimer = time.NewTimer(time.Until(currentDeadline))
   416  	}
   417  
   418  	allocIndex := uint64(1)
   419  	var updates *allocUpdates
   420  
   421  	rollback, deadlineHit := false, false
   422  
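        	// The loop returns when the context is canceled (the deployment
        	// completed or the watcher was stopped) or when retrieving or handling
        	// allocation updates fails; failure conditions break to FAIL, where
        	// the code below fails the deployment and optionally rolls back the job.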
   423  FAIL:
   424  	for {
   425  		select {
   426  		case <-w.ctx.Done():
   427  			// This is the successful case, and we stop the loop
   428  			return
   429  		case <-deadlineTimer.C:
   430  			// We have hit the progress deadline, so fail the deployment
   431  			// unless we're waiting for manual promotion. We need to determine
   432  			// whether we should roll back the job by inspecting which allocs
   433  			// as part of the deployment are healthy and which aren't. The
   434  			// deadlineHit flag is never reset, so even in the case of a
   435  			// manual promotion, we'll describe any failure as a progress
   436  			// deadline failure at this point.
   437  			deadlineHit = true
   438  			fail, rback, err := w.shouldFail()
   439  			if err != nil {
   440  				w.logger.Error("failed to determine whether to rollback job", "error", err)
   441  			}
   442  			if !fail {
   443  				w.logger.Debug("skipping deadline")
   444  				continue
   445  			}
   446  
   447  			w.logger.Debug("deadline hit", "rollback", rback)
   448  			rollback = rback
   449  			err = w.nextRegion(structs.DeploymentStatusFailed)
   450  			if err != nil {
   451  				w.logger.Error("multiregion deployment error", "error", err)
   452  			}
   453  			break FAIL
   454  		case <-w.deploymentUpdateCh:
   455  			// Get the updated deployment and check if we should change the
   456  			// deadline timer
   457  			next := w.getDeploymentProgressCutoff(w.getDeployment())
   458  			if !next.Equal(currentDeadline) {
   459  				prevDeadlineZero := currentDeadline.IsZero()
   460  				currentDeadline = next
   461  				// The most recent deadline can be zero if no allocs were created for this deployment.
   462  				// The deadline timer would have already been stopped once in that case. To prevent
   463  				// deadlocking on the already stopped deadline timer, we only drain the channel if
   464  				// the previous deadline was not zero.
   465  				if !prevDeadlineZero && !deadlineTimer.Stop() {
   466  					select {
   467  					case <-deadlineTimer.C:
   468  					default:
   469  					}
   470  				}
   471  
   472  				// If the next deadline is zero, we should not reset the timer
   473  				// as we aren't tracking towards a progress deadline yet. This
   474  				// can happen if you have multiple task groups with progress
   475  				// deadlines and one of the task groups hasn't made any
   476  				// placements. As soon as the other task group finishes its
   477  				// rollout, the next progress deadline becomes zero, so we want
    478  				// to avoid resetting the timer, which would cause a spurious deployment failure.
   479  				if !next.IsZero() {
   480  					deadlineTimer.Reset(time.Until(next))
   481  					w.logger.Trace("resetting deadline")
   482  				}
   483  			}
   484  
   485  			err := w.nextRegion(w.getStatus())
   486  			if err != nil {
   487  				break FAIL
   488  			}
   489  
   490  		case updates = <-w.getAllocsCh(allocIndex):
   491  			if err := updates.err; err != nil {
   492  				if err == context.Canceled || w.ctx.Err() == context.Canceled {
   493  					return
   494  				}
   495  
   496  				w.logger.Error("failed to retrieve allocations", "error", err)
   497  				return
   498  			}
   499  			allocIndex = updates.index
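        			// Advance the index so the next blocking query only returns
        			// newer allocation updates.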
   500  
   501  			// We have allocation changes for this deployment so determine the
   502  			// steps to take.
   503  			res, err := w.handleAllocUpdate(updates.allocs)
   504  			if err != nil {
   505  				if err == context.Canceled || w.ctx.Err() == context.Canceled {
   506  					return
   507  				}
   508  
   509  				w.logger.Error("failed handling allocation updates", "error", err)
   510  				return
   511  			}
   512  
   513  			// The deployment has failed, so break out of the watch loop and
   514  			// handle the failure
   515  			if res.failDeployment {
   516  				rollback = res.rollback
   517  				err := w.nextRegion(structs.DeploymentStatusFailed)
   518  				if err != nil {
   519  					w.logger.Error("multiregion deployment error", "error", err)
   520  				}
   521  				break FAIL
   522  			}
   523  
   524  			// If permitted, automatically promote this canary deployment
   525  			err = w.autoPromoteDeployment(updates.allocs)
   526  			if err != nil {
   527  				w.logger.Error("failed to auto promote deployment", "error", err)
   528  			}
   529  
   530  			// Create an eval to push the deployment along
   531  			if res.createEval || len(res.allowReplacements) != 0 {
   532  				w.createBatchedUpdate(res.allowReplacements, allocIndex)
   533  			}
   534  		}
   535  	}
   536  
    537  	// Change the deployment's status to failed
   538  	desc := structs.DeploymentStatusDescriptionFailedAllocations
   539  	if deadlineHit {
   540  		desc = structs.DeploymentStatusDescriptionProgressDeadline
   541  	}
   542  
   543  	// Rollback to the old job if necessary
   544  	var j *structs.Job
   545  	if rollback {
   546  		var err error
   547  		j, err = w.latestStableJob()
   548  		if err != nil {
   549  			w.logger.Error("failed to lookup latest stable job", "error", err)
   550  		}
   551  
   552  		// Description should include that the job is being rolled back to
   553  		// version N
   554  		if j != nil {
   555  			j, desc = w.handleRollbackValidity(j, desc)
   556  		} else {
   557  			desc = structs.DeploymentStatusDescriptionNoRollbackTarget(desc)
   558  		}
   559  	}
   560  
   561  	// Update the status of the deployment to failed and create an evaluation.
   562  	e := w.getEval()
   563  	u := w.getDeploymentStatusUpdate(structs.DeploymentStatusFailed, desc)
   564  	if _, err := w.upsertDeploymentStatusUpdate(u, e, j); err != nil {
   565  		w.logger.Error("failed to update deployment status", "error", err)
   566  	}
   567  }
   568  
   569  // allocUpdateResult is used to return the desired actions given the newest set
   570  // of allocations for the deployment.
   571  type allocUpdateResult struct {
   572  	createEval        bool
   573  	failDeployment    bool
   574  	rollback          bool
   575  	allowReplacements []string
   576  }
   577  
   578  // handleAllocUpdate is used to compute the set of actions to take based on the
   579  // updated allocations for the deployment.
   580  func (w *deploymentWatcher) handleAllocUpdate(allocs []*structs.AllocListStub) (allocUpdateResult, error) {
   581  	var res allocUpdateResult
   582  
   583  	// Get the latest evaluation index
   584  	latestEval, err := w.jobEvalStatus()
   585  	if err != nil {
   586  		if err == context.Canceled || w.ctx.Err() == context.Canceled {
   587  			return res, err
   588  		}
   589  
   590  		return res, fmt.Errorf("failed to determine last evaluation index for job %q: %v", w.j.ID, err)
   591  	}
   592  
   593  	deployment := w.getDeployment()
   594  	for _, alloc := range allocs {
   595  		dstate, ok := deployment.TaskGroups[alloc.TaskGroup]
   596  		if !ok {
   597  			continue
   598  		}
   599  
   600  		// Determine if the update stanza for this group is progress based
   601  		progressBased := dstate.ProgressDeadline != 0
   602  
    603  		// Check if the allocation has failed and we need to mark it to allow
   604  		// replacements
   605  		if progressBased && alloc.DeploymentStatus.IsUnhealthy() &&
   606  			deployment.Active() && !alloc.DesiredTransition.ShouldReschedule() {
   607  			res.allowReplacements = append(res.allowReplacements, alloc.ID)
   608  			continue
   609  		}
   610  
   611  		// We need to create an eval so the job can progress.
   612  		if alloc.DeploymentStatus.IsHealthy() && alloc.DeploymentStatus.ModifyIndex > latestEval {
   613  			res.createEval = true
   614  		}
   615  
    616  		// If the group is using a progress deadline, failures are handled by the deadline timer rather than here, so there is nothing more to do.
   617  		if progressBased {
   618  			continue
   619  		}
   620  
   621  		// Fail on the first bad allocation
   622  		if alloc.DeploymentStatus.IsUnhealthy() {
   623  			// Check if the group has autorevert set
   624  			if dstate.AutoRevert {
   625  				res.rollback = true
   626  			}
   627  
   628  			// Since we have an unhealthy allocation, fail the deployment
   629  			res.failDeployment = true
   630  		}
   631  
   632  		// All conditions have been hit so we can break
   633  		if res.createEval && res.failDeployment && res.rollback {
   634  			break
   635  		}
   636  	}
   637  
   638  	return res, nil
   639  }
   640  
   641  // shouldFail returns whether the job should be failed and whether it should
    642  // be rolled back to an earlier stable version by examining the allocations in the
   643  // deployment.
   644  func (w *deploymentWatcher) shouldFail() (fail, rollback bool, err error) {
   645  	snap, err := w.state.Snapshot()
   646  	if err != nil {
   647  		return false, false, err
   648  	}
   649  
   650  	d, err := snap.DeploymentByID(nil, w.deploymentID)
   651  	if err != nil {
   652  		return false, false, err
   653  	}
   654  	if d == nil {
   655  		// The deployment wasn't in the state store, possibly due to a system gc
   656  		return false, false, fmt.Errorf("deployment id not found: %q", w.deploymentID)
   657  	}
   658  
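        	// A task group fails the deployment when it has not reached its
        	// required number of healthy allocations: DesiredCanaries while
        	// awaiting promotion, DesiredTotal otherwise.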
   659  	fail = false
   660  	for tg, dstate := range d.TaskGroups {
   661  		// If we are in a canary state we fail if there aren't enough healthy
   662  		// allocs to satisfy DesiredCanaries
   663  		if dstate.DesiredCanaries > 0 && !dstate.Promoted {
   664  			if dstate.HealthyAllocs >= dstate.DesiredCanaries {
   665  				continue
   666  			}
   667  		} else if dstate.HealthyAllocs >= dstate.DesiredTotal {
   668  			continue
   669  		}
   670  
   671  		// We have failed this TG
   672  		fail = true
   673  
   674  		// We don't need to autorevert this group
   675  		upd := w.j.LookupTaskGroup(tg).Update
   676  		if upd == nil || !upd.AutoRevert {
   677  			continue
   678  		}
   679  
   680  		// Unhealthy allocs and we need to autorevert
   681  		return fail, true, nil
   682  	}
   683  
   684  	return fail, false, nil
   685  }
   686  
   687  // getDeploymentProgressCutoff returns the progress cutoff for the given
   688  // deployment
   689  func (w *deploymentWatcher) getDeploymentProgressCutoff(d *structs.Deployment) time.Time {
   690  	var next time.Time
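        	// The cutoff is the earliest RequireProgressBy across task groups that
        	// are not yet done; the zero time is returned when no group is
        	// currently tracking a deadline.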
   691  	doneTGs := w.doneGroups(d)
   692  	for name, dstate := range d.TaskGroups {
   693  		// This task group is done so we don't have to concern ourselves with
   694  		// its progress deadline.
   695  		if done, ok := doneTGs[name]; ok && done {
   696  			continue
   697  		}
   698  
   699  		if dstate.RequireProgressBy.IsZero() {
   700  			continue
   701  		}
   702  
   703  		if next.IsZero() || dstate.RequireProgressBy.Before(next) {
   704  			next = dstate.RequireProgressBy
   705  		}
   706  	}
   707  	return next
   708  }
   709  
   710  // doneGroups returns a map of task group to whether the deployment appears to
   711  // be done for the group. A true value doesn't mean no more action will be taken
    712  // in the lifetime of the deployment because there could always be node
    713  // failures or rescheduling events.
   714  func (w *deploymentWatcher) doneGroups(d *structs.Deployment) map[string]bool {
   715  	if d == nil {
   716  		return nil
   717  	}
   718  
   719  	// Collect the allocations by the task group
   720  	snap, err := w.state.Snapshot()
   721  	if err != nil {
   722  		return nil
   723  	}
   724  
   725  	allocs, err := snap.AllocsByDeployment(nil, d.ID)
   726  	if err != nil {
   727  		return nil
   728  	}
   729  
   730  	// Go through the allocs and count up how many healthy allocs we have
   731  	healthy := make(map[string]int, len(d.TaskGroups))
   732  	for _, a := range allocs {
   733  		if a.TerminalStatus() || !a.DeploymentStatus.IsHealthy() {
   734  			continue
   735  		}
   736  		healthy[a.TaskGroup]++
   737  	}
   738  
    739  	// Go through each group and check if it is done
   740  	groups := make(map[string]bool, len(d.TaskGroups))
   741  	for name, dstate := range d.TaskGroups {
   742  		// Requires promotion
   743  		if dstate.DesiredCanaries != 0 && !dstate.Promoted {
   744  			groups[name] = false
   745  			continue
   746  		}
   747  
   748  		// Check we have enough healthy currently running allocations
   749  		groups[name] = healthy[name] >= dstate.DesiredTotal
   750  	}
   751  
   752  	return groups
   753  }
   754  
    755  // latestStableJob returns the latest stable job. It may be nil if none exists.
   756  func (w *deploymentWatcher) latestStableJob() (*structs.Job, error) {
   757  	snap, err := w.state.Snapshot()
   758  	if err != nil {
   759  		return nil, err
   760  	}
   761  
   762  	versions, err := snap.JobVersionsByID(nil, w.j.Namespace, w.j.ID)
   763  	if err != nil {
   764  		return nil, err
   765  	}
   766  
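        	// Versions are ordered newest first, so the first stable entry found is
        	// the latest stable job.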
   767  	var stable *structs.Job
   768  	for _, job := range versions {
   769  		if job.Stable {
   770  			stable = job
   771  			break
   772  		}
   773  	}
   774  
   775  	return stable, nil
   776  }
   777  
    778  // createBatchedUpdate creates an eval for the given index and updates the
    779  // given allocations to allow them to be rescheduled.
   780  func (w *deploymentWatcher) createBatchedUpdate(allowReplacements []string, forIndex uint64) {
   781  	w.l.Lock()
   782  	defer w.l.Unlock()
   783  
   784  	// Store the allocations that can be replaced
   785  	for _, allocID := range allowReplacements {
   786  		if w.outstandingAllowReplacements == nil {
   787  			w.outstandingAllowReplacements = make(map[string]*structs.DesiredTransition, len(allowReplacements))
   788  		}
   789  		w.outstandingAllowReplacements[allocID] = allowRescheduleTransition
   790  	}
   791  
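        	// Nothing to do if a batch is already pending, or if this index has
        	// already been covered by the latest eval and there are no
        	// replacements to record.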
   792  	if w.outstandingBatch || (forIndex < w.latestEval && len(allowReplacements) == 0) {
   793  		return
   794  	}
   795  
   796  	w.outstandingBatch = true
   797  
   798  	time.AfterFunc(perJobEvalBatchPeriod, func() {
    799  		// If the timer has been created and then we shut down, we need to no-op
   800  		// the evaluation creation.
   801  		select {
   802  		case <-w.ctx.Done():
   803  			return
   804  		default:
   805  		}
   806  
   807  		w.l.Lock()
   808  		replacements := w.outstandingAllowReplacements
   809  		w.outstandingAllowReplacements = nil
   810  		w.outstandingBatch = false
   811  		w.l.Unlock()
   812  
   813  		// Create the eval
   814  		if _, err := w.createUpdate(replacements, w.getEval()); err != nil {
   815  			w.logger.Error("failed to create evaluation for deployment", "deployment_id", w.deploymentID, "error", err)
   816  		}
   817  	})
   818  }
   819  
   820  // getEval returns an evaluation suitable for the deployment
   821  func (w *deploymentWatcher) getEval() *structs.Evaluation {
   822  	now := time.Now().UTC().UnixNano()
   823  	return &structs.Evaluation{
   824  		ID:           uuid.Generate(),
   825  		Namespace:    w.j.Namespace,
   826  		Priority:     w.j.Priority,
   827  		Type:         w.j.Type,
   828  		TriggeredBy:  structs.EvalTriggerDeploymentWatcher,
   829  		JobID:        w.j.ID,
   830  		DeploymentID: w.deploymentID,
   831  		Status:       structs.EvalStatusPending,
   832  		CreateTime:   now,
   833  		ModifyTime:   now,
   834  	}
   835  }
   836  
   837  // getDeploymentStatusUpdate returns a deployment status update
   838  func (w *deploymentWatcher) getDeploymentStatusUpdate(status, desc string) *structs.DeploymentStatusUpdate {
   839  	return &structs.DeploymentStatusUpdate{
   840  		DeploymentID:      w.deploymentID,
   841  		Status:            status,
   842  		StatusDescription: desc,
   843  	}
   844  }
   845  
   846  // getStatus returns the current status of the deployment
   847  func (w *deploymentWatcher) getStatus() string {
   848  	w.l.RLock()
   849  	defer w.l.RUnlock()
   850  	return w.d.Status
   851  }
   852  
   853  type allocUpdates struct {
   854  	allocs []*structs.AllocListStub
   855  	index  uint64
   856  	err    error
   857  }
   858  
   859  // getAllocsCh creates a channel and starts a goroutine that
    860  // 1. parks a blocking query for allocations against the state store
   861  // 2. reads those and drops them on the channel
   862  // This query runs once here, but watch calls it in a loop
   863  func (w *deploymentWatcher) getAllocsCh(index uint64) <-chan *allocUpdates {
   864  	out := make(chan *allocUpdates, 1)
   865  	go func() {
   866  		allocs, index, err := w.getAllocs(index)
   867  		out <- &allocUpdates{
   868  			allocs: allocs,
   869  			index:  index,
   870  			err:    err,
   871  		}
   872  	}()
   873  
   874  	return out
   875  }
   876  
    877  // getAllocs retrieves the allocations that are part of the deployment, blocking
   878  // at the given index.
   879  func (w *deploymentWatcher) getAllocs(index uint64) ([]*structs.AllocListStub, uint64, error) {
   880  	resp, index, err := w.state.BlockingQuery(w.getAllocsImpl, index, w.ctx)
   881  	if err != nil {
   882  		return nil, 0, err
   883  	}
   884  	if err := w.ctx.Err(); err != nil {
   885  		return nil, 0, err
   886  	}
   887  
   888  	return resp.([]*structs.AllocListStub), index, nil
   889  }
   890  
    891  // getAllocsImpl retrieves the allocation stubs for the watched deployment from the passed state store.
   892  func (w *deploymentWatcher) getAllocsImpl(ws memdb.WatchSet, state *state.StateStore) (interface{}, uint64, error) {
   893  	if err := w.queryLimiter.Wait(w.ctx); err != nil {
   894  		return nil, 0, err
   895  	}
   896  
   897  	// Capture all the allocations
   898  	allocs, err := state.AllocsByDeployment(ws, w.deploymentID)
   899  	if err != nil {
   900  		return nil, 0, err
   901  	}
   902  
   903  	maxIndex := uint64(0)
   904  	stubs := make([]*structs.AllocListStub, 0, len(allocs))
   905  	for _, alloc := range allocs {
   906  		stubs = append(stubs, alloc.Stub(nil))
   907  
   908  		if maxIndex < alloc.ModifyIndex {
   909  			maxIndex = alloc.ModifyIndex
   910  		}
   911  	}
   912  
   913  	// Use the last index that affected the allocs table
   914  	if len(stubs) == 0 {
   915  		index, err := state.Index("allocs")
   916  		if err != nil {
   917  			return nil, index, err
   918  		}
   919  		maxIndex = index
   920  	}
   921  
   922  	return stubs, maxIndex, nil
   923  }
   924  
   925  // jobEvalStatus returns the latest eval index for a job. The index is used to
   926  // determine if an allocation update requires an evaluation to be triggered.
   927  func (w *deploymentWatcher) jobEvalStatus() (latestIndex uint64, err error) {
   928  	if err := w.queryLimiter.Wait(w.ctx); err != nil {
   929  		return 0, err
   930  	}
   931  
   932  	snap, err := w.state.Snapshot()
   933  	if err != nil {
   934  		return 0, err
   935  	}
   936  
   937  	evals, err := snap.EvalsByJob(nil, w.j.Namespace, w.j.ID)
   938  	if err != nil {
   939  		return 0, err
   940  	}
   941  
   942  	// If there are no evals for the job, return zero, since we want any
   943  	// allocation change to trigger an evaluation.
   944  	if len(evals) == 0 {
   945  		return 0, nil
   946  	}
   947  
   948  	var max uint64
   949  	for _, eval := range evals {
    950  		// A cancelled eval never impacts what the scheduler has seen, so do not
    951  		// use its indexes.
   952  		if eval.Status == structs.EvalStatusCancelled {
   953  			continue
   954  		}
   955  
   956  		// Prefer using the snapshot index. Otherwise use the create index
   957  		if eval.SnapshotIndex != 0 && max < eval.SnapshotIndex {
   958  			max = eval.SnapshotIndex
   959  		} else if max < eval.CreateIndex {
   960  			max = eval.CreateIndex
   961  		}
   962  	}
   963  
   964  	return max, nil
   965  }