github.com/smithx10/nomad@v0.9.1-rc1/nomad/deploymentwatcher/deployment_watcher.go

     1  package deploymentwatcher
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"sync"
     7  	"time"
     8  
     9  	log "github.com/hashicorp/go-hclog"
    10  	memdb "github.com/hashicorp/go-memdb"
    11  	"github.com/hashicorp/nomad/helper"
    12  	"github.com/hashicorp/nomad/helper/uuid"
    13  	"github.com/hashicorp/nomad/nomad/state"
    14  	"github.com/hashicorp/nomad/nomad/structs"
    15  	"golang.org/x/time/rate"
    16  )
    17  
    18  const (
    19  	// perJobEvalBatchPeriod is the batching length before creating an evaluation to
    20  	// trigger the scheduler when allocations are marked as healthy.
    21  	perJobEvalBatchPeriod = 1 * time.Second
    22  )
    23  
    24  var (
     25  	// allowRescheduleTransition is the transition that allows failed
     26  	// allocations that are part of a deployment to be rescheduled. We create a
     27  	// one-off variable to avoid creating a new object for every request.
    28  	allowRescheduleTransition = &structs.DesiredTransition{
    29  		Reschedule: helper.BoolToPtr(true),
    30  	}
    31  )
    32  
     33  // deploymentTriggers is the set of functions required to trigger changes on
     34  // behalf of a deployment.
    35  type deploymentTriggers interface {
    36  	// createUpdate is used to create allocation desired transition updates and
    37  	// an evaluation.
    38  	createUpdate(allocs map[string]*structs.DesiredTransition, eval *structs.Evaluation) (uint64, error)
    39  
    40  	// upsertJob is used to roll back a job when autoreverting for a deployment
    41  	upsertJob(job *structs.Job) (uint64, error)
    42  
     43  	// upsertDeploymentStatusUpdate is used to upsert a deployment status update
     44  	// along with an optional evaluation and job
    45  	upsertDeploymentStatusUpdate(u *structs.DeploymentStatusUpdate, eval *structs.Evaluation, job *structs.Job) (uint64, error)
    46  
    47  	// upsertDeploymentPromotion is used to promote canaries in a deployment
    48  	upsertDeploymentPromotion(req *structs.ApplyDeploymentPromoteRequest) (uint64, error)
    49  
    50  	// upsertDeploymentAllocHealth is used to set the health of allocations in a
    51  	// deployment
    52  	upsertDeploymentAllocHealth(req *structs.ApplyDeploymentAllocHealthRequest) (uint64, error)
    53  }
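
// Editorial note: the sketch below is not part of the original source. It
// illustrates one way the deploymentTriggers interface could be satisfied by a
// minimal stub; because the methods are unexported, such a stub would live in
// this package, for example in a unit test. The noopTriggers name and its
// index counter are hypothetical.
//
//	type noopTriggers struct{ index uint64 }
//
//	func (n *noopTriggers) next() (uint64, error) { n.index++; return n.index, nil }
//
//	func (n *noopTriggers) createUpdate(map[string]*structs.DesiredTransition, *structs.Evaluation) (uint64, error) {
//		return n.next()
//	}
//	func (n *noopTriggers) upsertJob(*structs.Job) (uint64, error) { return n.next() }
//	func (n *noopTriggers) upsertDeploymentStatusUpdate(*structs.DeploymentStatusUpdate, *structs.Evaluation, *structs.Job) (uint64, error) {
//		return n.next()
//	}
//	func (n *noopTriggers) upsertDeploymentPromotion(*structs.ApplyDeploymentPromoteRequest) (uint64, error) {
//		return n.next()
//	}
//	func (n *noopTriggers) upsertDeploymentAllocHealth(*structs.ApplyDeploymentAllocHealthRequest) (uint64, error) {
//		return n.next()
//	}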
    54  
     55  // deploymentWatcher is used to watch a single deployment and trigger the
     56  // scheduler when allocation health transitions occur.
    57  type deploymentWatcher struct {
    58  	// queryLimiter is used to limit the rate of blocking queries
    59  	queryLimiter *rate.Limiter
    60  
    61  	// deploymentTriggers holds the methods required to trigger changes on behalf of the
    62  	// deployment
    63  	deploymentTriggers
    64  
    65  	// state is the state that is watched for state changes.
    66  	state *state.StateStore
    67  
    68  	// deploymentID is the deployment's ID being watched
    69  	deploymentID string
    70  
    71  	// deploymentUpdateCh is triggered when there is an updated deployment
    72  	deploymentUpdateCh chan struct{}
    73  
    74  	// d is the deployment being watched
    75  	d *structs.Deployment
    76  
    77  	// j is the job the deployment is for
    78  	j *structs.Job
    79  
    80  	// outstandingBatch marks whether an outstanding function exists to create
    81  	// the evaluation. Access should be done through the lock.
    82  	outstandingBatch bool
    83  
    84  	// outstandingAllowReplacements is the map of allocations that will be
    85  	// marked as allowing a replacement. Access should be done through the lock.
    86  	outstandingAllowReplacements map[string]*structs.DesiredTransition
    87  
    88  	// latestEval is the latest eval for the job. It is updated by the watch
    89  	// loop and any time an evaluation is created. The field should be accessed
    90  	// by holding the lock or using the setter and getter methods.
    91  	latestEval uint64
    92  
    93  	logger log.Logger
    94  	ctx    context.Context
    95  	exitFn context.CancelFunc
    96  	l      sync.RWMutex
    97  }
    98  
    99  // newDeploymentWatcher returns a deployment watcher that is used to watch
   100  // deployments and trigger the scheduler as needed.
   101  func newDeploymentWatcher(parent context.Context, queryLimiter *rate.Limiter,
   102  	logger log.Logger, state *state.StateStore, d *structs.Deployment,
   103  	j *structs.Job, triggers deploymentTriggers) *deploymentWatcher {
   104  
   105  	ctx, exitFn := context.WithCancel(parent)
   106  	w := &deploymentWatcher{
   107  		queryLimiter:       queryLimiter,
   108  		deploymentID:       d.ID,
   109  		deploymentUpdateCh: make(chan struct{}, 1),
   110  		d:                  d,
   111  		j:                  j,
   112  		state:              state,
   113  		deploymentTriggers: triggers,
   114  		logger:             logger.With("deployment_id", d.ID, "job", j.NamespacedID()),
   115  		ctx:                ctx,
   116  		exitFn:             exitFn,
   117  	}
   118  
    119  	// Start the long-lived watcher that scans for allocation updates
   120  	go w.watch()
   121  
   122  	return w
   123  }
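
// Editorial note: the sketch below is not part of the original source. It
// shows how a caller such as the parent deployments watcher might wire up a
// single deploymentWatcher; logger, stateStore, deployment, job, and triggers
// are assumed to already exist with the appropriate types.
//
//	limiter := rate.NewLimiter(rate.Limit(100), 100)
//	w := newDeploymentWatcher(context.Background(), limiter, logger, stateStore, deployment, job, triggers)
//	// ... the watch loop now runs in the background ...
//	w.StopWatch() // cancel the watcher's context when it is no longer needed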
   124  
   125  // updateDeployment is used to update the tracked deployment.
   126  func (w *deploymentWatcher) updateDeployment(d *structs.Deployment) {
   127  	w.l.Lock()
   128  	defer w.l.Unlock()
   129  
   130  	// Update and trigger
   131  	w.d = d
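	// Notify the watch loop without blocking: deploymentUpdateCh is buffered
	// with a capacity of one, so if a notification is already pending this
	// send is dropped and the pending notification covers both updates.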
   132  	select {
   133  	case w.deploymentUpdateCh <- struct{}{}:
   134  	default:
   135  	}
   136  }
   137  
   138  // getDeployment returns the tracked deployment.
   139  func (w *deploymentWatcher) getDeployment() *structs.Deployment {
   140  	w.l.RLock()
   141  	defer w.l.RUnlock()
   142  	return w.d
   143  }
   144  
   145  func (w *deploymentWatcher) SetAllocHealth(
   146  	req *structs.DeploymentAllocHealthRequest,
   147  	resp *structs.DeploymentUpdateResponse) error {
   148  
    149  	// If we are failing the deployment, update the status and potentially
    150  	// roll back
   151  	var j *structs.Job
   152  	var u *structs.DeploymentStatusUpdate
   153  
   154  	// If there are unhealthy allocations we need to mark the deployment as
   155  	// failed and check if we should roll back to a stable job.
   156  	if l := len(req.UnhealthyAllocationIDs); l != 0 {
   157  		unhealthy := make(map[string]struct{}, l)
   158  		for _, alloc := range req.UnhealthyAllocationIDs {
   159  			unhealthy[alloc] = struct{}{}
   160  		}
   161  
   162  		// Get the allocations for the deployment
   163  		snap, err := w.state.Snapshot()
   164  		if err != nil {
   165  			return err
   166  		}
   167  
   168  		allocs, err := snap.AllocsByDeployment(nil, req.DeploymentID)
   169  		if err != nil {
   170  			return err
   171  		}
   172  
   173  		// Determine if we should autorevert to an older job
   174  		desc := structs.DeploymentStatusDescriptionFailedAllocations
   175  		for _, alloc := range allocs {
   176  			// Check that the alloc has been marked unhealthy
   177  			if _, ok := unhealthy[alloc.ID]; !ok {
   178  				continue
   179  			}
   180  
   181  			// Check if the group has autorevert set
   182  			group, ok := w.getDeployment().TaskGroups[alloc.TaskGroup]
   183  			if !ok || !group.AutoRevert {
   184  				continue
   185  			}
   186  
   187  			var err error
   188  			j, err = w.latestStableJob()
   189  			if err != nil {
   190  				return err
   191  			}
   192  
   193  			if j != nil {
   194  				j, desc = w.handleRollbackValidity(j, desc)
   195  			}
   196  			break
   197  		}
   198  
   199  		u = w.getDeploymentStatusUpdate(structs.DeploymentStatusFailed, desc)
   200  	}
   201  
    202  	// Canonicalize the job in case it doesn't have a namespace set
   203  	j.Canonicalize()
   204  
   205  	// Create the request
   206  	areq := &structs.ApplyDeploymentAllocHealthRequest{
   207  		DeploymentAllocHealthRequest: *req,
   208  		Timestamp:                    time.Now(),
   209  		Eval:                         w.getEval(),
   210  		DeploymentUpdate:             u,
   211  		Job:                          j,
   212  	}
   213  
   214  	index, err := w.upsertDeploymentAllocHealth(areq)
   215  	if err != nil {
   216  		return err
   217  	}
   218  
   219  	// Build the response
   220  	resp.EvalID = areq.Eval.ID
   221  	resp.EvalCreateIndex = index
   222  	resp.DeploymentModifyIndex = index
   223  	resp.Index = index
   224  	if j != nil {
   225  		resp.RevertedJobVersion = helper.Uint64ToPtr(j.Version)
   226  	}
   227  	return nil
   228  }
   229  
   230  // handleRollbackValidity checks if the job being rolled back to has the same spec as the existing job
   231  // Returns a modified description and job accordingly.
   232  func (w *deploymentWatcher) handleRollbackValidity(rollbackJob *structs.Job, desc string) (*structs.Job, string) {
    233  	// Only roll back if the job being rolled back to has a different spec.
    234  	// This prevents an infinite revert cycle when a previously stable version of the job fails to start up during a rollback.
    235  	// If the job we are trying to roll back to is identical to the current job, we stop because the rollback would not succeed.
   236  	if w.j.SpecChanged(rollbackJob) {
   237  		desc = structs.DeploymentStatusDescriptionRollback(desc, rollbackJob.Version)
   238  	} else {
   239  		desc = structs.DeploymentStatusDescriptionRollbackNoop(desc, rollbackJob.Version)
   240  		rollbackJob = nil
   241  	}
   242  	return rollbackJob, desc
   243  }
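
// Editorial note (not part of the original source): concretely, if the current
// job is at version 5 and the latest stable version is 3 with a different
// spec, the version 3 job is returned along with a description noting the
// rollback to version 3; if version 3's spec is identical to the current job,
// a nil job is returned with a description noting that the rollback would be a
// no-op, so the deployment is failed without reverting.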
   244  
   245  func (w *deploymentWatcher) PromoteDeployment(
   246  	req *structs.DeploymentPromoteRequest,
   247  	resp *structs.DeploymentUpdateResponse) error {
   248  
   249  	// Create the request
   250  	areq := &structs.ApplyDeploymentPromoteRequest{
   251  		DeploymentPromoteRequest: *req,
   252  		Eval:                     w.getEval(),
   253  	}
   254  
   255  	index, err := w.upsertDeploymentPromotion(areq)
   256  	if err != nil {
   257  		return err
   258  	}
   259  
   260  	// Build the response
   261  	resp.EvalID = areq.Eval.ID
   262  	resp.EvalCreateIndex = index
   263  	resp.DeploymentModifyIndex = index
   264  	resp.Index = index
   265  	return nil
   266  }
   267  
   268  func (w *deploymentWatcher) PauseDeployment(
   269  	req *structs.DeploymentPauseRequest,
   270  	resp *structs.DeploymentUpdateResponse) error {
   271  	// Determine the status we should transition to and if we need to create an
   272  	// evaluation
   273  	status, desc := structs.DeploymentStatusPaused, structs.DeploymentStatusDescriptionPaused
   274  	var eval *structs.Evaluation
   275  	evalID := ""
   276  	if !req.Pause {
   277  		status, desc = structs.DeploymentStatusRunning, structs.DeploymentStatusDescriptionRunning
   278  		eval = w.getEval()
   279  		evalID = eval.ID
   280  	}
   281  	update := w.getDeploymentStatusUpdate(status, desc)
   282  
   283  	// Commit the change
   284  	i, err := w.upsertDeploymentStatusUpdate(update, eval, nil)
   285  	if err != nil {
   286  		return err
   287  	}
   288  
   289  	// Build the response
   290  	if evalID != "" {
   291  		resp.EvalID = evalID
   292  		resp.EvalCreateIndex = i
   293  	}
   294  	resp.DeploymentModifyIndex = i
   295  	resp.Index = i
   296  	return nil
   297  }
   298  
   299  func (w *deploymentWatcher) FailDeployment(
   300  	req *structs.DeploymentFailRequest,
   301  	resp *structs.DeploymentUpdateResponse) error {
   302  
   303  	status, desc := structs.DeploymentStatusFailed, structs.DeploymentStatusDescriptionFailedByUser
   304  
    305  	// Determine if we should roll back
   306  	rollback := false
   307  	for _, state := range w.getDeployment().TaskGroups {
   308  		if state.AutoRevert {
   309  			rollback = true
   310  			break
   311  		}
   312  	}
   313  
   314  	var rollbackJob *structs.Job
   315  	if rollback {
   316  		var err error
   317  		rollbackJob, err = w.latestStableJob()
   318  		if err != nil {
   319  			return err
   320  		}
   321  
   322  		if rollbackJob != nil {
   323  			rollbackJob, desc = w.handleRollbackValidity(rollbackJob, desc)
   324  		} else {
   325  			desc = structs.DeploymentStatusDescriptionNoRollbackTarget(desc)
   326  		}
   327  	}
   328  
   329  	// Commit the change
   330  	update := w.getDeploymentStatusUpdate(status, desc)
   331  	eval := w.getEval()
   332  	i, err := w.upsertDeploymentStatusUpdate(update, eval, rollbackJob)
   333  	if err != nil {
   334  		return err
   335  	}
   336  
   337  	// Build the response
   338  	resp.EvalID = eval.ID
   339  	resp.EvalCreateIndex = i
   340  	resp.DeploymentModifyIndex = i
   341  	resp.Index = i
   342  	if rollbackJob != nil {
   343  		resp.RevertedJobVersion = helper.Uint64ToPtr(rollbackJob.Version)
   344  	}
   345  	return nil
   346  }
   347  
   348  // StopWatch stops watching the deployment. This should be called whenever a
   349  // deployment is completed or the watcher is no longer needed.
   350  func (w *deploymentWatcher) StopWatch() {
   351  	w.exitFn()
   352  }
   353  
    354  // watch is the long-running watcher that watches for both allocation and
    355  // deployment changes. Its job is to create evaluations to trigger the
    356  // scheduler when more progress can be made, and to fail the deployment if it
    357  // has failed, potentially rolling the job back. Progress can be made when an
    358  // allocation transitions to healthy, so we create an eval when that happens.
   359  func (w *deploymentWatcher) watch() {
    360  	// Get the deadline. This is likely a zero time to begin with but we need to
    361  	// handle the case that the deployment has already progressed and we are now
    362  	// just starting to watch it. This would most likely occur if there was a
    363  	// leader transition and we are now starting our watcher.
   364  	currentDeadline := w.getDeploymentProgressCutoff(w.getDeployment())
   365  	var deadlineTimer *time.Timer
   366  	if currentDeadline.IsZero() {
   367  		deadlineTimer = time.NewTimer(0)
   368  		if !deadlineTimer.Stop() {
   369  			<-deadlineTimer.C
   370  		}
   371  	} else {
   372  		deadlineTimer = time.NewTimer(currentDeadline.Sub(time.Now()))
   373  	}
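	// Editorial note (not part of the original source): the NewTimer(0), Stop,
	// and drain sequence above produces a timer that is safe to select on but
	// will not fire until it is Reset further down, which is the usual way to
	// start with an effectively disabled timer.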
   374  
   375  	allocIndex := uint64(1)
   376  	var updates *allocUpdates
   377  
   378  	rollback, deadlineHit := false, false
   379  
   380  FAIL:
   381  	for {
   382  		select {
   383  		case <-w.ctx.Done():
   384  			return
   385  		case <-deadlineTimer.C:
    386  			// We have hit the progress deadline so fail the deployment. We need
    387  			// to determine whether we should roll back the job by inspecting
    388  			// which of the deployment's allocs are healthy and which
    389  			// aren't.
   390  			deadlineHit = true
   391  			fail, rback, err := w.shouldFail()
   392  			if err != nil {
   393  				w.logger.Error("failed to determine whether to rollback job", "error", err)
   394  			}
   395  			if !fail {
   396  				w.logger.Debug("skipping deadline")
   397  				continue
   398  			}
   399  
   400  			w.logger.Debug("deadline hit", "rollback", rback)
   401  			rollback = rback
   402  			break FAIL
   403  		case <-w.deploymentUpdateCh:
   404  			// Get the updated deployment and check if we should change the
   405  			// deadline timer
   406  			next := w.getDeploymentProgressCutoff(w.getDeployment())
   407  			if !next.Equal(currentDeadline) {
   408  				prevDeadlineZero := currentDeadline.IsZero()
   409  				currentDeadline = next
   410  				// The most recent deadline can be zero if no allocs were created for this deployment.
   411  				// The deadline timer would have already been stopped once in that case. To prevent
   412  				// deadlocking on the already stopped deadline timer, we only drain the channel if
   413  				// the previous deadline was not zero.
   414  				if !prevDeadlineZero && !deadlineTimer.Stop() {
   415  					select {
   416  					case <-deadlineTimer.C:
   417  					default:
   418  					}
   419  				}
   420  
    421  				// If the next deadline is zero, we should not reset the timer
    422  				// as we aren't tracking towards a progress deadline yet. This
    423  				// can happen if you have multiple task groups with progress
    424  				// deadlines and one of the task groups hasn't made any
    425  				// placements. As soon as the other task group finishes its
    426  				// rollout, the next progress deadline becomes zero, so we want
    427  				// to avoid resetting, which would cause a spurious deployment failure.
   428  				if !next.IsZero() {
   429  					deadlineTimer.Reset(next.Sub(time.Now()))
   430  				}
   431  			}
   432  
   433  		case updates = <-w.getAllocsCh(allocIndex):
   434  			if err := updates.err; err != nil {
   435  				if err == context.Canceled || w.ctx.Err() == context.Canceled {
   436  					return
   437  				}
   438  
   439  				w.logger.Error("failed to retrieve allocations", "error", err)
   440  				return
   441  			}
   442  			allocIndex = updates.index
   443  
   444  			// We have allocation changes for this deployment so determine the
   445  			// steps to take.
   446  			res, err := w.handleAllocUpdate(updates.allocs)
   447  			if err != nil {
   448  				if err == context.Canceled || w.ctx.Err() == context.Canceled {
   449  					return
   450  				}
   451  
   452  				w.logger.Error("failed handling allocation updates", "error", err)
   453  				return
   454  			}
   455  
   456  			// The deployment has failed, so break out of the watch loop and
   457  			// handle the failure
   458  			if res.failDeployment {
   459  				rollback = res.rollback
   460  				break FAIL
   461  			}
   462  
   463  			// Create an eval to push the deployment along
   464  			if res.createEval || len(res.allowReplacements) != 0 {
   465  				w.createBatchedUpdate(res.allowReplacements, allocIndex)
   466  			}
   467  		}
   468  	}
   469  
    470  	// Change the deployment's status to failed
   471  	desc := structs.DeploymentStatusDescriptionFailedAllocations
   472  	if deadlineHit {
   473  		desc = structs.DeploymentStatusDescriptionProgressDeadline
   474  	}
   475  
    476  	// Roll back to the old job if necessary
   477  	var j *structs.Job
   478  	if rollback {
   479  		var err error
   480  		j, err = w.latestStableJob()
   481  		if err != nil {
   482  			w.logger.Error("failed to lookup latest stable job", "error", err)
   483  		}
   484  
   485  		// Description should include that the job is being rolled back to
   486  		// version N
   487  		if j != nil {
   488  			j, desc = w.handleRollbackValidity(j, desc)
   489  		} else {
   490  			desc = structs.DeploymentStatusDescriptionNoRollbackTarget(desc)
   491  		}
   492  	}
   493  
   494  	// Update the status of the deployment to failed and create an evaluation.
   495  	e := w.getEval()
   496  	u := w.getDeploymentStatusUpdate(structs.DeploymentStatusFailed, desc)
   497  	if _, err := w.upsertDeploymentStatusUpdate(u, e, j); err != nil {
   498  		w.logger.Error("failed to update deployment status", "error", err)
   499  	}
   500  }
   501  
   502  // allocUpdateResult is used to return the desired actions given the newest set
   503  // of allocations for the deployment.
   504  type allocUpdateResult struct {
   505  	createEval        bool
   506  	failDeployment    bool
   507  	rollback          bool
   508  	allowReplacements []string
   509  }
   510  
   511  // handleAllocUpdate is used to compute the set of actions to take based on the
   512  // updated allocations for the deployment.
   513  func (w *deploymentWatcher) handleAllocUpdate(allocs []*structs.AllocListStub) (allocUpdateResult, error) {
   514  	var res allocUpdateResult
   515  
   516  	// Get the latest evaluation index
   517  	latestEval, err := w.jobEvalStatus()
   518  	if err != nil {
   519  		if err == context.Canceled || w.ctx.Err() == context.Canceled {
   520  			return res, err
   521  		}
   522  
   523  		return res, fmt.Errorf("failed to determine last evaluation index for job %q: %v", w.j.ID, err)
   524  	}
   525  
   526  	deployment := w.getDeployment()
   527  	for _, alloc := range allocs {
   528  		dstate, ok := deployment.TaskGroups[alloc.TaskGroup]
   529  		if !ok {
   530  			continue
   531  		}
   532  
    533  		// Determine if the update stanza for this group is progress-based
   534  		progressBased := dstate.ProgressDeadline != 0
   535  
    536  		// Check if the allocation has failed and we need to mark it as
    537  		// allowing a replacement
   538  		if progressBased && alloc.DeploymentStatus.IsUnhealthy() &&
   539  			deployment.Active() && !alloc.DesiredTransition.ShouldReschedule() {
   540  			res.allowReplacements = append(res.allowReplacements, alloc.ID)
   541  			continue
   542  		}
   543  
   544  		// We need to create an eval so the job can progress.
   545  		if alloc.DeploymentStatus.IsHealthy() && alloc.DeploymentStatus.ModifyIndex > latestEval {
   546  			res.createEval = true
   547  		}
   548  
    549  		// If the group is using a progress deadline, we don't fail fast on an unhealthy alloc.
   550  		if progressBased {
   551  			continue
   552  		}
   553  
   554  		// Fail on the first bad allocation
   555  		if alloc.DeploymentStatus.IsUnhealthy() {
   556  			// Check if the group has autorevert set
   557  			if dstate.AutoRevert {
   558  				res.rollback = true
   559  			}
   560  
   561  			// Since we have an unhealthy allocation, fail the deployment
   562  			res.failDeployment = true
   563  		}
   564  
   565  		// All conditions have been hit so we can break
   566  		if res.createEval && res.failDeployment && res.rollback {
   567  			break
   568  		}
   569  	}
   570  
   571  	return res, nil
   572  }
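
// Editorial note (not part of the original source): summarizing the decision
// logic above with an example, an unhealthy alloc in a group that uses a
// progress deadline is only marked so a replacement may be scheduled, while
// the same alloc in a group without a progress deadline fails the deployment
// immediately (and requests a rollback when the group has AutoRevert set);
// a newly healthy alloc whose modify index is newer than the latest eval
// simply asks for a new evaluation so the scheduler can continue the rollout.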
   573  
    574  // shouldFail returns whether the job should be failed and whether it should be
    575  // rolled back to an earlier stable version by examining the allocations in the
    576  // deployment.
   577  func (w *deploymentWatcher) shouldFail() (fail, rollback bool, err error) {
   578  	snap, err := w.state.Snapshot()
   579  	if err != nil {
   580  		return false, false, err
   581  	}
   582  
   583  	d, err := snap.DeploymentByID(nil, w.deploymentID)
   584  	if err != nil {
   585  		return false, false, err
   586  	}
   587  	if d == nil {
   588  		// The deployment wasn't in the state store, possibly due to a system gc
   589  		return false, false, fmt.Errorf("deployment id not found: %q", w.deploymentID)
   590  	}
   591  
   592  	fail = false
   593  	for tg, state := range d.TaskGroups {
   594  		// If we are in a canary state we fail if there aren't enough healthy
   595  		// allocs to satisfy DesiredCanaries
   596  		if state.DesiredCanaries > 0 && !state.Promoted {
   597  			if state.HealthyAllocs >= state.DesiredCanaries {
   598  				continue
   599  			}
   600  		} else if state.HealthyAllocs >= state.DesiredTotal {
   601  			continue
   602  		}
   603  
   604  		// We have failed this TG
   605  		fail = true
   606  
   607  		// We don't need to autorevert this group
   608  		upd := w.j.LookupTaskGroup(tg).Update
   609  		if upd == nil || !upd.AutoRevert {
   610  			continue
   611  		}
   612  
    613  		// There are unhealthy allocs and we need to autorevert
   614  		return true, true, nil
   615  	}
   616  
   617  	return fail, false, nil
   618  }
   619  
   620  // getDeploymentProgressCutoff returns the progress cutoff for the given
   621  // deployment
   622  func (w *deploymentWatcher) getDeploymentProgressCutoff(d *structs.Deployment) time.Time {
   623  	var next time.Time
   624  	doneTGs := w.doneGroups(d)
   625  	for name, state := range d.TaskGroups {
   626  		// This task group is done so we don't have to concern ourselves with
   627  		// its progress deadline.
   628  		if done, ok := doneTGs[name]; ok && done {
   629  			continue
   630  		}
   631  
   632  		if state.RequireProgressBy.IsZero() {
   633  			continue
   634  		}
   635  
   636  		if next.IsZero() || state.RequireProgressBy.Before(next) {
   637  			next = state.RequireProgressBy
   638  		}
   639  	}
   640  	return next
   641  }
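
// Editorial note (not part of the original source): as an example, if group
// "api" must make progress by 10:05 and group "cache" by 10:02 but "cache"
// has already finished its rollout, the cutoff is 10:05; if no unfinished
// group has a RequireProgressBy value set yet, the zero time is returned and
// the watch loop leaves its deadline timer disabled.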
   642  
    643  // doneGroups returns a map of task group to whether the deployment appears to
    644  // be done for the group. A true value doesn't mean no more action will be taken
    645  // in the lifetime of the deployment because there could always be node
    646  // failures or rescheduling events.
   647  func (w *deploymentWatcher) doneGroups(d *structs.Deployment) map[string]bool {
   648  	if d == nil {
   649  		return nil
   650  	}
   651  
   652  	// Collect the allocations by the task group
   653  	snap, err := w.state.Snapshot()
   654  	if err != nil {
   655  		return nil
   656  	}
   657  
   658  	allocs, err := snap.AllocsByDeployment(nil, d.ID)
   659  	if err != nil {
   660  		return nil
   661  	}
   662  
   663  	// Go through the allocs and count up how many healthy allocs we have
   664  	healthy := make(map[string]int, len(d.TaskGroups))
   665  	for _, a := range allocs {
   666  		if a.TerminalStatus() || !a.DeploymentStatus.IsHealthy() {
   667  			continue
   668  		}
   669  		healthy[a.TaskGroup]++
   670  	}
   671  
    672  	// Go through each group and check if it is done
   673  	groups := make(map[string]bool, len(d.TaskGroups))
   674  	for name, state := range d.TaskGroups {
   675  		// Requires promotion
   676  		if state.DesiredCanaries != 0 && !state.Promoted {
   677  			groups[name] = false
   678  			continue
   679  		}
   680  
    681  		// Check that we have enough healthy, currently running allocations
   682  		groups[name] = healthy[name] >= state.DesiredTotal
   683  	}
   684  
   685  	return groups
   686  }
   687  
    688  // latestStableJob returns the latest stable job. It may be nil if none exists.
   689  func (w *deploymentWatcher) latestStableJob() (*structs.Job, error) {
   690  	snap, err := w.state.Snapshot()
   691  	if err != nil {
   692  		return nil, err
   693  	}
   694  
   695  	versions, err := snap.JobVersionsByID(nil, w.j.Namespace, w.j.ID)
   696  	if err != nil {
   697  		return nil, err
   698  	}
   699  
   700  	var stable *structs.Job
   701  	for _, job := range versions {
   702  		if job.Stable {
   703  			stable = job
   704  			break
   705  		}
   706  	}
   707  
   708  	return stable, nil
   709  }
   710  
    711  // createBatchedUpdate batches the creation of an eval for the given index and
    712  // marks the given allocations as allowed to reschedule.
   713  func (w *deploymentWatcher) createBatchedUpdate(allowReplacements []string, forIndex uint64) {
   714  	w.l.Lock()
   715  	defer w.l.Unlock()
   716  
   717  	// Store the allocations that can be replaced
   718  	for _, allocID := range allowReplacements {
   719  		if w.outstandingAllowReplacements == nil {
   720  			w.outstandingAllowReplacements = make(map[string]*structs.DesiredTransition, len(allowReplacements))
   721  		}
   722  		w.outstandingAllowReplacements[allocID] = allowRescheduleTransition
   723  	}
   724  
   725  	if w.outstandingBatch || (forIndex < w.latestEval && len(allowReplacements) == 0) {
   726  		return
   727  	}
   728  
   729  	w.outstandingBatch = true
   730  
   731  	time.AfterFunc(perJobEvalBatchPeriod, func() {
    732  		// If the timer has been created and then we shut down, we need to no-op
    733  		// the evaluation creation.
   734  		select {
   735  		case <-w.ctx.Done():
   736  			return
   737  		default:
   738  		}
   739  
   740  		w.l.Lock()
   741  		replacements := w.outstandingAllowReplacements
   742  		w.outstandingAllowReplacements = nil
   743  		w.outstandingBatch = false
   744  		w.l.Unlock()
   745  
   746  		// Create the eval
   747  		if _, err := w.createUpdate(replacements, w.getEval()); err != nil {
   748  			w.logger.Error("failed to create evaluation for deployment", "deployment_id", w.deploymentID, "error", err)
   749  		}
   750  	})
   751  }
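
// Editorial note: the sketch below is not part of the original source. It
// shows the same batching pattern in isolation, using the hypothetical batcher
// type defined here: the first caller within a window arms the timer, later
// callers only accumulate work, and the timer flushes everything at once.
//
//	type batcher struct {
//		mu      sync.Mutex
//		pending []string
//		armed   bool
//	}
//
//	func (b *batcher) add(id string, flush func([]string)) {
//		b.mu.Lock()
//		defer b.mu.Unlock()
//		b.pending = append(b.pending, id)
//		if b.armed {
//			return
//		}
//		b.armed = true
//		time.AfterFunc(perJobEvalBatchPeriod, func() {
//			b.mu.Lock()
//			work := b.pending
//			b.pending, b.armed = nil, false
//			b.mu.Unlock()
//			flush(work)
//		})
//	}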
   752  
   753  // getEval returns an evaluation suitable for the deployment
   754  func (w *deploymentWatcher) getEval() *structs.Evaluation {
   755  	return &structs.Evaluation{
   756  		ID:           uuid.Generate(),
   757  		Namespace:    w.j.Namespace,
   758  		Priority:     w.j.Priority,
   759  		Type:         w.j.Type,
   760  		TriggeredBy:  structs.EvalTriggerDeploymentWatcher,
   761  		JobID:        w.j.ID,
   762  		DeploymentID: w.deploymentID,
   763  		Status:       structs.EvalStatusPending,
   764  	}
   765  }
   766  
   767  // getDeploymentStatusUpdate returns a deployment status update
   768  func (w *deploymentWatcher) getDeploymentStatusUpdate(status, desc string) *structs.DeploymentStatusUpdate {
   769  	return &structs.DeploymentStatusUpdate{
   770  		DeploymentID:      w.deploymentID,
   771  		Status:            status,
   772  		StatusDescription: desc,
   773  	}
   774  }
   775  
   776  type allocUpdates struct {
   777  	allocs []*structs.AllocListStub
   778  	index  uint64
   779  	err    error
   780  }
   781  
    782  // getAllocsCh retrieves the allocations that are part of the deployment, blocking
    783  // at the given index, and delivers the result on the returned channel.
   784  func (w *deploymentWatcher) getAllocsCh(index uint64) <-chan *allocUpdates {
   785  	out := make(chan *allocUpdates, 1)
   786  	go func() {
   787  		allocs, index, err := w.getAllocs(index)
   788  		out <- &allocUpdates{
   789  			allocs: allocs,
   790  			index:  index,
   791  			err:    err,
   792  		}
   793  	}()
   794  
   795  	return out
   796  }
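
// Editorial note (not part of the original source): running the blocking
// getAllocs call in its own goroutine and delivering the result on a buffered
// channel lets the watch loop select on allocation updates alongside the
// context, deployment updates, and the deadline timer; the one-element buffer
// means the goroutine can always complete its send even if the loop has
// already exited.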
   797  
    798  // getAllocs retrieves the allocations that are part of the deployment, blocking
    799  // at the given index.
   800  func (w *deploymentWatcher) getAllocs(index uint64) ([]*structs.AllocListStub, uint64, error) {
   801  	resp, index, err := w.state.BlockingQuery(w.getAllocsImpl, index, w.ctx)
   802  	if err != nil {
   803  		return nil, 0, err
   804  	}
   805  	if err := w.ctx.Err(); err != nil {
   806  		return nil, 0, err
   807  	}
   808  
   809  	return resp.([]*structs.AllocListStub), index, nil
   810  }
   811  
    812  // getAllocsImpl retrieves the allocations for the watched deployment from the passed state store.
   813  func (w *deploymentWatcher) getAllocsImpl(ws memdb.WatchSet, state *state.StateStore) (interface{}, uint64, error) {
   814  	if err := w.queryLimiter.Wait(w.ctx); err != nil {
   815  		return nil, 0, err
   816  	}
   817  
   818  	// Capture all the allocations
   819  	allocs, err := state.AllocsByDeployment(ws, w.deploymentID)
   820  	if err != nil {
   821  		return nil, 0, err
   822  	}
   823  
   824  	maxIndex := uint64(0)
   825  	stubs := make([]*structs.AllocListStub, 0, len(allocs))
   826  	for _, alloc := range allocs {
   827  		stubs = append(stubs, alloc.Stub())
   828  
   829  		if maxIndex < alloc.ModifyIndex {
   830  			maxIndex = alloc.ModifyIndex
   831  		}
   832  	}
   833  
   834  	// Use the last index that affected the allocs table
   835  	if len(stubs) == 0 {
   836  		index, err := state.Index("allocs")
   837  		if err != nil {
   838  			return nil, index, err
   839  		}
   840  		maxIndex = index
   841  	}
   842  
   843  	return stubs, maxIndex, nil
   844  }
   845  
   846  // jobEvalStatus returns the latest eval index for a job. The index is used to
   847  // determine if an allocation update requires an evaluation to be triggered.
   848  func (w *deploymentWatcher) jobEvalStatus() (latestIndex uint64, err error) {
   849  	if err := w.queryLimiter.Wait(w.ctx); err != nil {
   850  		return 0, err
   851  	}
   852  
   853  	snap, err := w.state.Snapshot()
   854  	if err != nil {
   855  		return 0, err
   856  	}
   857  
   858  	evals, err := snap.EvalsByJob(nil, w.j.Namespace, w.j.ID)
   859  	if err != nil {
   860  		return 0, err
   861  	}
   862  
   863  	// If there are no evals for the job, return zero, since we want any
   864  	// allocation change to trigger an evaluation.
   865  	if len(evals) == 0 {
   866  		return 0, nil
   867  	}
   868  
   869  	var max uint64
   870  	for _, eval := range evals {
    871  		// A cancelled eval never impacts what the scheduler has seen, so do not
    872  		// use its indexes.
   873  		if eval.Status == structs.EvalStatusCancelled {
   874  			continue
   875  		}
   876  
   877  		// Prefer using the snapshot index. Otherwise use the create index
   878  		if eval.SnapshotIndex != 0 && max < eval.SnapshotIndex {
   879  			max = eval.SnapshotIndex
   880  		} else if max < eval.CreateIndex {
   881  			max = eval.CreateIndex
   882  		}
   883  	}
   884  
   885  	return max, nil
   886  }