github.com/hspak/nomad@v0.7.2-0.20180309000617-bc4ae22a39a5/scheduler/reconcile.go

package scheduler

import (
	"fmt"
	"log"
	"time"

	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/nomad/structs"
)

// allocUpdateType takes an existing allocation and a new job definition and
// returns whether the allocation can ignore the change, requires a destructive
// update, or can be updated in place. If it can be updated in place, an
// updated allocation that has the new resources and alloc metrics attached
// will be returned.
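//
// As an illustration only, an implementation that always forces a destructive
// update could look like this (a sketch, not one of the scheduler's real
// update functions):
//
//	func alwaysDestructive(existing *structs.Allocation, newJob *structs.Job,
//		newTG *structs.TaskGroup) (bool, bool, *structs.Allocation) {
//		return false, true, nil
//	}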
type allocUpdateType func(existing *structs.Allocation, newJob *structs.Job,
	newTG *structs.TaskGroup) (ignore, destructive bool, updated *structs.Allocation)

// allocReconciler is used to determine the set of allocations that require
// placement, in-place updating or stopping given the job specification and
// existing cluster state. The reconciler should only be used for batch and
// service jobs.
type allocReconciler struct {
	// logger is used to log debug information. Logging should be kept to a
	// minimum here
	logger *log.Logger

	// allocUpdateFn is used to determine whether an allocation can ignore a
	// change, be updated in place, or requires a destructive update
	allocUpdateFn allocUpdateType

	// batch marks whether the job is a batch job
	batch bool

	// job is the job being operated on. It may be nil if the job is being
	// stopped via a purge
	job *structs.Job

	// jobID is the ID of the job being operated on. The job may be nil if it
	// is being stopped, so we require this separately.
	jobID string

	// oldDeployment is the last deployment for the job
	oldDeployment *structs.Deployment

	// deployment is the current deployment for the job
	deployment *structs.Deployment

	// deploymentPaused marks whether the deployment is paused
	deploymentPaused bool

	// deploymentFailed marks whether the deployment is failed
	deploymentFailed bool

	// taintedNodes contains a map of nodes that are tainted
	taintedNodes map[string]*structs.Node

	// existingAllocs is the set of non-terminal existing allocations
	existingAllocs []*structs.Allocation

	// result is the results of the reconcile. During computation it can be
	// used to store intermediate state
	result *reconcileResults
}

// reconcileResults contains the results of the reconciliation and should be
// applied by the scheduler.
type reconcileResults struct {
	// deployment is the deployment that should be created or updated as a
	// result of scheduling
	deployment *structs.Deployment

	// deploymentUpdates contains a set of deployment updates that should be
	// applied as a result of scheduling
	deploymentUpdates []*structs.DeploymentStatusUpdate

	// place is the set of allocations to place by the scheduler
	place []allocPlaceResult

	// destructiveUpdate is the set of allocations to apply a destructive update to
	destructiveUpdate []allocDestructiveResult

	// inplaceUpdate is the set of allocations to apply an inplace update to
	inplaceUpdate []*structs.Allocation

	// stop is the set of allocations to stop
	stop []allocStopResult

	// desiredTGUpdates captures the desired set of changes to make for each
	// task group.
	desiredTGUpdates map[string]*structs.DesiredUpdates

	// followupEvalWait is set if there should be a followup eval run after the
	// given duration
	followupEvalWait time.Duration
}

func (r *reconcileResults) GoString() string {
	base := fmt.Sprintf("Total changes: (place %d) (destructive %d) (inplace %d) (stop %d)",
		len(r.place), len(r.destructiveUpdate), len(r.inplaceUpdate), len(r.stop))

	if r.deployment != nil {
		base += fmt.Sprintf("\nCreated Deployment: %q", r.deployment.ID)
	}
	for _, u := range r.deploymentUpdates {
		base += fmt.Sprintf("\nDeployment Update for ID %q: Status %q; Description %q",
			u.DeploymentID, u.Status, u.StatusDescription)
	}
	if r.followupEvalWait != 0 {
		base += fmt.Sprintf("\nFollowup Eval in %v", r.followupEvalWait)
	}
	for tg, u := range r.desiredTGUpdates {
		base += fmt.Sprintf("\nDesired Changes for %q: %#v", tg, u)
	}
	return base
}

// Changes returns the total number of changes
func (r *reconcileResults) Changes() int {
	return len(r.place) + len(r.inplaceUpdate) + len(r.stop)
}

// NewAllocReconciler creates a new reconciler that should be used to determine
// the changes required to bring the cluster state in line with the declared
// jobspec.
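//
// A minimal usage sketch; updateFn, job, deployment, allocs, and taintedNodes
// are assumed to be supplied by the calling scheduler:
//
//	r := NewAllocReconciler(logger, updateFn, false, job.ID, job, deployment,
//		allocs, taintedNodes)
//	results := r.Compute()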
func NewAllocReconciler(logger *log.Logger, allocUpdateFn allocUpdateType, batch bool,
	jobID string, job *structs.Job, deployment *structs.Deployment,
	existingAllocs []*structs.Allocation, taintedNodes map[string]*structs.Node) *allocReconciler {

	return &allocReconciler{
		logger:         logger,
		allocUpdateFn:  allocUpdateFn,
		batch:          batch,
		jobID:          jobID,
		job:            job,
		deployment:     deployment.Copy(),
		existingAllocs: existingAllocs,
		taintedNodes:   taintedNodes,
		result: &reconcileResults{
			desiredTGUpdates: make(map[string]*structs.DesiredUpdates),
		},
	}
}

// Compute reconciles the existing cluster state and returns the set of changes
// required to converge the cluster state with the job spec
func (a *allocReconciler) Compute() *reconcileResults {
	// Create the allocation matrix
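	// (a map from task group name to that group's set of existing allocations)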
	m := newAllocMatrix(a.job, a.existingAllocs)

	// Handle stopping unneeded deployments
	a.cancelDeployments()

	// If we are just stopping a job we do not need to do anything more than
	// stopping all running allocs
	if a.job.Stopped() {
		a.handleStop(m)
		return a.result
	}

	// Detect if the deployment is paused
	if a.deployment != nil {
		// Detect if any allocs associated with this deploy have failed.
		// Failed allocations could edge trigger an evaluation before the
		// deployment watcher runs and marks the deploy as failed. This block
		// makes sure that is still considered a failed deploy.
		failedAllocsInDeploy := false
		for _, as := range m {
			for _, alloc := range as {
				if alloc.DeploymentID == a.deployment.ID && alloc.ClientStatus == structs.AllocClientStatusFailed {
					failedAllocsInDeploy = true
				}
			}
		}
		a.deploymentPaused = a.deployment.Status == structs.DeploymentStatusPaused
		a.deploymentFailed = a.deployment.Status == structs.DeploymentStatusFailed || failedAllocsInDeploy
	}

	// Reconcile each group
	complete := true
	for group, as := range m {
		groupComplete := a.computeGroup(group, as)
		complete = complete && groupComplete
	}

	// Mark the deployment as complete if possible
	if a.deployment != nil && complete {
		a.result.deploymentUpdates = append(a.result.deploymentUpdates, &structs.DeploymentStatusUpdate{
			DeploymentID:      a.deployment.ID,
			Status:            structs.DeploymentStatusSuccessful,
			StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
		})
	}

	// Set the description of a created deployment
	if d := a.result.deployment; d != nil {
		if d.RequiresPromotion() {
			d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
		}
	}

	return a.result
}

// cancelDeployments cancels any deployment that is not needed
func (a *allocReconciler) cancelDeployments() {
	// If the job is stopped and there is a non-terminal deployment, cancel it
	if a.job.Stopped() {
		if a.deployment != nil && a.deployment.Active() {
			a.result.deploymentUpdates = append(a.result.deploymentUpdates, &structs.DeploymentStatusUpdate{
				DeploymentID:      a.deployment.ID,
				Status:            structs.DeploymentStatusCancelled,
				StatusDescription: structs.DeploymentStatusDescriptionStoppedJob,
			})
		}

		// Nothing else to do
		a.oldDeployment = a.deployment
		a.deployment = nil
		return
	}

	d := a.deployment
	if d == nil {
		return
	}

	// If the deployment is active but references an older job, cancel it
	if d.JobCreateIndex != a.job.CreateIndex || d.JobVersion != a.job.Version {
		if d.Active() {
			a.result.deploymentUpdates = append(a.result.deploymentUpdates, &structs.DeploymentStatusUpdate{
				DeploymentID:      a.deployment.ID,
				Status:            structs.DeploymentStatusCancelled,
				StatusDescription: structs.DeploymentStatusDescriptionNewerJob,
			})
		}

		a.oldDeployment = d
		a.deployment = nil
	}

	// Clear it as the current deployment if it is successful
	if d.Status == structs.DeploymentStatusSuccessful {
		a.oldDeployment = d
		a.deployment = nil
	}
}

// handleStop marks all allocations to be stopped, handling the lost case
func (a *allocReconciler) handleStop(m allocMatrix) {
	for group, as := range m {
		untainted, migrate, lost := as.filterByTainted(a.taintedNodes)
		a.markStop(untainted, "", allocNotNeeded)
		a.markStop(migrate, "", allocNotNeeded)
		a.markStop(lost, structs.AllocClientStatusLost, allocLost)
		desiredChanges := new(structs.DesiredUpdates)
		desiredChanges.Stop = uint64(len(as))
		a.result.desiredTGUpdates[group] = desiredChanges
	}
}

// markStop is a helper for marking a set of allocations to be stopped with a
// particular client status and description.
func (a *allocReconciler) markStop(allocs allocSet, clientStatus, statusDescription string) {
	for _, alloc := range allocs {
		a.result.stop = append(a.result.stop, allocStopResult{
			alloc:             alloc,
			clientStatus:      clientStatus,
			statusDescription: statusDescription,
		})
	}
}

// computeGroup reconciles state for a particular task group. It returns
// whether the deployment it is for is complete with regard to the task group.
func (a *allocReconciler) computeGroup(group string, all allocSet) bool {
	// Create the desired update object for the group
	desiredChanges := new(structs.DesiredUpdates)
	a.result.desiredTGUpdates[group] = desiredChanges

	// Get the task group. The task group may be nil if the job was updated
	// such that the task group no longer exists
	tg := a.job.LookupTaskGroup(group)

	// If the task group is nil, then the task group has been removed so all we
	// need to do is stop everything
	if tg == nil {
		untainted, migrate, lost := all.filterByTainted(a.taintedNodes)
		a.markStop(untainted, "", allocNotNeeded)
		a.markStop(migrate, "", allocNotNeeded)
		a.markStop(lost, structs.AllocClientStatusLost, allocLost)
		desiredChanges.Stop = uint64(len(untainted) + len(migrate) + len(lost))
		return true
	}

	// Get the deployment state for the group
	var dstate *structs.DeploymentState
	existingDeployment := false
	if a.deployment != nil {
		dstate, existingDeployment = a.deployment.TaskGroups[group]
	}
	if !existingDeployment {
		autorevert := false
		if tg.Update != nil && tg.Update.AutoRevert {
			autorevert = true
		}
		dstate = &structs.DeploymentState{
			AutoRevert: autorevert,
		}
	}

	// Filter batch allocations that do not need to be considered.
	all, ignore := a.batchFiltration(all)
	desiredChanges.Ignore += uint64(len(ignore))

	canaries, all := a.handleGroupCanaries(all, desiredChanges)

	// Determine what set of allocations are on tainted nodes
	untainted, migrate, lost := all.filterByTainted(a.taintedNodes)

	// Determine what set of terminal allocations need to be rescheduled
	untainted, reschedule := untainted.filterByRescheduleable(a.batch, tg.ReschedulePolicy)

	// Create a structure for choosing names. Seed with the taken names, which
	// is the union of the untainted, migrating, and rescheduled allocations
	// (includes canaries)
	nameIndex := newAllocNameIndex(a.jobID, group, tg.Count, untainted.union(migrate, reschedule))

	// Stop any unneeded allocations and update the untainted set to not
	// include stopped allocations.
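	// canaryState is true when canaries are desired for the group but have
	// not yet been promoted.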
	canaryState := dstate != nil && dstate.DesiredCanaries != 0 && !dstate.Promoted
	stop := a.computeStop(tg, nameIndex, untainted, migrate, lost, canaries, canaryState)
	desiredChanges.Stop += uint64(len(stop))
	untainted = untainted.difference(stop)

	// Having stopped unneeded allocations, append the canaries to the existing
	// set of untainted because they are promoted. This will cause them to be
	// treated like non-canaries
	if !canaryState {
		untainted = untainted.union(canaries)
		nameIndex.Set(canaries)
	}

	// Do inplace upgrades where possible and capture the set of upgrades that
	// need to be done destructively.
	ignore, inplace, destructive := a.computeUpdates(tg, untainted)
	desiredChanges.Ignore += uint64(len(ignore))
	desiredChanges.InPlaceUpdate += uint64(len(inplace))
	if !existingDeployment {
		dstate.DesiredTotal += len(destructive) + len(inplace)
	}

	// The fact that we have destructive updates and fewer canaries than
	// desired means we need to create canaries
	numDestructive := len(destructive)
	strategy := tg.Update
	canariesPromoted := dstate != nil && dstate.Promoted
	requireCanary := numDestructive != 0 && strategy != nil && len(canaries) < strategy.Canary && !canariesPromoted
	if requireCanary && !a.deploymentPaused && !a.deploymentFailed {
		number := strategy.Canary - len(canaries)
		number = helper.IntMin(numDestructive, number)
		desiredChanges.Canary += uint64(number)
		if !existingDeployment {
			dstate.DesiredCanaries = strategy.Canary
		}

		for _, name := range nameIndex.NextCanaries(uint(number), canaries, destructive) {
			a.result.place = append(a.result.place, allocPlaceResult{
				name:      name,
				canary:    true,
				taskGroup: tg,
			})
		}
	}

	// Determine how many we can place
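	// canaryState is recomputed because DesiredCanaries may have just been
	// set above.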
	canaryState = dstate != nil && dstate.DesiredCanaries != 0 && !dstate.Promoted
	limit := a.computeLimit(tg, untainted, destructive, migrate, canaryState)

	// Place if:
	// * The deployment is not paused or failed
	// * Not placing any canaries
	// * If there are any canaries, they have been promoted
	place := a.computePlacements(tg, nameIndex, untainted, migrate, reschedule)
	if !existingDeployment {
		dstate.DesiredTotal += len(place)
	}

	// deploymentPlaceReady tracks whether the deployment is in a state where
	// placements can be made without any other consideration.
	deploymentPlaceReady := !a.deploymentPaused && !a.deploymentFailed && !canaryState

	if deploymentPlaceReady {
		desiredChanges.Place += uint64(len(place))
		for _, p := range place {
			a.result.place = append(a.result.place, p)
		}

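		// Placements consume the remaining rolling-update limit.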
		min := helper.IntMin(len(place), limit)
		limit -= min
	} else if !deploymentPlaceReady && len(lost) != 0 {
		// We are in a situation where we shouldn't be placing more than we
		// need to, but we have lost allocations. It is a very weird user
		// experience if a node goes down and Nomad doesn't replace the
		// allocations because the deployment is paused/failed, so we only
		// place enough to recover the lost allocations.
		allowed := helper.IntMin(len(lost), len(place))
		desiredChanges.Place += uint64(allowed)
		for _, p := range place[:allowed] {
			a.result.place = append(a.result.place, p)
		}
	}

	if deploymentPlaceReady {
		// Do all destructive updates
		min := helper.IntMin(len(destructive), limit)
		limit -= min
		desiredChanges.DestructiveUpdate += uint64(min)
		desiredChanges.Ignore += uint64(len(destructive) - min)
		for _, alloc := range destructive.nameOrder()[:min] {
			a.result.destructiveUpdate = append(a.result.destructiveUpdate, allocDestructiveResult{
				placeName:             alloc.Name,
				placeTaskGroup:        tg,
				stopAlloc:             alloc,
				stopStatusDescription: allocUpdating,
			})
		}
	} else {
		desiredChanges.Ignore += uint64(len(destructive))
	}

	// Calculate the allowed number of changes and set the desired changes
	// accordingly.
	min := helper.IntMin(len(migrate), limit)
	if !a.deploymentFailed && !a.deploymentPaused {
		desiredChanges.Migrate += uint64(min)
		desiredChanges.Ignore += uint64(len(migrate) - min)
	} else {
		desiredChanges.Stop += uint64(len(migrate))
	}

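	// Walk the migrating allocations in name order, stopping each one and
	// placing a replacement until the limit is reached; any remainder is
	// deferred to a followup evaluation.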
	followup := false
	migrated := 0
	for _, alloc := range migrate.nameOrder() {
		// If the deployment is failed or paused, don't replace the
		// allocation; just mark it as stopped.
		if a.deploymentFailed || a.deploymentPaused {
			a.result.stop = append(a.result.stop, allocStopResult{
				alloc:             alloc,
				statusDescription: allocNodeTainted,
			})
			continue
		}

		if migrated >= limit {
			followup = true
			break
		}

		migrated++
		a.result.stop = append(a.result.stop, allocStopResult{
			alloc:             alloc,
			statusDescription: allocMigrating,
		})
		a.result.place = append(a.result.place, allocPlaceResult{
			name:          alloc.Name,
			canary:        false,
			taskGroup:     tg,
			previousAlloc: alloc,
		})
	}

	// If we exhausted the limit, create a followup evaluation after the
	// stagger period.
	if followup && strategy != nil && a.result.followupEvalWait < strategy.Stagger {
		a.result.followupEvalWait = strategy.Stagger
	}

	// Create a new deployment if necessary
	if !existingDeployment && strategy != nil && dstate.DesiredTotal != 0 {
		// A previous group may have made the deployment already
		if a.deployment == nil {
			a.deployment = structs.NewDeployment(a.job)
			a.result.deployment = a.deployment
		}

		// Attach the group's deployment state to the deployment
		a.deployment.TaskGroups[group] = dstate
	}

	// deploymentComplete is whether the deployment is complete, which largely
	// means that no placements were made or desired to be made
	deploymentComplete := len(destructive)+len(inplace)+len(place)+len(migrate) == 0 && !requireCanary

	// The final check for deployment completeness is to ensure everything is
	// healthy
	if deploymentComplete && a.deployment != nil {
		partOf, _ := untainted.filterByDeployment(a.deployment.ID)
		for _, alloc := range partOf {
			if !alloc.DeploymentStatus.IsHealthy() {
				deploymentComplete = false
				break
			}
		}
	}

	return deploymentComplete
}

// batchFiltration filters batch allocations that should be ignored. These are
// allocations that are terminal from a previous job version.
func (a *allocReconciler) batchFiltration(all allocSet) (filtered, ignore allocSet) {
	if !a.batch {
		return all, nil
	}

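	// union returns a new set, so the deletions below do not mutate the
	// caller's set.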
	filtered = filtered.union(all)
	ignored := make(map[string]*structs.Allocation)

	// Ignore terminal allocations from older job versions
	for id, alloc := range filtered {
		older := alloc.Job.Version < a.job.Version || alloc.Job.CreateIndex < a.job.CreateIndex
		if older && alloc.TerminalStatus() {
			delete(filtered, id)
			ignored[id] = alloc
		}
	}

	return filtered, ignored
}

// handleGroupCanaries handles the canaries for the group by stopping the
// unneeded ones and returning the current set of canaries and the updated total
// set of allocs for the group
func (a *allocReconciler) handleGroupCanaries(all allocSet, desiredChanges *structs.DesiredUpdates) (canaries, newAll allocSet) {
	// Stop any canary from an older deployment or from a failed one
	var stop []string

	// Cancel any non-promoted canaries from the older deployment
	if a.oldDeployment != nil {
		for _, s := range a.oldDeployment.TaskGroups {
			if !s.Promoted {
				stop = append(stop, s.PlacedCanaries...)
			}
		}
	}

	// Cancel any non-promoted canaries from a failed deployment
	if a.deployment != nil && a.deployment.Status == structs.DeploymentStatusFailed {
		for _, s := range a.deployment.TaskGroups {
			if !s.Promoted {
				stop = append(stop, s.PlacedCanaries...)
			}
		}
	}

	// stopSet is the allocSet that contains the canaries we desire to stop from
	// above.
	stopSet := all.fromKeys(stop)
	a.markStop(stopSet, "", allocNotNeeded)
	desiredChanges.Stop += uint64(len(stopSet))
	all = all.difference(stopSet)

	// Capture our current set of canaries and handle any migrations that are
	// needed by just stopping them.
	if a.deployment != nil {
		var canaryIDs []string
		for _, s := range a.deployment.TaskGroups {
			canaryIDs = append(canaryIDs, s.PlacedCanaries...)
		}

		canaries = all.fromKeys(canaryIDs)
		untainted, migrate, lost := canaries.filterByTainted(a.taintedNodes)
		a.markStop(migrate, "", allocMigrating)
		a.markStop(lost, structs.AllocClientStatusLost, allocLost)

		canaries = untainted
		all = all.difference(migrate, lost)
	}

	return canaries, all
}

// computeLimit returns the placement limit for a particular group. The inputs
// are the group definition, the untainted, destructive, and migrate allocation
// sets, and whether we are in a canary state.
func (a *allocReconciler) computeLimit(group *structs.TaskGroup, untainted, destructive, migrate allocSet, canaryState bool) int {
	// If there is no update strategy or deployment for the group we can deploy
	// as many as the group has
	if group.Update == nil || len(destructive)+len(migrate) == 0 {
		return group.Count
	} else if a.deploymentPaused || a.deploymentFailed {
		// If the deployment is paused or failed, do not create anything else
		return 0
	}

	// If we have canaries and they have not been promoted the limit is 0
	if canaryState {
		return 0
	}

	// If we have been promoted or there are no canaries, the limit is the
	// configured MaxParallel minus any outstanding non-healthy alloc for the
	// deployment
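	// (for example, MaxParallel = 3 with two allocations in the deployment
	// not yet marked healthy yields a limit of 1)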
	limit := group.Update.MaxParallel
	if a.deployment != nil {
		partOf, _ := untainted.filterByDeployment(a.deployment.ID)
		for _, alloc := range partOf {
			// An unhealthy allocation means nothing else should happen.
			if alloc.DeploymentStatus.IsUnhealthy() {
				return 0
			}

			if !alloc.DeploymentStatus.IsHealthy() {
				limit--
			}
		}
	}

	// The limit can be less than zero in the case that the job was changed such
	// that it required destructive changes and the count was scaled up.
	if limit < 0 {
		return 0
	}

	return limit
}

// computePlacements returns the set of allocations to place given the group
// definition and the set of untainted, migrating, and rescheduled allocations
// for the group.
func (a *allocReconciler) computePlacements(group *structs.TaskGroup,
	nameIndex *allocNameIndex, untainted, migrate allocSet, reschedule allocSet) []allocPlaceResult {

	// Hot path the nothing to do case
	existing := len(untainted) + len(migrate)
	if existing >= group.Count {
		return nil
	}

	var place []allocPlaceResult

	// Add rescheduled placement results. Any allocations being rescheduled
	// will remain at DesiredStatusRun, ClientStatusFailed.
	for _, alloc := range reschedule {
		place = append(place, allocPlaceResult{
			name:          alloc.Name,
			taskGroup:     group,
			previousAlloc: alloc,
			reschedule:    true,
		})
		existing += 1
		if existing == group.Count {
			break
		}
	}

	// Add remaining placement results
	if existing < group.Count {
		for _, name := range nameIndex.Next(uint(group.Count - existing)) {
			place = append(place, allocPlaceResult{
				name:      name,
				taskGroup: group,
			})
		}
	}

	return place
}

// computeStop returns the set of allocations that are marked for stopping given
// the group definition, the set of allocations in various states, and whether
// we are canarying.
func (a *allocReconciler) computeStop(group *structs.TaskGroup, nameIndex *allocNameIndex,
	untainted, migrate, lost, canaries allocSet, canaryState bool) allocSet {

	// Mark all lost allocations for stop. The previous allocation doesn't
	// matter here since it is on a lost node
	var stop allocSet
	stop = stop.union(lost)
	a.markStop(lost, structs.AllocClientStatusLost, allocLost)

	// If we are still deploying or creating canaries, don't stop them
	if canaryState {
		untainted = untainted.difference(canaries)
	}

	// Hot path the nothing to do case
	remove := len(untainted) + len(migrate) - group.Count
	if remove <= 0 {
		return stop
	}

	// Filter out any terminal allocations from the untainted set so that we
	// don't try to mark them as stopped redundantly
	untainted = filterByTerminal(untainted)

	// Prefer stopping any alloc that has the same name as the canaries if we
	// are promoted
	if !canaryState && len(canaries) != 0 {
		canaryNames := canaries.nameSet()
		for id, alloc := range untainted.difference(canaries) {
			if _, match := canaryNames[alloc.Name]; match {
				stop[id] = alloc
				a.result.stop = append(a.result.stop, allocStopResult{
					alloc:             alloc,
					statusDescription: allocNotNeeded,
				})
				delete(untainted, id)

				remove--
				if remove == 0 {
					return stop
				}
			}
		}
	}

	// Prefer selecting from the migrating set before stopping existing allocs
	if len(migrate) != 0 {
		mNames := newAllocNameIndex(a.jobID, group.Name, group.Count, migrate)
		removeNames := mNames.Highest(uint(remove))
		for id, alloc := range migrate {
			if _, match := removeNames[alloc.Name]; !match {
				continue
			}
			a.result.stop = append(a.result.stop, allocStopResult{
				alloc:             alloc,
				statusDescription: allocNotNeeded,
			})
			delete(migrate, id)
			stop[id] = alloc
			nameIndex.UnsetIndex(alloc.Index())

			remove--
			if remove == 0 {
				return stop
			}
		}
	}

	// Select the allocs with the highest name index to remove
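	// (e.g. with five allocation names indexed [0] through [4], Highest(2)
	// selects the names with indices 4 and 3)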
	removeNames := nameIndex.Highest(uint(remove))
	for id, alloc := range untainted {
		if _, ok := removeNames[alloc.Name]; ok {
			stop[id] = alloc
			a.result.stop = append(a.result.stop, allocStopResult{
				alloc:             alloc,
				statusDescription: allocNotNeeded,
			})
			delete(untainted, id)

			remove--
			if remove == 0 {
				return stop
			}
		}
	}

	// It is possible that we didn't stop as many as we should have if there
	// were allocations with duplicate names.
	for id, alloc := range untainted {
		stop[id] = alloc
		a.result.stop = append(a.result.stop, allocStopResult{
			alloc:             alloc,
			statusDescription: allocNotNeeded,
		})
		delete(untainted, id)

		remove--
		if remove == 0 {
			return stop
		}
	}

	return stop
}

// computeUpdates determines which allocations for the passed group require
// updates. Three groups are returned:
// 1. Those that require no upgrades
// 2. Those that can be upgraded in-place. These are added to the results
// automatically since the function contains the correct state to do so.
// 3. Those that require destructive updates
func (a *allocReconciler) computeUpdates(group *structs.TaskGroup, untainted allocSet) (ignore, inplace, destructive allocSet) {
	// Determine the set of allocations that need to be updated
	ignore = make(map[string]*structs.Allocation)
	inplace = make(map[string]*structs.Allocation)
	destructive = make(map[string]*structs.Allocation)

	for _, alloc := range untainted {
		ignoreChange, destructiveChange, inplaceAlloc := a.allocUpdateFn(alloc, a.job, group)
		if ignoreChange {
			ignore[alloc.ID] = alloc
		} else if destructiveChange {
			destructive[alloc.ID] = alloc
		} else {
			// Attach the deployment ID and clear the health if the
			// deployment has changed
			inplace[alloc.ID] = alloc
			a.result.inplaceUpdate = append(a.result.inplaceUpdate, inplaceAlloc)
		}
	}

	return
}