github.com/zoomfoo/nomad@v0.8.5-0.20180907175415-f28fd3a1a056/scheduler/generic_sched.go

package scheduler

import (
	"fmt"
	"log"
	"time"

	memdb "github.com/hashicorp/go-memdb"
	"github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/structs"
)

const (
	// maxServiceScheduleAttempts is used to limit the number of times
	// we will attempt to schedule if we continue to hit conflicts for services.
	maxServiceScheduleAttempts = 5

	// maxBatchScheduleAttempts is used to limit the number of times
	// we will attempt to schedule if we continue to hit conflicts for batch.
	maxBatchScheduleAttempts = 2

	// allocNotNeeded is the status used when a job no longer requires an allocation
	allocNotNeeded = "alloc not needed due to job update"

	// allocMigrating is the status used when we must migrate an allocation
	allocMigrating = "alloc is being migrated"

	// allocUpdating is the status used when a job requires an update
	allocUpdating = "alloc is being updated due to job update"

	// allocLost is the status used when an allocation is lost
	allocLost = "alloc is lost since its node is down"

	// allocInPlace is the status used when speculating on an in-place update
	allocInPlace = "alloc updating in-place"

	// allocNodeTainted is the status used when stopping an alloc because its
	// node is tainted.
	allocNodeTainted = "alloc not needed as node is tainted"

	// blockedEvalMaxPlanDesc is the description used for blocked evals that are
	// a result of hitting the max number of plan attempts
	blockedEvalMaxPlanDesc = "created due to placement conflicts"

	// blockedEvalFailedPlacements is the description used for blocked evals
	// that are a result of failing to place all allocations.
	blockedEvalFailedPlacements = "created to place remaining allocations"

	// reschedulingFollowupEvalDesc is the description used when creating
	// follow-up evals for delayed rescheduling
	reschedulingFollowupEvalDesc = "created for delayed rescheduling"

	// maxPastRescheduleEvents is the maximum number of past reschedule events
	// that we track when unlimited rescheduling is enabled
	maxPastRescheduleEvents = 5
)
    58  
    59  // SetStatusError is used to set the status of the evaluation to the given error
    60  type SetStatusError struct {
    61  	Err        error
    62  	EvalStatus string
    63  }
    64  
    65  func (s *SetStatusError) Error() string {
    66  	return s.Err.Error()
    67  }

// GenericScheduler is used for 'service' and 'batch' type jobs. This scheduler is
// designed for long-lived services, and as such spends more time attempting
// to make a high-quality placement. This is the primary scheduler for
// most workloads. It also supports a 'batch' mode to optimize for fast decision
// making at the cost of quality.
type GenericScheduler struct {
	logger  *log.Logger
	state   State
	planner Planner
	batch   bool

	// Per-evaluation state: the evaluation being processed, the job it
	// targets, the plan being built, and the result of the last submission.
	eval       *structs.Evaluation
	job        *structs.Job
	plan       *structs.Plan
	planResult *structs.PlanResult
	ctx        *EvalContext
	stack      *GenericStack

	// followUpEvals holds follow-up evaluations created for delayed rescheduling.
	followUpEvals []*structs.Evaluation

	// deployment is the current deployment for the job, if any.
	deployment *structs.Deployment

	// blocked is the blocked evaluation created for failed placements,
	// failedTGAllocs records placement failures per task group, and
	// queuedAllocs tracks how many allocations remain to be placed.
	blocked        *structs.Evaluation
	failedTGAllocs map[string]*structs.AllocMetric
	queuedAllocs   map[string]int
}

// NewServiceScheduler is a factory function to instantiate a new service scheduler
func NewServiceScheduler(logger *log.Logger, state State, planner Planner) Scheduler {
	s := &GenericScheduler{
		logger:  logger,
		state:   state,
		planner: planner,
		batch:   false,
	}
	return s
}

// NewBatchScheduler is a factory function to instantiate a new batch scheduler
func NewBatchScheduler(logger *log.Logger, state State, planner Planner) Scheduler {
	s := &GenericScheduler{
		logger:  logger,
		state:   state,
		planner: planner,
		batch:   true,
	}
	return s
}
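
// The sketch below is illustrative only and not part of the upstream API: a
// hypothetical helper showing how a caller that already holds a State and a
// Planner implementation would wire a factory to Process for one evaluation.
func processServiceEval(logger *log.Logger, state State, planner Planner, eval *structs.Evaluation) error {
	// Build a service scheduler and hand it the evaluation to process.
	sched := NewServiceScheduler(logger, state, planner)
	return sched.Process(eval)
}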

// Process is used to handle a single evaluation
func (s *GenericScheduler) Process(eval *structs.Evaluation) error {
	// Store the evaluation
	s.eval = eval

	// Verify the evaluation trigger reason is understood
	switch eval.TriggeredBy {
	case structs.EvalTriggerJobRegister, structs.EvalTriggerJobDeregister,
		structs.EvalTriggerNodeDrain, structs.EvalTriggerNodeUpdate,
		structs.EvalTriggerRollingUpdate,
		structs.EvalTriggerPeriodicJob, structs.EvalTriggerMaxPlans,
		structs.EvalTriggerDeploymentWatcher, structs.EvalTriggerRetryFailedAlloc:
	default:
		desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason",
			eval.TriggeredBy)
		return setStatus(s.logger, s.planner, s.eval, nil, s.blocked,
			s.failedTGAllocs, structs.EvalStatusFailed, desc, s.queuedAllocs,
			s.deployment.GetID())
	}

	// Retry up to the max schedule attempts for the job type, resetting the
	// attempt count whenever the last plan submission made progress.
	progress := func() bool { return progressMade(s.planResult) }
	limit := maxServiceScheduleAttempts
	if s.batch {
		limit = maxBatchScheduleAttempts
	}
	if err := retryMax(limit, s.process, progress); err != nil {
		if statusErr, ok := err.(*SetStatusError); ok {
			// Scheduling was tried but made no forward progress, so create a
			// blocked eval to retry once resources become available.
			var mErr multierror.Error
			if err := s.createBlockedEval(true); err != nil {
				mErr.Errors = append(mErr.Errors, err)
			}
			if err := setStatus(s.logger, s.planner, s.eval, nil, s.blocked,
				s.failedTGAllocs, statusErr.EvalStatus, err.Error(),
				s.queuedAllocs, s.deployment.GetID()); err != nil {
				mErr.Errors = append(mErr.Errors, err)
			}
			return mErr.ErrorOrNil()
		}
		return err
	}

	// If the current evaluation is a blocked evaluation and we didn't place
	// everything, do not update the status to complete.
	if s.eval.Status == structs.EvalStatusBlocked && len(s.failedTGAllocs) != 0 {
		e := s.ctx.Eligibility()
		newEval := s.eval.Copy()
		newEval.EscapedComputedClass = e.HasEscaped()
		newEval.ClassEligibility = e.GetClasses()
		newEval.QuotaLimitReached = e.QuotaLimitReached()
		return s.planner.ReblockEval(newEval)
	}

	// Update the status to complete
	return setStatus(s.logger, s.planner, s.eval, nil, s.blocked,
		s.failedTGAllocs, structs.EvalStatusComplete, "", s.queuedAllocs,
		s.deployment.GetID())
}

// createBlockedEval creates a blocked eval and submits it to the planner. The
// resulting eval is stored on the scheduler so later attempts can reuse it. If
// planFailure is true, the eval's trigger reason and description reflect that
// the max number of plan attempts was hit rather than a placement failure.
func (s *GenericScheduler) createBlockedEval(planFailure bool) error {
	e := s.ctx.Eligibility()
	escaped := e.HasEscaped()

	// Only store the eligible classes if the eval hasn't escaped.
	var classEligibility map[string]bool
	if !escaped {
		classEligibility = e.GetClasses()
	}

	s.blocked = s.eval.CreateBlockedEval(classEligibility, escaped, e.QuotaLimitReached())
	if planFailure {
		s.blocked.TriggeredBy = structs.EvalTriggerMaxPlans
		s.blocked.StatusDescription = blockedEvalMaxPlanDesc
	} else {
		s.blocked.StatusDescription = blockedEvalFailedPlacements
	}

	return s.planner.CreateEval(s.blocked)
}

// process is wrapped in retryMax to iteratively run the handler until we have no
// further work or we've made the maximum number of attempts. It returns true
// when scheduling is complete and false when another attempt is needed, for
// example after a forced state refresh or a partially committed plan.
func (s *GenericScheduler) process() (bool, error) {
	// Lookup the Job by ID
	var err error
	ws := memdb.NewWatchSet()
	s.job, err = s.state.JobByID(ws, s.eval.Namespace, s.eval.JobID)
	if err != nil {
		return false, fmt.Errorf("failed to get job %q: %v", s.eval.JobID, err)
	}

	numTaskGroups := 0
	stopped := s.job.Stopped()
	if !stopped {
		numTaskGroups = len(s.job.TaskGroups)
	}
	s.queuedAllocs = make(map[string]int, numTaskGroups)
	s.followUpEvals = nil

	// Create a plan
	s.plan = s.eval.MakePlan(s.job)

	if !s.batch {
		// Get any existing deployment
		s.deployment, err = s.state.LatestDeploymentByJobID(ws, s.eval.Namespace, s.eval.JobID)
		if err != nil {
			return false, fmt.Errorf("failed to get job deployment %q: %v", s.eval.JobID, err)
		}
	}

	// Reset the failed allocations
	s.failedTGAllocs = nil

	// Create an evaluation context
	s.ctx = NewEvalContext(s.state, s.plan, s.logger)

	// Construct the placement stack
	s.stack = NewGenericStack(s.batch, s.ctx)
	if !s.job.Stopped() {
		s.stack.SetJob(s.job)
	}

	// Compute the target job allocations
	if err := s.computeJobAllocs(); err != nil {
		s.logger.Printf("[ERR] sched: %#v: %v", s.eval, err)
		return false, err
	}

	// If there are failed allocations, we need to create a blocked evaluation
	// to place the failed allocations when resources become available. If the
	// current evaluation is already a blocked eval, we reuse it.
	if s.eval.Status != structs.EvalStatusBlocked && len(s.failedTGAllocs) != 0 && s.blocked == nil {
		if err := s.createBlockedEval(false); err != nil {
			s.logger.Printf("[ERR] sched: %#v failed to make blocked eval: %v", s.eval, err)
			return false, err
		}
		s.logger.Printf("[DEBUG] sched: %#v: failed to place all allocations, blocked eval '%s' created", s.eval, s.blocked.ID)
	}

	// If the plan is a no-op, we can bail. If AnnotatePlan is set, submit the
	// plan anyway to get the annotations.
	if s.plan.IsNoOp() && !s.eval.AnnotatePlan {
		return true, nil
	}

	// Create follow-up evals for any allocations eligible for delayed rescheduling
	if len(s.followUpEvals) > 0 {
		for _, eval := range s.followUpEvals {
			eval.PreviousEval = s.eval.ID
			// TODO(preetha) this should be batching evals before inserting them
			if err := s.planner.CreateEval(eval); err != nil {
				s.logger.Printf("[ERR] sched: %#v failed to make next eval for rescheduling: %v", s.eval, err)
				return false, err
			}
			s.logger.Printf("[DEBUG] sched: %#v: found reschedulable allocs, next eval '%s' created", s.eval, eval.ID)
		}
	}

	// Submit the plan and store the results.
	result, newState, err := s.planner.SubmitPlan(s.plan)
	s.planResult = result
	if err != nil {
		return false, err
	}

	// Decrement the number of allocations pending per task group based on the
	// number of allocations successfully placed
	adjustQueuedAllocations(s.logger, result, s.queuedAllocs)

	// If we got a state refresh, try again since we have stale data
	if newState != nil {
		s.logger.Printf("[DEBUG] sched: %#v: refresh forced", s.eval)
		s.state = newState
		return false, nil
	}

	// Try again if the plan was not fully committed, which indicates a
	// potential conflict
	fullCommit, expected, actual := result.FullCommit(s.plan)
	if !fullCommit {
		s.logger.Printf("[DEBUG] sched: %#v: attempted %d placements, %d placed",
			s.eval, expected, actual)
		if newState == nil {
			return false, fmt.Errorf("missing state refresh after partial commit")
		}
		return false, nil
	}

	// Success!
	return true, nil
}

// computeJobAllocs is used to reconcile differences between the job,
// existing allocations and node status to update the allocations.
func (s *GenericScheduler) computeJobAllocs() error {
	// Lookup the allocations by JobID
	ws := memdb.NewWatchSet()
	allocs, err := s.state.AllocsByJob(ws, s.eval.Namespace, s.eval.JobID, true)
	if err != nil {
		return fmt.Errorf("failed to get allocs for job '%s': %v",
			s.eval.JobID, err)
	}

	// Determine the tainted nodes containing job allocs
	tainted, err := taintedNodes(s.state, allocs)
	if err != nil {
		return fmt.Errorf("failed to get tainted nodes for job '%s': %v",
			s.eval.JobID, err)
	}

	// Update the allocations which are in pending/running state on tainted
	// nodes to lost
	updateNonTerminalAllocsToLost(s.plan, tainted, allocs)

	reconciler := NewAllocReconciler(s.ctx.Logger(),
		genericAllocUpdateFn(s.ctx, s.stack, s.eval.ID),
		s.batch, s.eval.JobID, s.job, s.deployment, allocs, tainted, s.eval.ID)
	results := reconciler.Compute()
	s.logger.Printf("[DEBUG] sched: %#v: %#v", s.eval, results)

	if s.eval.AnnotatePlan {
		s.plan.Annotations = &structs.PlanAnnotations{
			DesiredTGUpdates: results.desiredTGUpdates,
		}
	}

	// Add the deployment changes to the plan
	s.plan.Deployment = results.deployment
	s.plan.DeploymentUpdates = results.deploymentUpdates

	// Store all the follow-up evaluations from rescheduled allocations
	if len(results.desiredFollowupEvals) > 0 {
		for _, evals := range results.desiredFollowupEvals {
			s.followUpEvals = append(s.followUpEvals, evals...)
		}
	}

	// Update the stored deployment
	if results.deployment != nil {
		s.deployment = results.deployment
	}

	// Handle the stops
	for _, stop := range results.stop {
		s.plan.AppendUpdate(stop.alloc, structs.AllocDesiredStatusStop, stop.statusDescription, stop.clientStatus)
	}

	// Handle the in-place updates
	for _, update := range results.inplaceUpdate {
		if update.DeploymentID != s.deployment.GetID() {
			update.DeploymentID = s.deployment.GetID()
			update.DeploymentStatus = nil
		}
		s.ctx.Plan().AppendAlloc(update)
	}

	// Handle the attribute updates
	for _, update := range results.attributeUpdates {
		s.ctx.Plan().AppendAlloc(update)
	}

	// Nothing remaining to do if placement is not required
	if len(results.place)+len(results.destructiveUpdate) == 0 {
		// If the job has been purged we don't have access to the job;
		// otherwise set the queued allocs to zero. This also covers the case
		// where the job is being stopped.
		if s.job != nil {
			for _, tg := range s.job.TaskGroups {
				s.queuedAllocs[tg.Name] = 0
			}
		}
		return nil
	}

	// Record the number of allocations that need to be placed per task group
	for _, place := range results.place {
		s.queuedAllocs[place.taskGroup.Name] += 1
	}
	for _, destructive := range results.destructiveUpdate {
		s.queuedAllocs[destructive.placeTaskGroup.Name] += 1
	}

	// Compute the placements
	place := make([]placementResult, 0, len(results.place))
	for _, p := range results.place {
		place = append(place, p)
	}

	destructive := make([]placementResult, 0, len(results.destructiveUpdate))
	for _, p := range results.destructiveUpdate {
		destructive = append(destructive, p)
	}
	return s.computePlacements(destructive, place)
}
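
// Taken together, computeJobAllocs translates the reconciler's results into
// plan mutations: stops become AppendUpdate entries, in-place and attribute
// updates become AppendAlloc entries, deployment changes are copied onto the
// plan, and new or destructive placements are handed to computePlacements.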

// computePlacements computes placements for allocations. It is given the set of
// destructive updates to place and the set of new placements to place.
func (s *GenericScheduler) computePlacements(destructive, place []placementResult) error {
	// Get the base nodes
	nodes, byDC, err := readyNodesInDCs(s.state, s.job.Datacenters)
	if err != nil {
		return err
	}

	var deploymentID string
	if s.deployment != nil && s.deployment.Active() {
		deploymentID = s.deployment.ID
	}

	// Update the set of placement nodes
	s.stack.SetNodes(nodes)

	// Capture current time to use as the start time for any rescheduled allocations
	now := time.Now()

	// Have to handle destructive changes first as we need to discount their
	// resources. To understand this imagine the resources were reduced and the
	// count was scaled up.
	for _, results := range [][]placementResult{destructive, place} {
		for _, missing := range results {
			// Get the task group
			tg := missing.TaskGroup()

			// Check if this task group has already failed
			if metric, ok := s.failedTGAllocs[tg.Name]; ok {
				metric.CoalescedFailures += 1
				continue
			}

			// Find the preferred node
			preferredNode, err := s.findPreferredNode(missing)
			if err != nil {
				return err
			}

			// Check if we should stop the previous allocation upon successful
			// placement of its replacement. This allows atomic placements/stops. We
			// stop the allocation before trying to find a replacement because this
			// frees the resources currently used by the previous allocation.
			stopPrevAlloc, stopPrevAllocDesc := missing.StopPreviousAlloc()
			prevAllocation := missing.PreviousAllocation()
			if stopPrevAlloc {
				s.plan.AppendUpdate(prevAllocation, structs.AllocDesiredStatusStop, stopPrevAllocDesc, "")
			}

			// Compute penalty nodes for rescheduled allocs
			selectOptions := getSelectOptions(prevAllocation, preferredNode)
			option, _ := s.stack.Select(tg, selectOptions)

			// Store the available nodes by datacenter
			s.ctx.Metrics().NodesAvailable = byDC

			// Compute top K scoring node metadata
			s.ctx.Metrics().PopulateScoreMetaData()

			// Set fields based on if we found an allocation option
			if option != nil {
				// Create an allocation for this
				alloc := &structs.Allocation{
					ID:            uuid.Generate(),
					Namespace:     s.job.Namespace,
					EvalID:        s.eval.ID,
					Name:          missing.Name(),
					JobID:         s.job.ID,
					TaskGroup:     tg.Name,
					Metrics:       s.ctx.Metrics(),
					NodeID:        option.Node.ID,
					DeploymentID:  deploymentID,
					TaskResources: option.TaskResources,
					DesiredStatus: structs.AllocDesiredStatusRun,
					ClientStatus:  structs.AllocClientStatusPending,

					SharedResources: &structs.Resources{
						DiskMB: tg.EphemeralDisk.SizeMB,
					},
				}

				// If the new allocation is replacing an older allocation then we
				// record the older allocation's ID so that the two are chained
				if prevAllocation != nil {
					alloc.PreviousAllocation = prevAllocation.ID
					if missing.IsRescheduling() {
						updateRescheduleTracker(alloc, prevAllocation, now)
					}
				}

				// If we are placing a canary and we found a match, add the canary
				// to the deployment state object and mark it as a canary.
				if missing.Canary() {
					if state, ok := s.deployment.TaskGroups[tg.Name]; ok {
						state.PlacedCanaries = append(state.PlacedCanaries, alloc.ID)
					}

					alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
						Canary: true,
					}
				}

				// Track the placement
				s.plan.AppendAlloc(alloc)

			} else {
				// Lazy initialize the failed map
				if s.failedTGAllocs == nil {
					s.failedTGAllocs = make(map[string]*structs.AllocMetric)
				}

				// Track the fact that we didn't find a placement
				s.failedTGAllocs[tg.Name] = s.ctx.Metrics()

				// If we weren't able to find a replacement for the allocation, back
				// out the fact that we asked to stop the allocation.
				if stopPrevAlloc {
					s.plan.PopUpdate(prevAllocation)
				}
			}

		}
	}

	return nil
}

// getSelectOptions sets up preferred nodes and penalty nodes
func getSelectOptions(prevAllocation *structs.Allocation, preferredNode *structs.Node) *SelectOptions {
	selectOptions := &SelectOptions{}
	if prevAllocation != nil {
		penaltyNodes := make(map[string]struct{})
		penaltyNodes[prevAllocation.NodeID] = struct{}{}
		if prevAllocation.RescheduleTracker != nil {
			for _, reschedEvent := range prevAllocation.RescheduleTracker.Events {
				penaltyNodes[reschedEvent.PrevNodeID] = struct{}{}
			}
		}
		selectOptions.PenaltyNodeIDs = penaltyNodes
	}
	if preferredNode != nil {
		selectOptions.PreferredNodes = []*structs.Node{preferredNode}
	}
	return selectOptions
}
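
// Illustrative example (hypothetical values, not executed anywhere in this
// package): for a rescheduled allocation that most recently ran on "node-2"
// and previously bounced off "node-1", getSelectOptions penalizes both nodes:
//
//	prev := &structs.Allocation{
//		NodeID: "node-2",
//		RescheduleTracker: &structs.RescheduleTracker{
//			Events: []*structs.RescheduleEvent{{PrevNodeID: "node-1"}},
//		},
//	}
//	opts := getSelectOptions(prev, nil)
//	// opts.PenaltyNodeIDs now contains "node-1" and "node-2".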

// updateRescheduleTracker carries over previous reschedule attempts and adds the
// most recent reschedule event for the allocation being replaced
func updateRescheduleTracker(alloc *structs.Allocation, prev *structs.Allocation, now time.Time) {
	reschedPolicy := prev.ReschedulePolicy()
	var rescheduleEvents []*structs.RescheduleEvent
	if prev.RescheduleTracker != nil {
		var interval time.Duration
		if reschedPolicy != nil {
			interval = reschedPolicy.Interval
		}
		// If attempts is limited, copy only the events that fall within the
		// reschedule interval. This keeps the list of events small in cases
		// where there's a long chain of old reschedule events.
		if reschedPolicy != nil && reschedPolicy.Attempts > 0 {
			for _, reschedEvent := range prev.RescheduleTracker.Events {
				timeDiff := now.UnixNano() - reschedEvent.RescheduleTime
				if interval > 0 && timeDiff <= interval.Nanoseconds() {
					rescheduleEvents = append(rescheduleEvents, reschedEvent.Copy())
				}
			}
		} else {
			// Only copy the last maxPastRescheduleEvents events if unlimited
			// rescheduling is enabled
			start := 0
			if len(prev.RescheduleTracker.Events) > maxPastRescheduleEvents {
				start = len(prev.RescheduleTracker.Events) - maxPastRescheduleEvents
			}
			for i := start; i < len(prev.RescheduleTracker.Events); i++ {
				reschedEvent := prev.RescheduleTracker.Events[i]
				rescheduleEvents = append(rescheduleEvents, reschedEvent.Copy())
			}
		}
	}
	nextDelay := prev.NextDelay()
	rescheduleEvent := structs.NewRescheduleEvent(now.UnixNano(), prev.ID, prev.NodeID, nextDelay)
	rescheduleEvents = append(rescheduleEvents, rescheduleEvent)
	alloc.RescheduleTracker = &structs.RescheduleTracker{Events: rescheduleEvents}
}
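
// As a concrete illustration of the two branches above (numbers are made up):
// with Attempts=3 and Interval=1h, only prior reschedule events from the last
// hour are carried over onto the new allocation; with unlimited rescheduling,
// only the most recent maxPastRescheduleEvents (5) events are kept. In both
// cases a new event recording this reschedule, including the delay returned
// by prev.NextDelay(), is appended to the tracker.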

// findPreferredNode finds the preferred node for an allocation
func (s *GenericScheduler) findPreferredNode(place placementResult) (*structs.Node, error) {
	if prev := place.PreviousAllocation(); prev != nil && place.TaskGroup().EphemeralDisk.Sticky {
		ws := memdb.NewWatchSet()
		preferredNode, err := s.state.NodeByID(ws, prev.NodeID)
		if err != nil {
			return nil, err
		}

		if preferredNode != nil && preferredNode.Ready() {
			return preferredNode, nil
		}
	}
	return nil, nil
}