github.com/zoomfoo/nomad@v0.8.5-0.20180907175415-f28fd3a1a056/nomad/core_sched.go

package nomad

import (
	"fmt"
	"math"
	"time"

	memdb "github.com/hashicorp/go-memdb"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/scheduler"
)

var (
	// maxIdsPerReap is the maximum number of evals and allocations to reap in a
	// single Raft transaction. This is to ensure that the Raft message does not
	// become too large.
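	// Assuming 36-byte UUID strings, this works out to roughly 7,281 IDs per
	// reap ((1024*256)/36 = 7281).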
	maxIdsPerReap = (1024 * 256) / 36 // 0.25 MB of ids.
)

// CoreScheduler is a special "scheduler" that is registered
// as "_core". It is used to run various administrative work
// across the cluster.
type CoreScheduler struct {
	srv  *Server
	snap *state.StateSnapshot
}

// NewCoreScheduler is used to return a new core scheduler instance
func NewCoreScheduler(srv *Server, snap *state.StateSnapshot) scheduler.Scheduler {
	s := &CoreScheduler{
		srv:  srv,
		snap: snap,
	}
	return s
}

// Process is used to implement the scheduler.Scheduler interface
func (c *CoreScheduler) Process(eval *structs.Evaluation) error {
	switch eval.JobID {
	case structs.CoreJobEvalGC:
		return c.evalGC(eval)
	case structs.CoreJobNodeGC:
		return c.nodeGC(eval)
	case structs.CoreJobJobGC:
		return c.jobGC(eval)
	case structs.CoreJobDeploymentGC:
		return c.deploymentGC(eval)
	case structs.CoreJobForceGC:
		return c.forceGC(eval)
	default:
		return fmt.Errorf("core scheduler cannot handle job '%s'", eval.JobID)
	}
}
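
// Core evaluations are created elsewhere (typically by the leader's periodic
// GC timers) with JobID set to one of the structs.CoreJob* constants. A
// minimal sketch of how such an eval is dispatched through Process (field
// values are illustrative, not the exact construction used by the leader):
//
//	// assuming an existing *Server (srv) and state snapshot (snap)
//	eval := &structs.Evaluation{JobID: structs.CoreJobEvalGC}
//	core := NewCoreScheduler(srv, snap)
//	if err := core.Process(eval); err != nil {
//		// handle GC failure
//	}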

// forceGC is used to garbage collect all eligible objects.
func (c *CoreScheduler) forceGC(eval *structs.Evaluation) error {
	if err := c.jobGC(eval); err != nil {
		return err
	}
	if err := c.evalGC(eval); err != nil {
		return err
	}
	if err := c.deploymentGC(eval); err != nil {
		return err
	}

	// Node GC must occur after the others to ensure the allocations are
	// cleared.
	return c.nodeGC(eval)
}
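
// Except when GC is forced, each routine below converts a configured duration
// threshold (JobGCThreshold, EvalGCThreshold, NodeGCThreshold or
// DeploymentGCThreshold) into a Raft index via the FSM time table, and only
// considers objects whose CreateIndex (jobs) or ModifyIndex (evals, nodes,
// deployments) is at or below that index.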

// jobGC is used to garbage collect eligible jobs.
func (c *CoreScheduler) jobGC(eval *structs.Evaluation) error {
	// Get all the jobs eligible for garbage collection.
	ws := memdb.NewWatchSet()
	iter, err := c.snap.JobsByGC(ws, true)
	if err != nil {
		return err
	}

	var oldThreshold uint64
	if eval.JobID == structs.CoreJobForceGC {
		// The GC was forced, so set the threshold to its maximum so everything
		// will GC.
		oldThreshold = math.MaxUint64
		c.srv.logger.Println("[DEBUG] sched.core: forced job GC")
	} else {
		// Get the time table to calculate GC cutoffs.
		tt := c.srv.fsm.TimeTable()
		cutoff := time.Now().UTC().Add(-1 * c.srv.config.JobGCThreshold)
		oldThreshold = tt.NearestIndex(cutoff)
		c.srv.logger.Printf("[DEBUG] sched.core: job GC: scanning before index %d (%v)",
			oldThreshold, c.srv.config.JobGCThreshold)
	}

	// Collect the allocations, evaluations and jobs to GC
	var gcAlloc, gcEval []string
	var gcJob []*structs.Job

OUTER:
	for i := iter.Next(); i != nil; i = iter.Next() {
		job := i.(*structs.Job)

		// Ignore new jobs.
		if job.CreateIndex > oldThreshold {
			continue
		}

		ws := memdb.NewWatchSet()
		evals, err := c.snap.EvalsByJob(ws, job.Namespace, job.ID)
		if err != nil {
			c.srv.logger.Printf("[ERR] sched.core: failed to get evals for job %s: %v", job.ID, err)
			continue
		}

		allEvalsGC := true
		var jobAlloc, jobEval []string
		for _, eval := range evals {
			gc, allocs, err := c.gcEval(eval, oldThreshold, true)
			if err != nil {
				continue OUTER
			}

			if gc {
				jobEval = append(jobEval, eval.ID)
				jobAlloc = append(jobAlloc, allocs...)
			} else {
				allEvalsGC = false
				break
			}
		}

		// Job is eligible for garbage collection
		if allEvalsGC {
			gcJob = append(gcJob, job)
			gcAlloc = append(gcAlloc, jobAlloc...)
			gcEval = append(gcEval, jobEval...)
		}
	}

	// Fast-path the nothing case
	if len(gcEval) == 0 && len(gcAlloc) == 0 && len(gcJob) == 0 {
		return nil
	}
	c.srv.logger.Printf("[DEBUG] sched.core: job GC: %d jobs, %d evaluations, %d allocs eligible",
		len(gcJob), len(gcEval), len(gcAlloc))

	// Reap the evals and allocs
	if err := c.evalReap(gcEval, gcAlloc); err != nil {
		return err
	}

	// Reap the jobs
	return c.jobReap(gcJob, eval.LeaderACL)
}
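
// Note that jobGC passes allowBatch=true to gcEval: when an entire dead batch
// job is being collected, its evals and allocs can be reaped in the same pass,
// whereas evalGC below passes allowBatch=false so that a batch job's
// allocations are only collected together with the job itself.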

// jobReap contacts the leader and issues a reap on the passed jobs
func (c *CoreScheduler) jobReap(jobs []*structs.Job, leaderACL string) error {
	// Call to the leader to issue the reap
	for _, req := range c.partitionJobReap(jobs, leaderACL) {
		var resp structs.JobBatchDeregisterResponse
		if err := c.srv.RPC("Job.BatchDeregister", req, &resp); err != nil {
			c.srv.logger.Printf("[ERR] sched.core: batch job reap failed: %v", err)
			return err
		}
	}

	return nil
}

// partitionJobReap returns a list of JobBatchDeregisterRequests to make,
// ensuring a single request does not contain too many jobs. This is necessary
// to ensure that the Raft transaction does not become too large.
func (c *CoreScheduler) partitionJobReap(jobs []*structs.Job, leaderACL string) []*structs.JobBatchDeregisterRequest {
	option := &structs.JobDeregisterOptions{Purge: true}
	var requests []*structs.JobBatchDeregisterRequest
	submittedJobs := 0
	for submittedJobs != len(jobs) {
		req := &structs.JobBatchDeregisterRequest{
			Jobs: make(map[structs.NamespacedID]*structs.JobDeregisterOptions),
			WriteRequest: structs.WriteRequest{
				Region:    c.srv.config.Region,
				AuthToken: leaderACL,
			},
		}
		requests = append(requests, req)
		available := maxIdsPerReap

		if remaining := len(jobs) - submittedJobs; remaining > 0 {
			if remaining <= available {
				for _, job := range jobs[submittedJobs:] {
					jns := structs.NamespacedID{ID: job.ID, Namespace: job.Namespace}
					req.Jobs[jns] = option
				}
				submittedJobs += remaining
			} else {
				for _, job := range jobs[submittedJobs : submittedJobs+available] {
					jns := structs.NamespacedID{ID: job.ID, Namespace: job.Namespace}
					req.Jobs[jns] = option
				}
				submittedJobs += available
			}
		}
	}

	return requests
}
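
// A rough worked example of the batching above: with maxIdsPerReap of 7,281,
// reaping 10,000 jobs produces two requests, the first holding 7,281 jobs and
// the second the remaining 2,719.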

// evalGC is used to garbage collect old evaluations
func (c *CoreScheduler) evalGC(eval *structs.Evaluation) error {
	// Iterate over the evaluations
	ws := memdb.NewWatchSet()
	iter, err := c.snap.Evals(ws)
	if err != nil {
		return err
	}

	var oldThreshold uint64
	if eval.JobID == structs.CoreJobForceGC {
		// The GC was forced, so set the threshold to its maximum so everything
		// will GC.
		oldThreshold = math.MaxUint64
		c.srv.logger.Println("[DEBUG] sched.core: forced eval GC")
	} else {
		// Compute the old threshold limit for GC using the FSM
		// time table.  This is a rough mapping of a time to the
		// Raft index it belongs to.
		tt := c.srv.fsm.TimeTable()
		cutoff := time.Now().UTC().Add(-1 * c.srv.config.EvalGCThreshold)
		oldThreshold = tt.NearestIndex(cutoff)
		c.srv.logger.Printf("[DEBUG] sched.core: eval GC: scanning before index %d (%v)",
			oldThreshold, c.srv.config.EvalGCThreshold)
	}

	// Collect the allocations and evaluations to GC
	var gcAlloc, gcEval []string
	for raw := iter.Next(); raw != nil; raw = iter.Next() {
		eval := raw.(*structs.Evaluation)

		// The Evaluation GC should not handle batch jobs since those need to be
		// garbage collected in one shot
		gc, allocs, err := c.gcEval(eval, oldThreshold, false)
		if err != nil {
			return err
		}

		if gc {
			gcEval = append(gcEval, eval.ID)
		}
		gcAlloc = append(gcAlloc, allocs...)
	}

	// Fast-path the nothing case
	if len(gcEval) == 0 && len(gcAlloc) == 0 {
		return nil
	}
	c.srv.logger.Printf("[DEBUG] sched.core: eval GC: %d evaluations, %d allocs eligible",
		len(gcEval), len(gcAlloc))

	return c.evalReap(gcEval, gcAlloc)
}

// gcEval returns whether the eval should be garbage collected given a raft
// threshold index. The eval is not eligible for garbage collection if it or
// its allocs are newer than the threshold. If the eval should be garbage
// collected, the associated alloc IDs that should also be removed are
// returned.
func (c *CoreScheduler) gcEval(eval *structs.Evaluation, thresholdIndex uint64, allowBatch bool) (
	bool, []string, error) {
	// Ignore non-terminal and new evaluations
	if !eval.TerminalStatus() || eval.ModifyIndex > thresholdIndex {
		return false, nil, nil
	}

	// Create a watchset
	ws := memdb.NewWatchSet()

	// Look up the job
	job, err := c.snap.JobByID(ws, eval.Namespace, eval.JobID)
	if err != nil {
		return false, nil, err
	}

	// If the eval is from a running "batch" job we don't want to garbage
	// collect its allocations. If there is a long running batch job and its
	// terminal allocations get GC'd the scheduler would re-run the
	// allocations.
	if eval.Type == structs.JobTypeBatch {
		// Check if the job is running.
		//
		// We can collect the eval if:
		//   - the job no longer exists
		//   - the job is stopped and dead
		//   - allowBatch is set and the job is dead
		collect := false
		if job == nil {
			collect = true
		} else if job.Status != structs.JobStatusDead {
			collect = false
		} else if job.Stop {
			collect = true
		} else if allowBatch {
			collect = true
		}

		// We don't want to GC anything related to a job which is not dead.
		// If the batch job doesn't exist we can GC it regardless of allowBatch.
		if !collect {
			return false, nil, nil
		}
	}

	// Get the allocations by eval
	allocs, err := c.snap.AllocsByEval(ws, eval.ID)
	if err != nil {
		c.srv.logger.Printf("[ERR] sched.core: failed to get allocs for eval %s: %v",
			eval.ID, err)
		return false, nil, err
	}

	// Scan the allocations to ensure they are terminal and old
	gcEval := true
	var gcAllocIDs []string
	for _, alloc := range allocs {
		if !allocGCEligible(alloc, job, time.Now(), thresholdIndex) {
			// Can't GC the evaluation since not all of the allocations are
			// terminal
			gcEval = false
		} else {
			// The allocation is eligible to be GC'd
			gcAllocIDs = append(gcAllocIDs, alloc.ID)
		}
	}

	return gcEval, gcAllocIDs, nil
}

// evalReap contacts the leader and issues a reap on the passed evals and
// allocs.
func (c *CoreScheduler) evalReap(evals, allocs []string) error {
	// Call to the leader to issue the reap
	for _, req := range c.partitionEvalReap(evals, allocs) {
		var resp structs.GenericResponse
		if err := c.srv.RPC("Eval.Reap", req, &resp); err != nil {
			c.srv.logger.Printf("[ERR] sched.core: eval reap failed: %v", err)
			return err
		}
	}

	return nil
}

// partitionEvalReap returns a list of EvalDeleteRequest to make, ensuring a single
// request does not contain too many allocations and evaluations. This is
// necessary to ensure that the Raft transaction does not become too large.
func (c *CoreScheduler) partitionEvalReap(evals, allocs []string) []*structs.EvalDeleteRequest {
	var requests []*structs.EvalDeleteRequest
	submittedEvals, submittedAllocs := 0, 0
	for submittedEvals != len(evals) || submittedAllocs != len(allocs) {
		req := &structs.EvalDeleteRequest{
			WriteRequest: structs.WriteRequest{
				Region: c.srv.config.Region,
			},
		}
		requests = append(requests, req)
		available := maxIdsPerReap

		// Add the allocs first
		if remaining := len(allocs) - submittedAllocs; remaining > 0 {
			if remaining <= available {
				req.Allocs = allocs[submittedAllocs:]
				available -= remaining
				submittedAllocs += remaining
			} else {
				req.Allocs = allocs[submittedAllocs : submittedAllocs+available]
				submittedAllocs += available

				// Exhausted space so skip adding evals
				continue
			}
		}

		// Add the evals
		if remaining := len(evals) - submittedEvals; remaining > 0 {
			if remaining <= available {
				req.Evals = evals[submittedEvals:]
				submittedEvals += remaining
			} else {
				req.Evals = evals[submittedEvals : submittedEvals+available]
				submittedEvals += available
			}
		}
	}

	return requests
}
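
// A small worked example of the partitioning above, using a hypothetical limit
// of 3 IDs per request instead of maxIdsPerReap: for 5 alloc IDs and 2 eval
// IDs, the first request carries allocs[0:3], the second carries allocs[3:5]
// plus evals[0:1], and the third carries evals[1:2].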

// nodeGC is used to garbage collect old nodes
func (c *CoreScheduler) nodeGC(eval *structs.Evaluation) error {
	// Iterate over the nodes
	ws := memdb.NewWatchSet()
	iter, err := c.snap.Nodes(ws)
	if err != nil {
		return err
	}

	var oldThreshold uint64
	if eval.JobID == structs.CoreJobForceGC {
		// The GC was forced, so set the threshold to its maximum so everything
		// will GC.
		oldThreshold = math.MaxUint64
		c.srv.logger.Println("[DEBUG] sched.core: forced node GC")
	} else {
		// Compute the old threshold limit for GC using the FSM
		// time table.  This is a rough mapping of a time to the
		// Raft index it belongs to.
		tt := c.srv.fsm.TimeTable()
		cutoff := time.Now().UTC().Add(-1 * c.srv.config.NodeGCThreshold)
		oldThreshold = tt.NearestIndex(cutoff)
		c.srv.logger.Printf("[DEBUG] sched.core: node GC: scanning before index %d (%v)",
			oldThreshold, c.srv.config.NodeGCThreshold)
	}

	// Collect the nodes to GC
	var gcNode []string
OUTER:
	for {
		raw := iter.Next()
		if raw == nil {
			break
		}
		node := raw.(*structs.Node)

		// Ignore non-terminal and new nodes
		if !node.TerminalStatus() || node.ModifyIndex > oldThreshold {
			continue
		}

		// Get the allocations by node
		ws := memdb.NewWatchSet()
		allocs, err := c.snap.AllocsByNode(ws, node.ID)
		if err != nil {
			c.srv.logger.Printf("[ERR] sched.core: failed to get allocs for node %s: %v",
				node.ID, err)
			continue
		}

		// If there are any non-terminal allocations, skip the node. If the node
		// is terminal and the allocations are not, the scheduler may not have
		// run yet to transition the allocs on the node to terminal. We delay
		// GC'ing until this happens.
		for _, alloc := range allocs {
			if !alloc.TerminalStatus() {
				continue OUTER
			}
		}

		// Node is eligible for garbage collection
		gcNode = append(gcNode, node.ID)
	}

	// Fast-path the nothing case
	if len(gcNode) == 0 {
		return nil
	}
	c.srv.logger.Printf("[DEBUG] sched.core: node GC: %d nodes eligible", len(gcNode))

	// Call to the leader to issue the reap
	for _, nodeID := range gcNode {
		req := structs.NodeDeregisterRequest{
			NodeID: nodeID,
			WriteRequest: structs.WriteRequest{
				Region:    c.srv.config.Region,
				AuthToken: eval.LeaderACL,
			},
		}
		var resp structs.NodeUpdateResponse
		if err := c.srv.RPC("Node.Deregister", &req, &resp); err != nil {
			c.srv.logger.Printf("[ERR] sched.core: node '%s' reap failed: %v", nodeID, err)
			return err
		}
	}
	return nil
}

// deploymentGC is used to garbage collect old deployments
func (c *CoreScheduler) deploymentGC(eval *structs.Evaluation) error {
	// Iterate over the deployments
	ws := memdb.NewWatchSet()
	iter, err := c.snap.Deployments(ws)
	if err != nil {
		return err
	}

	var oldThreshold uint64
	if eval.JobID == structs.CoreJobForceGC {
		// The GC was forced, so set the threshold to its maximum so everything
		// will GC.
		oldThreshold = math.MaxUint64
		c.srv.logger.Println("[DEBUG] sched.core: forced deployment GC")
	} else {
		// Compute the old threshold limit for GC using the FSM
		// time table.  This is a rough mapping of a time to the
		// Raft index it belongs to.
		tt := c.srv.fsm.TimeTable()
		cutoff := time.Now().UTC().Add(-1 * c.srv.config.DeploymentGCThreshold)
		oldThreshold = tt.NearestIndex(cutoff)
		c.srv.logger.Printf("[DEBUG] sched.core: deployment GC: scanning before index %d (%v)",
			oldThreshold, c.srv.config.DeploymentGCThreshold)
	}

	// Collect the deployments to GC
	var gcDeployment []string

OUTER:
	for {
		raw := iter.Next()
		if raw == nil {
			break
		}
		deploy := raw.(*structs.Deployment)

		// Ignore non-terminal and new deployments
		if deploy.Active() || deploy.ModifyIndex > oldThreshold {
			continue
		}

		// Look up the allocations for this deployment.
		allocs, err := c.snap.AllocsByDeployment(ws, deploy.ID)
		if err != nil {
			c.srv.logger.Printf("[ERR] sched.core: failed to get allocs for deployment %s: %v",
				deploy.ID, err)
			continue
		}

		// Ensure there is no non-terminal allocation referencing the deployment.
		for _, alloc := range allocs {
			if !alloc.TerminalStatus() {
				continue OUTER
			}
		}

		// Deployment is eligible for garbage collection
		gcDeployment = append(gcDeployment, deploy.ID)
	}

	// Fast-path the nothing case
	if len(gcDeployment) == 0 {
		return nil
	}
	c.srv.logger.Printf("[DEBUG] sched.core: deployment GC: %d deployments eligible", len(gcDeployment))
	return c.deploymentReap(gcDeployment)
}

// deploymentReap contacts the leader and issues a reap on the passed
// deployments.
func (c *CoreScheduler) deploymentReap(deployments []string) error {
	// Call to the leader to issue the reap
	for _, req := range c.partitionDeploymentReap(deployments) {
		var resp structs.GenericResponse
		if err := c.srv.RPC("Deployment.Reap", req, &resp); err != nil {
			c.srv.logger.Printf("[ERR] sched.core: deployment reap failed: %v", err)
			return err
		}
	}

	return nil
}

// partitionDeploymentReap returns a list of DeploymentDeleteRequest to make,
// ensuring a single request does not contain too many deployments. This is
// necessary to ensure that the Raft transaction does not become too large.
func (c *CoreScheduler) partitionDeploymentReap(deployments []string) []*structs.DeploymentDeleteRequest {
	var requests []*structs.DeploymentDeleteRequest
	submittedDeployments := 0
	for submittedDeployments != len(deployments) {
		req := &structs.DeploymentDeleteRequest{
			WriteRequest: structs.WriteRequest{
				Region: c.srv.config.Region,
			},
		}
		requests = append(requests, req)
		available := maxIdsPerReap

		if remaining := len(deployments) - submittedDeployments; remaining > 0 {
			if remaining <= available {
				req.Deployments = deployments[submittedDeployments:]
				submittedDeployments += remaining
			} else {
				req.Deployments = deployments[submittedDeployments : submittedDeployments+available]
				submittedDeployments += available
			}
		}
	}

	return requests
}

// allocGCEligible returns whether the allocation is eligible to be garbage
// collected according to its terminal status and its reschedule trackers.
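// In rough order: allocs that are non-terminal or newer than the threshold are
// never collected; otherwise an alloc is collected unless it is a failed alloc
// that may still be rescheduled, i.e. its job still exists and is not stopped
// or dead, its desired status is not stop, rescheduling is enabled, no
// replacement alloc exists, and either the policy is unlimited, no reschedule
// attempt has been recorded yet, or the most recent attempt is still within
// the policy interval.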
func allocGCEligible(a *structs.Allocation, job *structs.Job, gcTime time.Time, thresholdIndex uint64) bool {
	// Not eligible if the alloc is not terminal or is newer than the threshold
	if !a.TerminalStatus() || a.ModifyIndex > thresholdIndex {
		return false
	}

	// If the job is deleted, stopped or dead all allocs can be removed
	if job == nil || job.Stop || job.Status == structs.JobStatusDead {
		return true
	}

	// If the allocation's desired state is Stop, it can be GCed even if it
	// has failed and hasn't been rescheduled. This can happen during job updates
	if a.DesiredStatus == structs.AllocDesiredStatusStop {
		return true
	}

	// If the alloc hasn't failed then we don't need to consider it for rescheduling
	// Rescheduling needs to copy over information from the previous alloc so that it
	// can enforce the reschedule policy
	if a.ClientStatus != structs.AllocClientStatusFailed {
		return true
	}

	var reschedulePolicy *structs.ReschedulePolicy
	tg := job.LookupTaskGroup(a.TaskGroup)

	if tg != nil {
		reschedulePolicy = tg.ReschedulePolicy
	}
	// No reschedule policy or rescheduling is disabled
	if reschedulePolicy == nil || (!reschedulePolicy.Unlimited && reschedulePolicy.Attempts == 0) {
		return true
	}
	// Reschedule tracking information has been carried forward to a replacement alloc
	if a.NextAllocation != "" {
		return true
	}

	// This task group has unlimited rescheduling and the alloc has not been replaced, so we can't GC it yet
	if reschedulePolicy.Unlimited {
		return false
	}

	// No reschedule attempts have been made yet
	if a.RescheduleTracker == nil || len(a.RescheduleTracker.Events) == 0 {
		return false
	}

	// Don't GC if the most recent reschedule attempt is within the time interval
	interval := reschedulePolicy.Interval
	lastIndex := len(a.RescheduleTracker.Events)
	lastRescheduleEvent := a.RescheduleTracker.Events[lastIndex-1]
	timeDiff := gcTime.UTC().UnixNano() - lastRescheduleEvent.RescheduleTime

	return timeDiff > interval.Nanoseconds()
}