github.com/manicqin/nomad@v0.9.5/nomad/core_sched.go

     1  package nomad
     2  
     3  import (
     4  	"fmt"
     5  	"math"
     6  	"time"
     7  
     8  	log "github.com/hashicorp/go-hclog"
     9  	memdb "github.com/hashicorp/go-memdb"
    10  	version "github.com/hashicorp/go-version"
    11  	"github.com/hashicorp/nomad/nomad/state"
    12  	"github.com/hashicorp/nomad/nomad/structs"
    13  	"github.com/hashicorp/nomad/scheduler"
    14  )
    15  
    16  var (
    17  	// maxIdsPerReap is the maximum number of evals and allocations to reap in a
    18  	// single Raft transaction. This is to ensure that the Raft message does not
    19  	// become too large.
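        	// With 36-byte UUID strings, (1024*256)/36 works out to roughly 7,281
        	// IDs, i.e. about 0.25 MB of raw ID data, per request.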
    20  	maxIdsPerReap = (1024 * 256) / 36 // 0.25 MB of ids.
    21  )
    22  
    23  // CoreScheduler is a special "scheduler" that is registered
    24  // as "_core". It is used to run various administrative work
    25  // across the cluster.
    26  type CoreScheduler struct {
    27  	srv    *Server
    28  	snap   *state.StateSnapshot
    29  	logger log.Logger
    30  }
    31  
    32  // NewCoreScheduler returns a new core scheduler instance
    33  func NewCoreScheduler(srv *Server, snap *state.StateSnapshot) scheduler.Scheduler {
    34  	s := &CoreScheduler{
    35  		srv:    srv,
    36  		snap:   snap,
    37  		logger: srv.logger.ResetNamed("core.sched"),
    38  	}
    39  	return s
    40  }
    41  
    42  // Process is used to implement the scheduler.Scheduler interface
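        // The JobID of a core evaluation carries one of the structs.CoreJob*
        // constants handled below; these evals are enqueued by the leader's
        // periodic GC timers or, for CoreJobForceGC, by an operator-requested GC.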
    43  func (c *CoreScheduler) Process(eval *structs.Evaluation) error {
    44  	switch eval.JobID {
    45  	case structs.CoreJobEvalGC:
    46  		return c.evalGC(eval)
    47  	case structs.CoreJobNodeGC:
    48  		return c.nodeGC(eval)
    49  	case structs.CoreJobJobGC:
    50  		return c.jobGC(eval)
    51  	case structs.CoreJobDeploymentGC:
    52  		return c.deploymentGC(eval)
    53  	case structs.CoreJobForceGC:
    54  		return c.forceGC(eval)
    55  	default:
    56  		return fmt.Errorf("core scheduler cannot handle job '%s'", eval.JobID)
    57  	}
    58  }
    59  
    60  // forceGC is used to garbage collect all eligible objects.
    61  func (c *CoreScheduler) forceGC(eval *structs.Evaluation) error {
    62  	if err := c.jobGC(eval); err != nil {
    63  		return err
    64  	}
    65  	if err := c.evalGC(eval); err != nil {
    66  		return err
    67  	}
    68  	if err := c.deploymentGC(eval); err != nil {
    69  		return err
    70  	}
    71  
    72  	// Node GC must occur after the others to ensure the allocations are
    73  	// cleared.
    74  	return c.nodeGC(eval)
    75  }
    76  
    77  // jobGC is used to garbage collect eligible jobs.
    78  func (c *CoreScheduler) jobGC(eval *structs.Evaluation) error {
    79  	// Get all the jobs eligible for garbage collection.
    80  	ws := memdb.NewWatchSet()
    81  	iter, err := c.snap.JobsByGC(ws, true)
    82  	if err != nil {
    83  		return err
    84  	}
    85  
    86  	var oldThreshold uint64
    87  	if eval.JobID == structs.CoreJobForceGC {
    88  		// The GC was forced, so set the threshold to its maximum so everything
    89  		// will GC.
    90  		oldThreshold = math.MaxUint64
    91  		c.logger.Debug("forced job GC")
    92  	} else {
    93  		// Get the time table to calculate GC cutoffs.
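        		// The time table is a rough mapping from wall-clock time to Raft
        		// index: NearestIndex returns (approximately) the highest index
        		// recorded at or before the cutoff, so anything written after the
        		// cutoff has an index above the threshold and is skipped below.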
    94  		tt := c.srv.fsm.TimeTable()
    95  		cutoff := time.Now().UTC().Add(-1 * c.srv.config.JobGCThreshold)
    96  		oldThreshold = tt.NearestIndex(cutoff)
    97  		c.logger.Debug("job GC scanning before cutoff index",
    98  			"index", oldThreshold, "job_gc_threshold", c.srv.config.JobGCThreshold)
    99  	}
   100  
   101  	// Collect the allocations, evaluations and jobs to GC
   102  	var gcAlloc, gcEval []string
   103  	var gcJob []*structs.Job
   104  
   105  OUTER:
   106  	for i := iter.Next(); i != nil; i = iter.Next() {
   107  		job := i.(*structs.Job)
   108  
   109  		// Ignore new jobs.
   110  		if job.CreateIndex > oldThreshold {
   111  			continue
   112  		}
   113  
   114  		ws := memdb.NewWatchSet()
   115  		evals, err := c.snap.EvalsByJob(ws, job.Namespace, job.ID)
   116  		if err != nil {
   117  			c.logger.Error("job GC failed to get evals for job", "job", job.ID, "error", err)
   118  			continue
   119  		}
   120  
   121  		allEvalsGC := true
   122  		var jobAlloc, jobEval []string
   123  		for _, eval := range evals {
   124  			gc, allocs, err := c.gcEval(eval, oldThreshold, true)
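        			// An error inspecting any eval skips the entire job rather than
        			// risking a partial collection of its evals and allocs.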
   125  			if err != nil {
   126  				continue OUTER
   127  			}
   128  
   129  			if gc {
   130  				jobEval = append(jobEval, eval.ID)
   131  				jobAlloc = append(jobAlloc, allocs...)
   132  			} else {
   133  				allEvalsGC = false
   134  				break
   135  			}
   136  		}
   137  
   138  		// Job is eligible for garbage collection
   139  		if allEvalsGC {
   140  			gcJob = append(gcJob, job)
   141  			gcAlloc = append(gcAlloc, jobAlloc...)
   142  			gcEval = append(gcEval, jobEval...)
   143  		}
   144  	}
   145  
   146  	// Fast-path the nothing case
   147  	if len(gcEval) == 0 && len(gcAlloc) == 0 && len(gcJob) == 0 {
   148  		return nil
   149  	}
   150  	c.logger.Debug("job GC found eligible objects",
   151  		"jobs", len(gcJob), "evals", len(gcEval), "allocs", len(gcAlloc))
   152  
   153  	// Reap the evals and allocs
   154  	if err := c.evalReap(gcEval, gcAlloc); err != nil {
   155  		return err
   156  	}
   157  
   158  	// Reap the jobs
   159  	return c.jobReap(gcJob, eval.LeaderACL)
   160  }
   161  
   162  // jobReap contacts the leader and issues a reap on the passed jobs
   163  func (c *CoreScheduler) jobReap(jobs []*structs.Job, leaderACL string) error {
   164  	// Call to the leader to issue the reap
   165  	for _, req := range c.partitionJobReap(jobs, leaderACL) {
   166  		var resp structs.JobBatchDeregisterResponse
   167  		if err := c.srv.RPC("Job.BatchDeregister", req, &resp); err != nil {
   168  			c.logger.Error("batch job reap failed", "error", err)
   169  			return err
   170  		}
   171  	}
   172  
   173  	return nil
   174  }
   175  
   176  // partitionJobReap returns a list of JobBatchDeregisterRequests to make,
   177  // ensuring a single request does not contain too many jobs. This is necessary
   178  // to ensure that the Raft transaction does not become too large.
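        // Note that maxIdsPerReap, sized for 36-byte UUID strings, is reused here
        // as the per-request job count even though each entry also carries a
        // namespace and deregister options.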
   179  func (c *CoreScheduler) partitionJobReap(jobs []*structs.Job, leaderACL string) []*structs.JobBatchDeregisterRequest {
   180  	option := &structs.JobDeregisterOptions{Purge: true}
   181  	var requests []*structs.JobBatchDeregisterRequest
   182  	submittedJobs := 0
   183  	for submittedJobs != len(jobs) {
   184  		req := &structs.JobBatchDeregisterRequest{
   185  			Jobs: make(map[structs.NamespacedID]*structs.JobDeregisterOptions),
   186  			WriteRequest: structs.WriteRequest{
   187  				Region:    c.srv.config.Region,
   188  				AuthToken: leaderACL,
   189  			},
   190  		}
   191  		requests = append(requests, req)
   192  		available := maxIdsPerReap
   193  
   194  		if remaining := len(jobs) - submittedJobs; remaining > 0 {
   195  			if remaining <= available {
   196  				for _, job := range jobs[submittedJobs:] {
   197  					jns := structs.NamespacedID{ID: job.ID, Namespace: job.Namespace}
   198  					req.Jobs[jns] = option
   199  				}
   200  				submittedJobs += remaining
   201  			} else {
   202  				for _, job := range jobs[submittedJobs : submittedJobs+available] {
   203  					jns := structs.NamespacedID{ID: job.ID, Namespace: job.Namespace}
   204  					req.Jobs[jns] = option
   205  				}
   206  				submittedJobs += available
   207  			}
   208  		}
   209  	}
   210  
   211  	return requests
   212  }
   213  
   214  // evalGC is used to garbage collect old evaluations
   215  func (c *CoreScheduler) evalGC(eval *structs.Evaluation) error {
   216  	// Iterate over the evaluations
   217  	ws := memdb.NewWatchSet()
   218  	iter, err := c.snap.Evals(ws)
   219  	if err != nil {
   220  		return err
   221  	}
   222  
   223  	var oldThreshold uint64
   224  	if eval.JobID == structs.CoreJobForceGC {
   225  		// The GC was forced, so set the threshold to its maximum so everything
   226  		// will GC.
   227  		oldThreshold = math.MaxUint64
   228  		c.logger.Debug("forced eval GC")
   229  	} else {
   230  		// Compute the old threshold limit for GC using the FSM
   231  		// time table.  This is a rough mapping of a time to the
   232  		// Raft index it belongs to.
   233  		tt := c.srv.fsm.TimeTable()
   234  		cutoff := time.Now().UTC().Add(-1 * c.srv.config.EvalGCThreshold)
   235  		oldThreshold = tt.NearestIndex(cutoff)
   236  		c.logger.Debug("eval GC scanning before cutoff index",
   237  			"index", oldThreshold, "eval_gc_threshold", c.srv.config.EvalGCThreshold)
   238  	}
   239  
   240  	// Collect the allocations and evaluations to GC
   241  	var gcAlloc, gcEval []string
   242  	for raw := iter.Next(); raw != nil; raw = iter.Next() {
   243  		eval := raw.(*structs.Evaluation)
   244  
   245  		// The Evaluation GC should not handle batch jobs since those need to be
   246  		// garbage collected in one shot
   247  		gc, allocs, err := c.gcEval(eval, oldThreshold, false)
   248  		if err != nil {
   249  			return err
   250  		}
   251  
   252  		if gc {
   253  			gcEval = append(gcEval, eval.ID)
   254  		}
   255  		gcAlloc = append(gcAlloc, allocs...)
   256  	}
   257  
   258  	// Fast-path the nothing case
   259  	if len(gcEval) == 0 && len(gcAlloc) == 0 {
   260  		return nil
   261  	}
   262  	c.logger.Debug("eval GC found eligible objects",
   263  		"evals", len(gcEval), "allocs", len(gcAlloc))
   264  
   265  	return c.evalReap(gcEval, gcAlloc)
   266  }
   267  
   268  // gcEval returns whether the eval should be garbage collected given a raft
   269  // threshold index. The eval is not collected if it, or any of its allocations,
   270  // is non-terminal or newer than the threshold. If the eval should be garbage
   271  // collected, the IDs of the allocations that should be removed along with it
   272  // are also returned.
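        //
        // allowBatch is true only when called from jobGC, which collects a dead
        // batch job together with all of its evals and allocs in one pass; evalGC
        // passes false so batch evals are left for jobGC to handle.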
   273  func (c *CoreScheduler) gcEval(eval *structs.Evaluation, thresholdIndex uint64, allowBatch bool) (
   274  	bool, []string, error) {
   275  	// Ignore non-terminal and new evaluations
   276  	if !eval.TerminalStatus() || eval.ModifyIndex > thresholdIndex {
   277  		return false, nil, nil
   278  	}
   279  
   280  	// Create a watchset
   281  	ws := memdb.NewWatchSet()
   282  
   283  	// Look up the job
   284  	job, err := c.snap.JobByID(ws, eval.Namespace, eval.JobID)
   285  	if err != nil {
   286  		return false, nil, err
   287  	}
   288  
   289  	// Get the allocations by eval
   290  	allocs, err := c.snap.AllocsByEval(ws, eval.ID)
   291  	if err != nil {
   292  		c.logger.Error("failed to get allocs for eval",
   293  			"eval_id", eval.ID, "error", err)
   294  		return false, nil, err
   295  	}
   296  
   297  	// If the eval is from a running "batch" job we don't want to garbage
   298  	// collect its allocations. If there is a long-running batch job and its
   299  	// terminal allocations were GC'd, the scheduler would re-run those
   300  	// allocations.
   301  	if eval.Type == structs.JobTypeBatch {
   302  		// Check whether the batch job is still running.
   303  		//
   304  		// The eval can be collected if:
   305  		//  - the job no longer exists
   306  		//  - the job is stopped and dead
   307  		//  - allowBatch is set and the job is dead
   308  		collect := false
   309  		if job == nil {
   310  			collect = true
   311  		} else if job.Status != structs.JobStatusDead {
   312  			collect = false
   313  		} else if job.Stop {
   314  			collect = true
   315  		} else if allowBatch {
   316  			collect = true
   317  		}
   318  
   319  		// We don't want to GC anything related to a batch job that is not dead,
   320  		// but its terminal allocs from older job versions can still be pruned.
   321  		if !collect {
   322  			// Find allocs from older versions of the job (based on CreateIndex) and GC them if terminal
   323  			oldAllocs := olderVersionTerminalAllocs(allocs, job)
   324  			return false, oldAllocs, nil
   325  		}
   326  	}
   327  
   328  	// Scan the allocations to ensure they are terminal and old
   329  	gcEval := true
   330  	var gcAllocIDs []string
   331  	for _, alloc := range allocs {
   332  		if !allocGCEligible(alloc, job, time.Now(), thresholdIndex) {
   333  			// Can't GC the evaluation since not all of the allocations are
   334  			// terminal
   335  			gcEval = false
   336  		} else {
   337  			// The allocation is eligible to be GC'd
   338  			gcAllocIDs = append(gcAllocIDs, alloc.ID)
   339  		}
   340  	}
   341  
   342  	return gcEval, gcAllocIDs, nil
   343  }
   344  
   345  // olderVersionTerminalAllocs returns the IDs of terminal allocations whose copy
   346  // of the job has a CreateIndex older than the given job's CreateIndex.
   347  func olderVersionTerminalAllocs(allocs []*structs.Allocation, job *structs.Job) []string {
   348  	var ret []string
   349  	for _, alloc := range allocs {
   350  		if alloc.Job != nil && alloc.Job.CreateIndex < job.CreateIndex && alloc.TerminalStatus() {
   351  			ret = append(ret, alloc.ID)
   352  		}
   353  	}
   354  	return ret
   355  }
   356  
   357  // evalReap contacts the leader and issues a reap on the passed evals and
   358  // allocs.
   359  func (c *CoreScheduler) evalReap(evals, allocs []string) error {
   360  	// Call to the leader to issue the reap
   361  	for _, req := range c.partitionEvalReap(evals, allocs) {
   362  		var resp structs.GenericResponse
   363  		if err := c.srv.RPC("Eval.Reap", req, &resp); err != nil {
   364  			c.logger.Error("eval reap failed", "error", err)
   365  			return err
   366  		}
   367  	}
   368  
   369  	return nil
   370  }
   371  
   372  // partitionEvalReap returns a list of EvalDeleteRequest to make, ensuring a single
   373  // request does not contain too many allocations and evaluations. This is
   374  // necessary to ensure that the Raft transaction does not become too large.
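        //
        // For example, with maxIdsPerReap = 7,281, reaping 10,000 allocs and 500
        // evals produces two requests: the first carries 7,281 allocs, the second
        // carries the remaining 2,719 allocs plus all 500 evals.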
   375  func (c *CoreScheduler) partitionEvalReap(evals, allocs []string) []*structs.EvalDeleteRequest {
   376  	var requests []*structs.EvalDeleteRequest
   377  	submittedEvals, submittedAllocs := 0, 0
   378  	for submittedEvals != len(evals) || submittedAllocs != len(allocs) {
   379  		req := &structs.EvalDeleteRequest{
   380  			WriteRequest: structs.WriteRequest{
   381  				Region: c.srv.config.Region,
   382  			},
   383  		}
   384  		requests = append(requests, req)
   385  		available := maxIdsPerReap
   386  
   387  		// Add the allocs first
   388  		if remaining := len(allocs) - submittedAllocs; remaining > 0 {
   389  			if remaining <= available {
   390  				req.Allocs = allocs[submittedAllocs:]
   391  				available -= remaining
   392  				submittedAllocs += remaining
   393  			} else {
   394  				req.Allocs = allocs[submittedAllocs : submittedAllocs+available]
   395  				submittedAllocs += available
   396  
   397  				// Exhausted space so skip adding evals
   398  				continue
   399  			}
   400  		}
   401  
   402  		// Add the evals
   403  		if remaining := len(evals) - submittedEvals; remaining > 0 {
   404  			if remaining <= available {
   405  				req.Evals = evals[submittedEvals:]
   406  				submittedEvals += remaining
   407  			} else {
   408  				req.Evals = evals[submittedEvals : submittedEvals+available]
   409  				submittedEvals += available
   410  			}
   411  		}
   412  	}
   413  
   414  	return requests
   415  }
   416  
   417  // nodeGC is used to garbage collect old nodes
   418  func (c *CoreScheduler) nodeGC(eval *structs.Evaluation) error {
   419  	// Iterate over the nodes
   420  	ws := memdb.NewWatchSet()
   421  	iter, err := c.snap.Nodes(ws)
   422  	if err != nil {
   423  		return err
   424  	}
   425  
   426  	var oldThreshold uint64
   427  	if eval.JobID == structs.CoreJobForceGC {
   428  		// The GC was forced, so set the threshold to its maximum so everything
   429  		// will GC.
   430  		oldThreshold = math.MaxUint64
   431  		c.logger.Debug("forced node GC")
   432  	} else {
   433  		// Compute the old threshold limit for GC using the FSM
   434  		// time table.  This is a rough mapping of a time to the
   435  		// Raft index it belongs to.
   436  		tt := c.srv.fsm.TimeTable()
   437  		cutoff := time.Now().UTC().Add(-1 * c.srv.config.NodeGCThreshold)
   438  		oldThreshold = tt.NearestIndex(cutoff)
   439  		c.logger.Debug("node GC scanning before cutoff index",
   440  			"index", oldThreshold, "node_gc_threshold", c.srv.config.NodeGCThreshold)
   441  	}
   442  
   443  	// Collect the nodes to GC
   444  	var gcNode []string
   445  OUTER:
   446  	for {
   447  		raw := iter.Next()
   448  		if raw == nil {
   449  			break
   450  		}
   451  		node := raw.(*structs.Node)
   452  
   453  		// Ignore non-terminal and new nodes
   454  		if !node.TerminalStatus() || node.ModifyIndex > oldThreshold {
   455  			continue
   456  		}
   457  
   458  		// Get the allocations by node
   459  		ws := memdb.NewWatchSet()
   460  		allocs, err := c.snap.AllocsByNode(ws, node.ID)
   461  		if err != nil {
   462  			c.logger.Error("failed to get allocs for node",
   463  				"node_id", node.ID, "error", err)
   464  			continue
   465  		}
   466  
   467  		// If there are any non-terminal allocations, skip the node. If the node
   468  		// is terminal and the allocations are not, the scheduler may not have
   469  		// run yet to transition the allocs on the node to terminal. We delay
   470  		// GC'ing until this happens.
   471  		for _, alloc := range allocs {
   472  			if !alloc.TerminalStatus() {
   473  				continue OUTER
   474  			}
   475  		}
   476  
   477  		// Node is eligible for garbage collection
   478  		gcNode = append(gcNode, node.ID)
   479  	}
   480  
   481  	// Fast-path the nothing case
   482  	if len(gcNode) == 0 {
   483  		return nil
   484  	}
   485  	c.logger.Debug("node GC found eligible nodes", "nodes", len(gcNode))
   486  	return c.nodeReap(eval, gcNode)
   487  }
   488  
   489  func (c *CoreScheduler) nodeReap(eval *structs.Evaluation, nodeIDs []string) error {
   490  	// For old clusters, send single deregistration messages COMPAT(0.11)
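        	// Batch node deregistration requires every server to be running at
        	// least 0.9.4; otherwise fall back to one Node.Deregister RPC per node.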
   491  	minVersionBatchNodeDeregister := version.Must(version.NewVersion("0.9.4"))
   492  	if !ServersMeetMinimumVersion(c.srv.Members(), minVersionBatchNodeDeregister, true) {
   493  		for _, id := range nodeIDs {
   494  			req := structs.NodeDeregisterRequest{
   495  				NodeID: id,
   496  				WriteRequest: structs.WriteRequest{
   497  					Region:    c.srv.config.Region,
   498  					AuthToken: eval.LeaderACL,
   499  				},
   500  			}
   501  			var resp structs.NodeUpdateResponse
   502  			if err := c.srv.RPC("Node.Deregister", &req, &resp); err != nil {
   503  				c.logger.Error("node reap failed", "node_id", id, "error", err)
   504  				return err
   505  			}
   506  		}
   507  		return nil
   508  	}
   509  
   510  	// Call to the leader to issue the reap
   511  	for _, ids := range partitionAll(maxIdsPerReap, nodeIDs) {
   512  		req := structs.NodeBatchDeregisterRequest{
   513  			NodeIDs: ids,
   514  			WriteRequest: structs.WriteRequest{
   515  				Region:    c.srv.config.Region,
   516  				AuthToken: eval.LeaderACL,
   517  			},
   518  		}
   519  		var resp structs.NodeUpdateResponse
   520  		if err := c.srv.RPC("Node.BatchDeregister", &req, &resp); err != nil {
   521  			c.logger.Error("node reap failed", "node_ids", ids, "error", err)
   522  			return err
   523  		}
   524  	}
   525  	return nil
   526  }
   527  
   528  // deploymentGC is used to garbage collect old deployments
   529  func (c *CoreScheduler) deploymentGC(eval *structs.Evaluation) error {
   530  	// Iterate over the deployments
   531  	ws := memdb.NewWatchSet()
   532  	iter, err := c.snap.Deployments(ws)
   533  	if err != nil {
   534  		return err
   535  	}
   536  
   537  	var oldThreshold uint64
   538  	if eval.JobID == structs.CoreJobForceGC {
   539  		// The GC was forced, so set the threshold to its maximum so everything
   540  		// will GC.
   541  		oldThreshold = math.MaxUint64
   542  		c.logger.Debug("forced deployment GC")
   543  	} else {
   544  		// Compute the old threshold limit for GC using the FSM
   545  		// time table.  This is a rough mapping of a time to the
   546  		// Raft index it belongs to.
   547  		tt := c.srv.fsm.TimeTable()
   548  		cutoff := time.Now().UTC().Add(-1 * c.srv.config.DeploymentGCThreshold)
   549  		oldThreshold = tt.NearestIndex(cutoff)
   550  		c.logger.Debug("deployment GC scanning before cutoff index",
   551  			"index", oldThreshold, "deployment_gc_threshold", c.srv.config.DeploymentGCThreshold)
   552  	}
   553  
   554  	// Collect the deployments to GC
   555  	var gcDeployment []string
   556  
   557  OUTER:
   558  	for {
   559  		raw := iter.Next()
   560  		if raw == nil {
   561  			break
   562  		}
   563  		deploy := raw.(*structs.Deployment)
   564  
   565  		// Ignore non-terminal and new deployments
   566  		if deploy.Active() || deploy.ModifyIndex > oldThreshold {
   567  			continue
   568  		}
   569  
   570  		// Look up the allocations referencing this deployment.
   571  		allocs, err := c.snap.AllocsByDeployment(ws, deploy.ID)
   572  		if err != nil {
   573  			c.logger.Error("failed to get allocs for deployment",
   574  				"deployment_id", deploy.ID, "error", err)
   575  			continue
   576  		}
   577  
   578  		// Ensure there is no non-terminal allocation referencing the deployment.
   579  		for _, alloc := range allocs {
   580  			if !alloc.TerminalStatus() {
   581  				continue OUTER
   582  			}
   583  		}
   584  
   585  		// Deployment is eligible for garbage collection
   586  		gcDeployment = append(gcDeployment, deploy.ID)
   587  	}
   588  
   589  	// Fast-path the nothing case
   590  	if len(gcDeployment) == 0 {
   591  		return nil
   592  	}
   593  	c.logger.Debug("deployment GC found eligible deployments", "deployments", len(gcDeployment))
   594  	return c.deploymentReap(gcDeployment)
   595  }
   596  
   597  // deploymentReap contacts the leader and issues a reap on the passed
   598  // deployments.
   599  func (c *CoreScheduler) deploymentReap(deployments []string) error {
   600  	// Call to the leader to issue the reap
   601  	for _, req := range c.partitionDeploymentReap(deployments) {
   602  		var resp structs.GenericResponse
   603  		if err := c.srv.RPC("Deployment.Reap", req, &resp); err != nil {
   604  			c.logger.Error("deployment reap failed", "error", err)
   605  			return err
   606  		}
   607  	}
   608  
   609  	return nil
   610  }
   611  
   612  // partitionDeploymentReap returns a list of DeploymentDeleteRequest to make,
   613  // ensuring a single request does not contain too many deployments. This is
   614  // necessary to ensure that the Raft transaction does not become too large.
   615  func (c *CoreScheduler) partitionDeploymentReap(deployments []string) []*structs.DeploymentDeleteRequest {
   616  	var requests []*structs.DeploymentDeleteRequest
   617  	submittedDeployments := 0
   618  	for submittedDeployments != len(deployments) {
   619  		req := &structs.DeploymentDeleteRequest{
   620  			WriteRequest: structs.WriteRequest{
   621  				Region: c.srv.config.Region,
   622  			},
   623  		}
   624  		requests = append(requests, req)
   625  		available := maxIdsPerReap
   626  
   627  		if remaining := len(deployments) - submittedDeployments; remaining > 0 {
   628  			if remaining <= available {
   629  				req.Deployments = deployments[submittedDeployments:]
   630  				submittedDeployments += remaining
   631  			} else {
   632  				req.Deployments = deployments[submittedDeployments : submittedDeployments+available]
   633  				submittedDeployments += available
   634  			}
   635  		}
   636  	}
   637  
   638  	return requests
   639  }
   640  
   641  // allocGCEligible returns whether the allocation is eligible to be garbage
   642  // collected according to its terminal status and its reschedule tracker
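        //
        // The checks are ordered from general to specific: terminal status and age
        // first, then job state, and only failed allocs of live jobs have their
        // reschedule policy and tracker consulted.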
   643  func allocGCEligible(a *structs.Allocation, job *structs.Job, gcTime time.Time, thresholdIndex uint64) bool {
   644  	// Ignore allocs that are not terminal or are newer than the threshold
   645  	if !a.TerminalStatus() || a.ModifyIndex > thresholdIndex {
   646  		return false
   647  	}
   648  
   649  	// If the allocation is still running on the client we can not garbage
   650  	// collect it.
   651  	if a.ClientStatus == structs.AllocClientStatusRunning {
   652  		return false
   653  	}
   654  
   655  	// If the job is deleted, stopped or dead all allocs can be removed
   656  	if job == nil || job.Stop || job.Status == structs.JobStatusDead {
   657  		return true
   658  	}
   659  
   660  	// If the allocation's desired state is Stop, it can be GCed even if it
   661  	// has failed and hasn't been rescheduled. This can happen during job updates
   662  	if a.DesiredStatus == structs.AllocDesiredStatusStop {
   663  		return true
   664  	}
   665  
   666  	// If the alloc hasn't failed then we don't need to consider it for rescheduling
   667  	// Rescheduling needs to copy over information from the previous alloc so that it
   668  	// can enforce the reschedule policy
   669  	if a.ClientStatus != structs.AllocClientStatusFailed {
   670  		return true
   671  	}
   672  
   673  	var reschedulePolicy *structs.ReschedulePolicy
   674  	tg := job.LookupTaskGroup(a.TaskGroup)
   675  
   676  	if tg != nil {
   677  		reschedulePolicy = tg.ReschedulePolicy
   678  	}
   679  	// No reschedule policy or rescheduling is disabled
   680  	if reschedulePolicy == nil || (!reschedulePolicy.Unlimited && reschedulePolicy.Attempts == 0) {
   681  		return true
   682  	}
   683  	// Reschedule tracking information has been carried forward to a replacement alloc
   684  	if a.NextAllocation != "" {
   685  		return true
   686  	}
   687  
   688  	// This task has unlimited rescheduling and the alloc has not been replaced, so we can't GC it yet
   689  	if reschedulePolicy.Unlimited {
   690  		return false
   691  	}
   692  
   693  	// No reschedule attempts have been made yet
   694  	if a.RescheduleTracker == nil || len(a.RescheduleTracker.Events) == 0 {
   695  		return false
   696  	}
   697  
   698  	// Don't GC if the most recent reschedule attempt is within the policy's interval
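        	// For example, with a 24h reschedule interval, a failed alloc whose last
        	// reschedule event was 3h before gcTime is kept so that attempt can
        	// still be counted against the policy.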
   699  	interval := reschedulePolicy.Interval
   700  	lastIndex := len(a.RescheduleTracker.Events)
   701  	lastRescheduleEvent := a.RescheduleTracker.Events[lastIndex-1]
   702  	timeDiff := gcTime.UTC().UnixNano() - lastRescheduleEvent.RescheduleTime
   703  
   704  	return timeDiff > interval.Nanoseconds()
   705  }