github.com/ryanslade/nomad@v0.2.4-0.20160128061903-fc95782f2089/nomad/core_sched.go

package nomad

import (
	"fmt"
	"time"

	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/scheduler"
)

// CoreScheduler is a special "scheduler" that is registered
// as "_core". It is used to run various administrative work
// across the cluster.
type CoreScheduler struct {
	srv  *Server
	snap *state.StateSnapshot
}

// NewCoreScheduler is used to return a new core scheduler instance
func NewCoreScheduler(srv *Server, snap *state.StateSnapshot) scheduler.Scheduler {
	s := &CoreScheduler{
		srv:  srv,
		snap: snap,
	}
	return s
}

// Process is used to implement the scheduler.Scheduler interface
func (c *CoreScheduler) Process(eval *structs.Evaluation) error {
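	// For core evaluations the JobID does not refer to a user job; it names
	// the internal maintenance task that should be run.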
	switch eval.JobID {
	case structs.CoreJobEvalGC:
		return c.evalGC(eval)
	case structs.CoreJobNodeGC:
		return c.nodeGC(eval)
	case structs.CoreJobJobGC:
		return c.jobGC(eval)
	default:
		return fmt.Errorf("core scheduler cannot handle job '%s'", eval.JobID)
	}
}

// jobGC is used to garbage collect eligible jobs.
func (c *CoreScheduler) jobGC(eval *structs.Evaluation) error {
	// Get all the jobs eligible for garbage collection.
	iter, err := c.snap.JobsByGC(true)
	if err != nil {
		return err
	}

	// Get the time table to calculate GC cutoffs. The time table gives a
	// rough mapping from a wall-clock time to the Raft index in use at that
	// time, so anything written before the cutoff index is old enough to GC.
	tt := c.srv.fsm.TimeTable()
	cutoff := time.Now().UTC().Add(-1 * c.srv.config.JobGCThreshold)
	oldThreshold := tt.NearestIndex(cutoff)
	c.srv.logger.Printf("[DEBUG] sched.core: job GC: scanning before index %d (%v)",
		oldThreshold, c.srv.config.JobGCThreshold)

	// Collect the allocations, evaluations and jobs to GC
	var gcAlloc, gcEval, gcJob []string

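	// OUTER labels the job loop so the eval scan below can move on to the
	// next job as soon as one of its evals turns out to be ineligible.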
OUTER:
	for i := iter.Next(); i != nil; i = iter.Next() {
		job := i.(*structs.Job)

		// Ignore new jobs.
		if job.CreateIndex > oldThreshold {
			continue
		}

		evals, err := c.snap.EvalsByJob(job.ID)
		if err != nil {
			c.srv.logger.Printf("[ERR] sched.core: failed to get evals for job %s: %v", job.ID, err)
			continue
		}

		// Collect this job's evals and allocs separately and only commit them
		// to the GC lists once every eval has been confirmed collectible, so a
		// skipped job leaves nothing queued for the reaper.
		var jobEval, jobAlloc []string
		for _, eval := range evals {
			gc, allocs, err := c.gcEval(eval, oldThreshold)
			if err != nil || !gc {
				continue OUTER
			}

			jobEval = append(jobEval, eval.ID)
			jobAlloc = append(jobAlloc, allocs...)
		}
		gcEval = append(gcEval, jobEval...)
		gcAlloc = append(gcAlloc, jobAlloc...)

		// Job is eligible for garbage collection
		gcJob = append(gcJob, job.ID)
	}

	// Fast-path the nothing case
	if len(gcEval) == 0 && len(gcAlloc) == 0 && len(gcJob) == 0 {
		return nil
	}
	c.srv.logger.Printf("[DEBUG] sched.core: job GC: %d jobs, %d evaluations, %d allocs eligible",
		len(gcJob), len(gcEval), len(gcAlloc))

	// Reap the evals and allocs
	if err := c.evalReap(gcEval, gcAlloc); err != nil {
		return err
	}

	// Call to the leader to deregister the jobs.
	for _, job := range gcJob {
		req := structs.JobDeregisterRequest{
			JobID: job,
			WriteRequest: structs.WriteRequest{
				Region: c.srv.config.Region,
			},
		}
		var resp structs.JobDeregisterResponse
		if err := c.srv.RPC("Job.Deregister", &req, &resp); err != nil {
			c.srv.logger.Printf("[ERR] sched.core: job deregister failed: %v", err)
			return err
		}
	}

	return nil
}

// evalGC is used to garbage collect old evaluations
func (c *CoreScheduler) evalGC(eval *structs.Evaluation) error {
	// Iterate over the evaluations
	iter, err := c.snap.Evals()
	if err != nil {
		return err
	}

	// Compute the old threshold limit for GC using the FSM
	// time table. This is a rough mapping of a time to the
	// Raft index it belongs to.
	tt := c.srv.fsm.TimeTable()
	cutoff := time.Now().UTC().Add(-1 * c.srv.config.EvalGCThreshold)
	oldThreshold := tt.NearestIndex(cutoff)
	c.srv.logger.Printf("[DEBUG] sched.core: eval GC: scanning before index %d (%v)",
		oldThreshold, c.srv.config.EvalGCThreshold)

	// Collect the allocations and evaluations to GC
	var gcAlloc, gcEval []string
	for raw := iter.Next(); raw != nil; raw = iter.Next() {
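		// Each entry is an evaluation; gcEval decides whether it and its
		// allocations are old and terminal enough to collect.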
		eval := raw.(*structs.Evaluation)
		gc, allocs, err := c.gcEval(eval, oldThreshold)
		if err != nil {
			return err
		}

		if gc {
			gcEval = append(gcEval, eval.ID)
			gcAlloc = append(gcAlloc, allocs...)
		}
	}

	// Fast-path the nothing case
	if len(gcEval) == 0 && len(gcAlloc) == 0 {
		return nil
	}
	c.srv.logger.Printf("[DEBUG] sched.core: eval GC: %d evaluations, %d allocs eligible",
		len(gcEval), len(gcAlloc))

	return c.evalReap(gcEval, gcAlloc)
}

// gcEval returns whether the eval should be garbage collected given a Raft
// threshold index. The eval is disqualified from garbage collection if it or
// any of its allocations is newer than the threshold or not yet terminal. If
// the eval should be garbage collected, the IDs of the allocations that
// should be removed along with it are also returned.
func (c *CoreScheduler) gcEval(eval *structs.Evaluation, thresholdIndex uint64) (
	bool, []string, error) {
	// Ignore non-terminal and new evaluations
	if !eval.TerminalStatus() || eval.ModifyIndex > thresholdIndex {
		return false, nil, nil
	}

	// Get the allocations by eval
	allocs, err := c.snap.AllocsByEval(eval.ID)
	if err != nil {
		c.srv.logger.Printf("[ERR] sched.core: failed to get allocs for eval %s: %v",
			eval.ID, err)
		return false, nil, err
	}

	// Scan the allocations to ensure they are terminal and old
	for _, alloc := range allocs {
		if !alloc.TerminalStatus() || alloc.ModifyIndex > thresholdIndex {
			return false, nil, nil
		}
	}

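	// Collect the IDs of the allocations tied to this eval so the caller can
	// reap them alongside it.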
	allocIds := make([]string, len(allocs))
	for i, alloc := range allocs {
		allocIds[i] = alloc.ID
	}

	// Evaluation is eligible for garbage collection
	return true, allocIds, nil
}

// evalReap contacts the leader and issues a reap on the passed evals and
// allocs.
func (c *CoreScheduler) evalReap(evals, allocs []string) error {
	// Call to the leader to issue the reap
	req := structs.EvalDeleteRequest{
		Evals:  evals,
		Allocs: allocs,
		WriteRequest: structs.WriteRequest{
			Region: c.srv.config.Region,
		},
	}
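	// A single batched request lets the leader reap all of the evals and
	// allocs in one round trip.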
	var resp structs.GenericResponse
	if err := c.srv.RPC("Eval.Reap", &req, &resp); err != nil {
		c.srv.logger.Printf("[ERR] sched.core: eval reap failed: %v", err)
		return err
	}

	return nil
}

// nodeGC is used to garbage collect old nodes
func (c *CoreScheduler) nodeGC(eval *structs.Evaluation) error {
	// Iterate over the nodes
	iter, err := c.snap.Nodes()
	if err != nil {
		return err
	}

	// Compute the old threshold limit for GC using the FSM
	// time table. This is a rough mapping of a time to the
	// Raft index it belongs to.
	tt := c.srv.fsm.TimeTable()
	cutoff := time.Now().UTC().Add(-1 * c.srv.config.NodeGCThreshold)
	oldThreshold := tt.NearestIndex(cutoff)
	c.srv.logger.Printf("[DEBUG] sched.core: node GC: scanning before index %d (%v)",
		oldThreshold, c.srv.config.NodeGCThreshold)

	// Collect the nodes to GC
	var gcNode []string
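	// Only nodes that are terminal, older than the threshold, and free of
	// allocations are eligible for collection.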
	for {
		raw := iter.Next()
		if raw == nil {
			break
		}
		node := raw.(*structs.Node)

		// Ignore non-terminal and new nodes
		if !node.TerminalStatus() || node.ModifyIndex > oldThreshold {
			continue
		}

		// Get the allocations by node
		allocs, err := c.snap.AllocsByNode(node.ID)
		if err != nil {
			c.srv.logger.Printf("[ERR] sched.core: failed to get allocs for node %s: %v",
				node.ID, err)
			continue
		}

		// If there are any allocations, skip the node
		if len(allocs) > 0 {
			continue
		}

		// Node is eligible for garbage collection
		gcNode = append(gcNode, node.ID)
	}

	// Fast-path the nothing case
	if len(gcNode) == 0 {
		return nil
	}
	c.srv.logger.Printf("[DEBUG] sched.core: node GC: %d nodes eligible", len(gcNode))

	// Call to the leader to issue the reap
	for _, nodeID := range gcNode {
		req := structs.NodeDeregisterRequest{
			NodeID: nodeID,
			WriteRequest: structs.WriteRequest{
				Region: c.srv.config.Region,
			},
		}
		var resp structs.NodeUpdateResponse
		if err := c.srv.RPC("Node.Deregister", &req, &resp); err != nil {
			c.srv.logger.Printf("[ERR] sched.core: node '%s' reap failed: %v", nodeID, err)
			return err
		}
	}
	return nil
}
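
// Illustrative sketch only (not part of the original source): a core
// scheduler is constructed from a server and a state snapshot, and is then
// handed a core evaluation whose JobID names the GC task to run, e.g.:
//
//	sched := NewCoreScheduler(srv, snap)
//	err := sched.Process(&structs.Evaluation{JobID: structs.CoreJobEvalGC})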