github.com/ryanslade/nomad@v0.2.4-0.20160128061903-fc95782f2089/nomad/core_sched.go

package nomad

import (
	"fmt"
	"time"

	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/scheduler"
)

// CoreScheduler is a special "scheduler" that is registered
// as "_core". It is used to run various administrative work
// across the cluster.
type CoreScheduler struct {
	srv  *Server
	snap *state.StateSnapshot
}

// NewCoreScheduler is used to return a new core scheduler instance
func NewCoreScheduler(srv *Server, snap *state.StateSnapshot) scheduler.Scheduler {
	s := &CoreScheduler{
		srv:  srv,
		snap: snap,
	}
	return s
}

// Process is used to implement the scheduler.Scheduler interface
func (s *CoreScheduler) Process(eval *structs.Evaluation) error {
	switch eval.JobID {
	case structs.CoreJobEvalGC:
		return s.evalGC(eval)
	case structs.CoreJobNodeGC:
		return s.nodeGC(eval)
	case structs.CoreJobJobGC:
		return s.jobGC(eval)
	default:
		return fmt.Errorf("core scheduler cannot handle job '%s'", eval.JobID)
	}
}

// jobGC is used to garbage collect eligible jobs.
func (c *CoreScheduler) jobGC(eval *structs.Evaluation) error {
	// Get all the jobs eligible for garbage collection.
	iter, err := c.snap.JobsByGC(true)
	if err != nil {
		return err
	}

	// Get the time table to calculate GC cutoffs.
	tt := c.srv.fsm.TimeTable()
	cutoff := time.Now().UTC().Add(-1 * c.srv.config.JobGCThreshold)
	oldThreshold := tt.NearestIndex(cutoff)
	c.srv.logger.Printf("[DEBUG] sched.core: job GC: scanning before index %d (%v)",
		oldThreshold, c.srv.config.JobGCThreshold)

	// Collect the allocations, evaluations and jobs to GC
	var gcAlloc, gcEval, gcJob []string

OUTER:
	for i := iter.Next(); i != nil; i = iter.Next() {
		job := i.(*structs.Job)

		// Ignore new jobs.
		if job.CreateIndex > oldThreshold {
			continue
		}

		evals, err := c.snap.EvalsByJob(job.ID)
		if err != nil {
			c.srv.logger.Printf("[ERR] sched.core: failed to get evals for job %s: %v", job.ID, err)
			continue
		}

		for _, eval := range evals {
			gc, allocs, err := c.gcEval(eval, oldThreshold)
			if err != nil || !gc {
				continue OUTER
			}

			gcEval = append(gcEval, eval.ID)
			gcAlloc = append(gcAlloc, allocs...)
		}

		// Job is eligible for garbage collection
		gcJob = append(gcJob, job.ID)
	}

	// Fast-path the nothing case
	if len(gcEval) == 0 && len(gcAlloc) == 0 && len(gcJob) == 0 {
		return nil
	}
	c.srv.logger.Printf("[DEBUG] sched.core: job GC: %d jobs, %d evaluations, %d allocs eligible",
		len(gcJob), len(gcEval), len(gcAlloc))

	// Reap the evals and allocs
	if err := c.evalReap(gcEval, gcAlloc); err != nil {
		return err
	}

	// Call to the leader to deregister the jobs.
	for _, job := range gcJob {
		req := structs.JobDeregisterRequest{
			JobID: job,
			WriteRequest: structs.WriteRequest{
				Region: c.srv.config.Region,
			},
		}
		var resp structs.JobDeregisterResponse
		if err := c.srv.RPC("Job.Deregister", &req, &resp); err != nil {
			c.srv.logger.Printf("[ERR] sched.core: job deregister failed: %v", err)
			return err
		}
	}

	return nil
}

// evalGC is used to garbage collect old evaluations
func (c *CoreScheduler) evalGC(eval *structs.Evaluation) error {
	// Iterate over the evaluations
	iter, err := c.snap.Evals()
	if err != nil {
		return err
	}

	// Compute the old threshold limit for GC using the FSM
	// time table. This is a rough mapping of a time to the
	// Raft index it belongs to.
	tt := c.srv.fsm.TimeTable()
	cutoff := time.Now().UTC().Add(-1 * c.srv.config.EvalGCThreshold)
	oldThreshold := tt.NearestIndex(cutoff)
	c.srv.logger.Printf("[DEBUG] sched.core: eval GC: scanning before index %d (%v)",
		oldThreshold, c.srv.config.EvalGCThreshold)

	// Collect the allocations and evaluations to GC
	var gcAlloc, gcEval []string
	for raw := iter.Next(); raw != nil; raw = iter.Next() {
		eval := raw.(*structs.Evaluation)
		gc, allocs, err := c.gcEval(eval, oldThreshold)
		if err != nil {
			return err
		}

		if gc {
			gcEval = append(gcEval, eval.ID)
			gcAlloc = append(gcAlloc, allocs...)
		}
	}

	// Fast-path the nothing case
	if len(gcEval) == 0 && len(gcAlloc) == 0 {
		return nil
	}
	c.srv.logger.Printf("[DEBUG] sched.core: eval GC: %d evaluations, %d allocs eligible",
		len(gcEval), len(gcAlloc))

	return c.evalReap(gcEval, gcAlloc)
}

// gcEval returns whether the eval should be garbage collected given a Raft
// threshold index. An eval is eligible only if both it and all of its
// allocations are terminal and older than the threshold. If the eval should be
// garbage collected, the IDs of the allocations that should also be removed
// are returned.
func (c *CoreScheduler) gcEval(eval *structs.Evaluation, thresholdIndex uint64) (
	bool, []string, error) {
	// Ignore non-terminal and new evaluations
	if !eval.TerminalStatus() || eval.ModifyIndex > thresholdIndex {
		return false, nil, nil
	}

	// Get the allocations by eval
	allocs, err := c.snap.AllocsByEval(eval.ID)
	if err != nil {
		c.srv.logger.Printf("[ERR] sched.core: failed to get allocs for eval %s: %v",
			eval.ID, err)
		return false, nil, err
	}

	// Scan the allocations to ensure they are terminal and old
	for _, alloc := range allocs {
		if !alloc.TerminalStatus() || alloc.ModifyIndex > thresholdIndex {
			return false, nil, nil
		}
	}

	allocIds := make([]string, len(allocs))
	for i, alloc := range allocs {
		allocIds[i] = alloc.ID
	}

	// Evaluation is eligible for garbage collection
	return true, allocIds, nil
}

// evalReap contacts the leader and issues a reap on the passed evals and
// allocs.
func (c *CoreScheduler) evalReap(evals, allocs []string) error {
	// Call to the leader to issue the reap
	req := structs.EvalDeleteRequest{
		Evals:  evals,
		Allocs: allocs,
		WriteRequest: structs.WriteRequest{
			Region: c.srv.config.Region,
		},
	}
	var resp structs.GenericResponse
	if err := c.srv.RPC("Eval.Reap", &req, &resp); err != nil {
		c.srv.logger.Printf("[ERR] sched.core: eval reap failed: %v", err)
		return err
	}

	return nil
}

// nodeGC is used to garbage collect old nodes
func (c *CoreScheduler) nodeGC(eval *structs.Evaluation) error {
	// Iterate over the nodes
	iter, err := c.snap.Nodes()
	if err != nil {
		return err
	}

	// Compute the old threshold limit for GC using the FSM
	// time table. This is a rough mapping of a time to the
	// Raft index it belongs to.
	tt := c.srv.fsm.TimeTable()
	cutoff := time.Now().UTC().Add(-1 * c.srv.config.NodeGCThreshold)
	oldThreshold := tt.NearestIndex(cutoff)
	c.srv.logger.Printf("[DEBUG] sched.core: node GC: scanning before index %d (%v)",
		oldThreshold, c.srv.config.NodeGCThreshold)

	// Collect the nodes to GC
	var gcNode []string
	for {
		raw := iter.Next()
		if raw == nil {
			break
		}
		node := raw.(*structs.Node)

		// Ignore non-terminal and new nodes
		if !node.TerminalStatus() || node.ModifyIndex > oldThreshold {
			continue
		}

		// Get the allocations by node
		allocs, err := c.snap.AllocsByNode(node.ID)
		if err != nil {
			c.srv.logger.Printf("[ERR] sched.core: failed to get allocs for node %s: %v",
				node.ID, err)
			continue
		}

		// If there are any allocations, skip the node
		if len(allocs) > 0 {
			continue
		}

		// Node is eligible for garbage collection
		gcNode = append(gcNode, node.ID)
	}

	// Fast-path the nothing case
	if len(gcNode) == 0 {
		return nil
	}
	c.srv.logger.Printf("[DEBUG] sched.core: node GC: %d nodes eligible", len(gcNode))

	// Call to the leader to issue the reap
	for _, nodeID := range gcNode {
		req := structs.NodeDeregisterRequest{
			NodeID: nodeID,
			WriteRequest: structs.WriteRequest{
				Region: c.srv.config.Region,
			},
		}
		var resp structs.NodeUpdateResponse
		if err := c.srv.RPC("Node.Deregister", &req, &resp); err != nil {
			c.srv.logger.Printf("[ERR] sched.core: node '%s' reap failed: %v", nodeID, err)
			return err
		}
	}
	return nil
}
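
// The sketch below is illustrative and not part of core_sched.go. It shows how
// a core GC evaluation might be dispatched through Process, assuming a *Server
// (srv) and a *state.StateSnapshot (snap) are already in hand; in practice the
// leader periodically creates these evaluations and the eval broker delivers
// them to this scheduler. The helper name runEvalGC is hypothetical.
//
//	func runEvalGC(srv *Server, snap *state.StateSnapshot) error {
//		// CoreJobEvalGC selects the evalGC branch in Process above;
//		// CoreJobNodeGC and CoreJobJobGC are handled the same way.
//		core := NewCoreScheduler(srv, snap)
//		eval := &structs.Evaluation{JobID: structs.CoreJobEvalGC}
//		return core.Process(eval)
//	}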