github.com/kardianos/nomad@v0.1.3-0.20151022182107-b13df73ee850/nomad/core_sched.go

package nomad

import (
	"fmt"
	"time"

	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/scheduler"
)

// CoreScheduler is a special "scheduler" that is registered
// as "_core". It is used to run various administrative work
// across the cluster.
type CoreScheduler struct {
	srv  *Server
	snap *state.StateSnapshot
}

// NewCoreScheduler is used to return a new core scheduler instance
func NewCoreScheduler(srv *Server, snap *state.StateSnapshot) scheduler.Scheduler {
	s := &CoreScheduler{
		srv:  srv,
		snap: snap,
	}
	return s
}

// Process is used to implement the scheduler.Scheduler interface
func (s *CoreScheduler) Process(eval *structs.Evaluation) error {
	switch eval.JobID {
	case structs.CoreJobEvalGC:
		return s.evalGC(eval)
	case structs.CoreJobNodeGC:
		return s.nodeGC(eval)
	default:
		return fmt.Errorf("core scheduler cannot handle job '%s'", eval.JobID)
	}
}

// evalGC is used to garbage collect old evaluations
func (c *CoreScheduler) evalGC(eval *structs.Evaluation) error {
	// Iterate over the evaluations
	iter, err := c.snap.Evals()
	if err != nil {
		return err
	}

	// Compute the old threshold limit for GC using the FSM
	// time table. This is a rough mapping of a time to the
	// Raft index it belongs to.
	tt := c.srv.fsm.TimeTable()
	cutoff := time.Now().UTC().Add(-1 * c.srv.config.EvalGCThreshold)
	oldThreshold := tt.NearestIndex(cutoff)
	c.srv.logger.Printf("[DEBUG] sched.core: eval GC: scanning before index %d (%v)",
		oldThreshold, c.srv.config.EvalGCThreshold)

	// Collect the allocations and evaluations to GC
	var gcAlloc, gcEval []string

OUTER:
	for {
		raw := iter.Next()
		if raw == nil {
			break
		}
		eval := raw.(*structs.Evaluation)

		// Ignore non-terminal and new evaluations
		if !eval.TerminalStatus() || eval.ModifyIndex > oldThreshold {
			continue
		}

		// Get the allocations by eval
		allocs, err := c.snap.AllocsByEval(eval.ID)
		if err != nil {
			c.srv.logger.Printf("[ERR] sched.core: failed to get allocs for eval %s: %v",
				eval.ID, err)
			continue
		}

		// Scan the allocations to ensure they are terminal and old
		for _, alloc := range allocs {
			if !alloc.TerminalStatus() || alloc.ModifyIndex > oldThreshold {
				continue OUTER
			}
		}

		// Evaluation is eligible for garbage collection
		gcEval = append(gcEval, eval.ID)
		for _, alloc := range allocs {
			gcAlloc = append(gcAlloc, alloc.ID)
		}
	}

	// Fast-path the nothing case
	if len(gcEval) == 0 && len(gcAlloc) == 0 {
		return nil
	}
	c.srv.logger.Printf("[DEBUG] sched.core: eval GC: %d evaluations, %d allocs eligible",
		len(gcEval), len(gcAlloc))

	// Call to the leader to issue the reap
	req := structs.EvalDeleteRequest{
		Evals:  gcEval,
		Allocs: gcAlloc,
		WriteRequest: structs.WriteRequest{
			Region: c.srv.config.Region,
		},
	}
	var resp structs.GenericResponse
	if err := c.srv.RPC("Eval.Reap", &req, &resp); err != nil {
		c.srv.logger.Printf("[ERR] sched.core: eval reap failed: %v", err)
		return err
	}
	return nil
}
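// The helper below is an illustrative sketch and not part of the original file:
// both GC passes follow the same pattern of mapping a wall-clock cutoff to a
// Raft index via the FSM time table, then comparing each object's ModifyIndex
// against that threshold. Only the TimeTable/NearestIndex calls are taken from
// the surrounding code; the helper's name and its extraction into a shared
// function are assumptions made for illustration.
func (c *CoreScheduler) gcThreshold(threshold time.Duration) uint64 {
	// Anything last modified before this wall-clock cutoff is old enough to collect.
	cutoff := time.Now().UTC().Add(-1 * threshold)
	// Translate the cutoff time into the nearest Raft index recorded by the FSM.
	return c.srv.fsm.TimeTable().NearestIndex(cutoff)
}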
// nodeGC is used to garbage collect old nodes
func (c *CoreScheduler) nodeGC(eval *structs.Evaluation) error {
	// Iterate over the nodes
	iter, err := c.snap.Nodes()
	if err != nil {
		return err
	}

	// Compute the old threshold limit for GC using the FSM
	// time table. This is a rough mapping of a time to the
	// Raft index it belongs to.
	tt := c.srv.fsm.TimeTable()
	cutoff := time.Now().UTC().Add(-1 * c.srv.config.NodeGCThreshold)
	oldThreshold := tt.NearestIndex(cutoff)
	c.srv.logger.Printf("[DEBUG] sched.core: node GC: scanning before index %d (%v)",
		oldThreshold, c.srv.config.NodeGCThreshold)

	// Collect the nodes to GC
	var gcNode []string
	for {
		raw := iter.Next()
		if raw == nil {
			break
		}
		node := raw.(*structs.Node)

		// Ignore non-terminal and new nodes
		if !node.TerminalStatus() || node.ModifyIndex > oldThreshold {
			continue
		}

		// Get the allocations by node
		allocs, err := c.snap.AllocsByNode(node.ID)
		if err != nil {
			c.srv.logger.Printf("[ERR] sched.core: failed to get allocs for node %s: %v",
				node.ID, err)
			continue
		}

		// If there are any allocations, skip the node
		if len(allocs) > 0 {
			continue
		}

		// Node is eligible for garbage collection
		gcNode = append(gcNode, node.ID)
	}

	// Fast-path the nothing case
	if len(gcNode) == 0 {
		return nil
	}
	c.srv.logger.Printf("[DEBUG] sched.core: node GC: %d nodes eligible", len(gcNode))

	// Call to the leader to issue the reap
	for _, nodeID := range gcNode {
		req := structs.NodeDeregisterRequest{
			NodeID: nodeID,
			WriteRequest: structs.WriteRequest{
				Region: c.srv.config.Region,
			},
		}
		var resp structs.NodeUpdateResponse
		if err := c.srv.RPC("Node.Deregister", &req, &resp); err != nil {
			c.srv.logger.Printf("[ERR] sched.core: node '%s' reap failed: %v", nodeID, err)
			return err
		}
	}
	return nil
}
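// Illustrative sketch, not part of the original file: Process dispatches purely
// on the evaluation's JobID, so any ID other than the recognized core job
// constants is rejected with an error. Passing nil for the server and snapshot
// is an assumption that is safe here only because the default branch never
// touches them.
func exampleCoreDispatch() error {
	core := NewCoreScheduler(nil, nil)
	eval := &structs.Evaluation{JobID: "not-a-core-job"}
	// Returns: core scheduler cannot handle job 'not-a-core-job'
	return core.Process(eval)
}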