github.com/dkerwin/nomad@v0.3.3-0.20160525181927-74554135514b/scheduler/stack.go (about) 1 package scheduler 2 3 import ( 4 "math" 5 "time" 6 7 "github.com/hashicorp/nomad/nomad/structs" 8 ) 9 10 const ( 11 // serviceJobAntiAffinityPenalty is the penalty applied 12 // to the score for placing an alloc on a node that 13 // already has an alloc for this job. 14 serviceJobAntiAffinityPenalty = 10.0 15 16 // batchJobAntiAffinityPenalty is the same as the 17 // serviceJobAntiAffinityPenalty but for batch type jobs. 18 batchJobAntiAffinityPenalty = 5.0 19 ) 20 21 // Stack is a chained collection of iterators. The stack is used to 22 // make placement decisions. Different schedulers may customize the 23 // stack they use to vary the way placements are made. 24 type Stack interface { 25 // SetNodes is used to set the base set of potential nodes 26 SetNodes([]*structs.Node) 27 28 // SetTaskGroup is used to set the job for selection 29 SetJob(job *structs.Job) 30 31 // Select is used to select a node for the task group 32 Select(tg *structs.TaskGroup) (*RankedNode, *structs.Resources) 33 } 34 35 // GenericStack is the Stack used for the Generic scheduler. It is 36 // designed to make better placement decisions at the cost of performance. 37 type GenericStack struct { 38 batch bool 39 ctx Context 40 source *StaticIterator 41 42 wrappedChecks *FeasibilityWrapper 43 jobConstraint *ConstraintChecker 44 taskGroupDrivers *DriverChecker 45 taskGroupConstraint *ConstraintChecker 46 47 proposedAllocConstraint *ProposedAllocConstraintIterator 48 binPack *BinPackIterator 49 jobAntiAff *JobAntiAffinityIterator 50 limit *LimitIterator 51 maxScore *MaxScoreIterator 52 } 53 54 // NewGenericStack constructs a stack used for selecting service placements 55 func NewGenericStack(batch bool, ctx Context) *GenericStack { 56 // Create a new stack 57 s := &GenericStack{ 58 batch: batch, 59 ctx: ctx, 60 } 61 62 // Create the source iterator. We randomize the order we visit nodes 63 // to reduce collisions between schedulers and to do a basic load 64 // balancing across eligible nodes. 65 s.source = NewRandomIterator(ctx, nil) 66 67 // Attach the job constraints. The job is filled in later. 68 s.jobConstraint = NewConstraintChecker(ctx, nil) 69 70 // Filter on task group drivers first as they are faster 71 s.taskGroupDrivers = NewDriverChecker(ctx, nil) 72 73 // Filter on task group constraints second 74 s.taskGroupConstraint = NewConstraintChecker(ctx, nil) 75 76 // Create the feasibility wrapper which wraps all feasibility checks in 77 // which feasibility checking can be skipped if the computed node class has 78 // previously been marked as eligible or ineligible. Generally this will be 79 // checks that only needs to examine the single node to determine feasibility. 80 jobs := []FeasibilityChecker{s.jobConstraint} 81 tgs := []FeasibilityChecker{s.taskGroupDrivers, s.taskGroupConstraint} 82 s.wrappedChecks = NewFeasibilityWrapper(ctx, s.source, jobs, tgs) 83 84 // Filter on constraints that are affected by propsed allocations. 85 s.proposedAllocConstraint = NewProposedAllocConstraintIterator(ctx, s.wrappedChecks) 86 87 // Upgrade from feasible to rank iterator 88 rankSource := NewFeasibleRankIterator(ctx, s.proposedAllocConstraint) 89 90 // Apply the bin packing, this depends on the resources needed 91 // by a particular task group. Only enable eviction for the service 92 // scheduler as that logic is expensive. 93 evict := !batch 94 s.binPack = NewBinPackIterator(ctx, rankSource, evict, 0) 95 96 // Apply the job anti-affinity iterator. This is to avoid placing 97 // multiple allocations on the same node for this job. The penalty 98 // is less for batch jobs as it matters less. 99 penalty := serviceJobAntiAffinityPenalty 100 if batch { 101 penalty = batchJobAntiAffinityPenalty 102 } 103 s.jobAntiAff = NewJobAntiAffinityIterator(ctx, s.binPack, penalty, "") 104 105 // Apply a limit function. This is to avoid scanning *every* possible node. 106 s.limit = NewLimitIterator(ctx, s.jobAntiAff, 2) 107 108 // Select the node with the maximum score for placement 109 s.maxScore = NewMaxScoreIterator(ctx, s.limit) 110 return s 111 } 112 113 func (s *GenericStack) SetNodes(baseNodes []*structs.Node) { 114 // Shuffle base nodes 115 shuffleNodes(baseNodes) 116 117 // Update the set of base nodes 118 s.source.SetNodes(baseNodes) 119 120 // Apply a limit function. This is to avoid scanning *every* possible node. 121 // For batch jobs we only need to evaluate 2 options and depend on the 122 // power of two choices. For services jobs we need to visit "enough". 123 // Using a log of the total number of nodes is a good restriction, with 124 // at least 2 as the floor 125 limit := 2 126 if n := len(baseNodes); !s.batch && n > 0 { 127 logLimit := int(math.Ceil(math.Log2(float64(n)))) 128 if logLimit > limit { 129 limit = logLimit 130 } 131 } 132 s.limit.SetLimit(limit) 133 } 134 135 func (s *GenericStack) SetJob(job *structs.Job) { 136 s.jobConstraint.SetConstraints(job.Constraints) 137 s.proposedAllocConstraint.SetJob(job) 138 s.binPack.SetPriority(job.Priority) 139 s.jobAntiAff.SetJob(job.ID) 140 s.ctx.Eligibility().SetJob(job) 141 } 142 143 func (s *GenericStack) Select(tg *structs.TaskGroup) (*RankedNode, *structs.Resources) { 144 // Reset the max selector and context 145 s.maxScore.Reset() 146 s.ctx.Reset() 147 start := time.Now() 148 149 // Get the task groups constraints. 150 tgConstr := taskGroupConstraints(tg) 151 152 // Update the parameters of iterators 153 s.taskGroupDrivers.SetDrivers(tgConstr.drivers) 154 s.taskGroupConstraint.SetConstraints(tgConstr.constraints) 155 s.proposedAllocConstraint.SetTaskGroup(tg) 156 s.wrappedChecks.SetTaskGroup(tg.Name) 157 s.binPack.SetTasks(tg.Tasks) 158 159 // Find the node with the max score 160 option := s.maxScore.Next() 161 162 // Ensure that the task resources were specified 163 if option != nil && len(option.TaskResources) != len(tg.Tasks) { 164 for _, task := range tg.Tasks { 165 option.SetTaskResources(task, task.Resources) 166 } 167 } 168 169 // Store the compute time 170 s.ctx.Metrics().AllocationTime = time.Since(start) 171 return option, tgConstr.size 172 } 173 174 // SystemStack is the Stack used for the System scheduler. It is designed to 175 // attempt to make placements on all nodes. 176 type SystemStack struct { 177 ctx Context 178 source *StaticIterator 179 wrappedChecks *FeasibilityWrapper 180 jobConstraint *ConstraintChecker 181 taskGroupDrivers *DriverChecker 182 taskGroupConstraint *ConstraintChecker 183 binPack *BinPackIterator 184 } 185 186 // NewSystemStack constructs a stack used for selecting service placements 187 func NewSystemStack(ctx Context) *SystemStack { 188 // Create a new stack 189 s := &SystemStack{ctx: ctx} 190 191 // Create the source iterator. We visit nodes in a linear order because we 192 // have to evaluate on all nodes. 193 s.source = NewStaticIterator(ctx, nil) 194 195 // Attach the job constraints. The job is filled in later. 196 s.jobConstraint = NewConstraintChecker(ctx, nil) 197 198 // Filter on task group drivers first as they are faster 199 s.taskGroupDrivers = NewDriverChecker(ctx, nil) 200 201 // Filter on task group constraints second 202 s.taskGroupConstraint = NewConstraintChecker(ctx, nil) 203 204 // Create the feasibility wrapper which wraps all feasibility checks in 205 // which feasibility checking can be skipped if the computed node class has 206 // previously been marked as eligible or ineligible. Generally this will be 207 // checks that only needs to examine the single node to determine feasibility. 208 jobs := []FeasibilityChecker{s.jobConstraint} 209 tgs := []FeasibilityChecker{s.taskGroupDrivers, s.taskGroupConstraint} 210 s.wrappedChecks = NewFeasibilityWrapper(ctx, s.source, jobs, tgs) 211 212 // Upgrade from feasible to rank iterator 213 rankSource := NewFeasibleRankIterator(ctx, s.wrappedChecks) 214 215 // Apply the bin packing, this depends on the resources needed 216 // by a particular task group. Enable eviction as system jobs are high 217 // priority. 218 s.binPack = NewBinPackIterator(ctx, rankSource, true, 0) 219 return s 220 } 221 222 func (s *SystemStack) SetNodes(baseNodes []*structs.Node) { 223 // Update the set of base nodes 224 s.source.SetNodes(baseNodes) 225 } 226 227 func (s *SystemStack) SetJob(job *structs.Job) { 228 s.jobConstraint.SetConstraints(job.Constraints) 229 s.binPack.SetPriority(job.Priority) 230 s.ctx.Eligibility().SetJob(job) 231 } 232 233 func (s *SystemStack) Select(tg *structs.TaskGroup) (*RankedNode, *structs.Resources) { 234 // Reset the binpack selector and context 235 s.binPack.Reset() 236 s.ctx.Reset() 237 start := time.Now() 238 239 // Get the task groups constraints. 240 tgConstr := taskGroupConstraints(tg) 241 242 // Update the parameters of iterators 243 s.taskGroupDrivers.SetDrivers(tgConstr.drivers) 244 s.taskGroupConstraint.SetConstraints(tgConstr.constraints) 245 s.binPack.SetTasks(tg.Tasks) 246 s.wrappedChecks.SetTaskGroup(tg.Name) 247 248 // Get the next option that satisfies the constraints. 249 option := s.binPack.Next() 250 251 // Ensure that the task resources were specified 252 if option != nil && len(option.TaskResources) != len(tg.Tasks) { 253 for _, task := range tg.Tasks { 254 option.SetTaskResources(task, task.Resources) 255 } 256 } 257 258 // Store the compute time 259 s.ctx.Metrics().AllocationTime = time.Since(start) 260 return option, tgConstr.size 261 }