github.com/maier/nomad@v0.4.1-0.20161110003312-a9e3d0b8549d/scheduler/stack.go

package scheduler

import (
	"math"
	"time"

	"github.com/hashicorp/nomad/nomad/structs"
)

const (
	// serviceJobAntiAffinityPenalty is the penalty applied
	// to the score for placing an alloc on a node that
	// already has an alloc for this job.
	serviceJobAntiAffinityPenalty = 10.0

	// batchJobAntiAffinityPenalty is the same as the
	// serviceJobAntiAffinityPenalty but for batch type jobs.
	batchJobAntiAffinityPenalty = 5.0
)

// Stack is a chained collection of iterators. The stack is used to
// make placement decisions. Different schedulers may customize the
// stack they use to vary the way placements are made.
type Stack interface {
	// SetNodes is used to set the base set of potential nodes
	SetNodes([]*structs.Node)

	// SetJob is used to set the job for selection
	SetJob(job *structs.Job)

	// Select is used to select a node for the task group
	Select(tg *structs.TaskGroup) (*RankedNode, *structs.Resources)
}
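// placeTaskGroups is an illustrative sketch only; the helper name and shape
// are hypothetical and not part of this file. It shows the calling pattern a
// Stack is designed for: fix the candidate nodes and the job once, then call
// Select per task group. Allocation bookkeeping and failure handling are
// omitted for brevity.
func placeTaskGroups(stack Stack, nodes []*structs.Node, job *structs.Job) map[string]*RankedNode {
	stack.SetNodes(nodes)
	stack.SetJob(job)

	placements := make(map[string]*RankedNode)
	for _, tg := range job.TaskGroups {
		// Select returns nil when no feasible node ranks for this group.
		if option, _ := stack.Select(tg); option != nil {
			placements[tg.Name] = option
		}
	}
	return placements
}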
// GenericStack is the Stack used for the Generic scheduler. It is
// designed to make better placement decisions at the cost of performance.
type GenericStack struct {
	batch  bool
	ctx    Context
	source *StaticIterator

	wrappedChecks       *FeasibilityWrapper
	jobConstraint       *ConstraintChecker
	taskGroupDrivers    *DriverChecker
	taskGroupConstraint *ConstraintChecker

	proposedAllocConstraint *ProposedAllocConstraintIterator
	binPack                 *BinPackIterator
	jobAntiAff              *JobAntiAffinityIterator
	limit                   *LimitIterator
	maxScore                *MaxScoreIterator
}

// NewGenericStack constructs a stack used for selecting service placements
func NewGenericStack(batch bool, ctx Context) *GenericStack {
	// Create a new stack
	s := &GenericStack{
		batch: batch,
		ctx:   ctx,
	}

	// Create the source iterator. We randomize the order we visit nodes
	// to reduce collisions between schedulers and to do basic load
	// balancing across eligible nodes.
	s.source = NewRandomIterator(ctx, nil)

	// Attach the job constraints. The job is filled in later.
	s.jobConstraint = NewConstraintChecker(ctx, nil)

	// Filter on task group drivers first as they are faster
	s.taskGroupDrivers = NewDriverChecker(ctx, nil)

	// Filter on task group constraints second
	s.taskGroupConstraint = NewConstraintChecker(ctx, nil)

	// Create the feasibility wrapper, which allows feasibility checking to be
	// skipped if the computed node class has previously been marked as
	// eligible or ineligible. Generally these are checks that only need to
	// examine a single node to determine feasibility.
	jobs := []FeasibilityChecker{s.jobConstraint}
	tgs := []FeasibilityChecker{s.taskGroupDrivers, s.taskGroupConstraint}
	s.wrappedChecks = NewFeasibilityWrapper(ctx, s.source, jobs, tgs)

	// Filter on constraints that are affected by proposed allocations.
	s.proposedAllocConstraint = NewProposedAllocConstraintIterator(ctx, s.wrappedChecks)

	// Upgrade from feasible to rank iterator
	rankSource := NewFeasibleRankIterator(ctx, s.proposedAllocConstraint)

	// Apply the bin packing; this depends on the resources needed
	// by a particular task group. Only enable eviction for the service
	// scheduler as that logic is expensive.
	evict := !batch
	s.binPack = NewBinPackIterator(ctx, rankSource, evict, 0)

	// Apply the job anti-affinity iterator. This is to avoid placing
	// multiple allocations on the same node for this job. The penalty
	// is lower for batch jobs as it matters less.
	penalty := serviceJobAntiAffinityPenalty
	if batch {
		penalty = batchJobAntiAffinityPenalty
	}
	s.jobAntiAff = NewJobAntiAffinityIterator(ctx, s.binPack, penalty, "")

	// Apply a limit function. This is to avoid scanning *every* possible node.
	s.limit = NewLimitIterator(ctx, s.jobAntiAff, 2)

	// Select the node with the maximum score for placement
	s.maxScore = NewMaxScoreIterator(ctx, s.limit)
	return s
}

func (s *GenericStack) SetNodes(baseNodes []*structs.Node) {
	// Shuffle base nodes
	shuffleNodes(baseNodes)

	// Update the set of base nodes
	s.source.SetNodes(baseNodes)

	// Apply a limit function. This is to avoid scanning *every* possible node.
	// For batch jobs we only need to evaluate 2 options and depend on the
	// power of two choices. For service jobs we need to visit "enough".
	// Using the log of the total number of nodes is a good restriction, with
	// at least 2 as the floor.
	limit := 2
	if n := len(baseNodes); !s.batch && n > 0 {
		logLimit := int(math.Ceil(math.Log2(float64(n))))
		if logLimit > limit {
			limit = logLimit
		}
	}
	s.limit.SetLimit(limit)
}
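// nodeVisitLimit restates the limit calculation from SetNodes above as a
// standalone sketch for illustration; the helper itself is hypothetical and
// not part of this file. Batch jobs rely on the power of two choices and
// always use 2; service jobs scan ceil(log2(n)) nodes with a floor of 2, so
// for example 100 eligible nodes yields a limit of 7.
func nodeVisitLimit(batch bool, numNodes int) int {
	limit := 2
	if !batch && numNodes > 0 {
		if logLimit := int(math.Ceil(math.Log2(float64(numNodes)))); logLimit > limit {
			limit = logLimit
		}
	}
	return limit
}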
func (s *GenericStack) SetJob(job *structs.Job) {
	s.jobConstraint.SetConstraints(job.Constraints)
	s.proposedAllocConstraint.SetJob(job)
	s.binPack.SetPriority(job.Priority)
	s.jobAntiAff.SetJob(job.ID)
	s.ctx.Eligibility().SetJob(job)
}

func (s *GenericStack) Select(tg *structs.TaskGroup) (*RankedNode, *structs.Resources) {
	// Reset the max selector and context
	s.maxScore.Reset()
	s.ctx.Reset()
	start := time.Now()

	// Get the task group's constraints.
	tgConstr := taskGroupConstraints(tg)

	// Update the parameters of iterators
	s.taskGroupDrivers.SetDrivers(tgConstr.drivers)
	s.taskGroupConstraint.SetConstraints(tgConstr.constraints)
	s.proposedAllocConstraint.SetTaskGroup(tg)
	s.wrappedChecks.SetTaskGroup(tg.Name)
	s.binPack.SetTaskGroup(tg)

	// Find the node with the max score
	option := s.maxScore.Next()

	// Ensure that the task resources were specified
	if option != nil && len(option.TaskResources) != len(tg.Tasks) {
		for _, task := range tg.Tasks {
			option.SetTaskResources(task, task.Resources)
		}
	}

	// Store the compute time
	s.ctx.Metrics().AllocationTime = time.Since(start)
	return option, tgConstr.size
}

// SelectPreferringNodes returns a node where an allocation of the task group
// can be placed; the nodes passed to it are preferred over the other
// available nodes.
func (s *GenericStack) SelectPreferringNodes(tg *structs.TaskGroup, nodes []*structs.Node) (*RankedNode, *structs.Resources) {
	originalNodes := s.source.nodes
	s.source.SetNodes(nodes)
	if option, resources := s.Select(tg); option != nil {
		s.source.SetNodes(originalNodes)
		return option, resources
	}
	s.source.SetNodes(originalNodes)
	return s.Select(tg)
}

// SystemStack is the Stack used for the System scheduler. It is designed to
// attempt to make placements on all nodes.
type SystemStack struct {
	ctx                 Context
	source              *StaticIterator
	wrappedChecks       *FeasibilityWrapper
	jobConstraint       *ConstraintChecker
	taskGroupDrivers    *DriverChecker
	taskGroupConstraint *ConstraintChecker
	binPack             *BinPackIterator
}

// NewSystemStack constructs a stack used for selecting system job placements
func NewSystemStack(ctx Context) *SystemStack {
	// Create a new stack
	s := &SystemStack{ctx: ctx}

	// Create the source iterator. We visit nodes in a linear order because we
	// have to evaluate on all nodes.
	s.source = NewStaticIterator(ctx, nil)

	// Attach the job constraints. The job is filled in later.
	s.jobConstraint = NewConstraintChecker(ctx, nil)

	// Filter on task group drivers first as they are faster
	s.taskGroupDrivers = NewDriverChecker(ctx, nil)

	// Filter on task group constraints second
	s.taskGroupConstraint = NewConstraintChecker(ctx, nil)

	// Create the feasibility wrapper, which allows feasibility checking to be
	// skipped if the computed node class has previously been marked as
	// eligible or ineligible. Generally these are checks that only need to
	// examine a single node to determine feasibility.
	jobs := []FeasibilityChecker{s.jobConstraint}
	tgs := []FeasibilityChecker{s.taskGroupDrivers, s.taskGroupConstraint}
	s.wrappedChecks = NewFeasibilityWrapper(ctx, s.source, jobs, tgs)

	// Upgrade from feasible to rank iterator
	rankSource := NewFeasibleRankIterator(ctx, s.wrappedChecks)

	// Apply the bin packing; this depends on the resources needed
	// by a particular task group. Enable eviction as system jobs are high
	// priority.
	s.binPack = NewBinPackIterator(ctx, rankSource, true, 0)
	return s
}

func (s *SystemStack) SetNodes(baseNodes []*structs.Node) {
	// Update the set of base nodes
	s.source.SetNodes(baseNodes)
}

func (s *SystemStack) SetJob(job *structs.Job) {
	s.jobConstraint.SetConstraints(job.Constraints)
	s.binPack.SetPriority(job.Priority)
	s.ctx.Eligibility().SetJob(job)
}

func (s *SystemStack) Select(tg *structs.TaskGroup) (*RankedNode, *structs.Resources) {
	// Reset the binpack selector and context
	s.binPack.Reset()
	s.ctx.Reset()
	start := time.Now()

	// Get the task group's constraints.
	tgConstr := taskGroupConstraints(tg)

	// Update the parameters of iterators
	s.taskGroupDrivers.SetDrivers(tgConstr.drivers)
	s.taskGroupConstraint.SetConstraints(tgConstr.constraints)
	s.binPack.SetTaskGroup(tg)
	s.wrappedChecks.SetTaskGroup(tg.Name)

	// Get the next option that satisfies the constraints.
	option := s.binPack.Next()

	// Ensure that the task resources were specified
	if option != nil && len(option.TaskResources) != len(tg.Tasks) {
		for _, task := range tg.Tasks {
			option.SetTaskResources(task, task.Resources)
		}
	}

	// Store the compute time
	s.ctx.Metrics().AllocationTime = time.Since(start)
	return option, tgConstr.size
}
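// placeSystemAllocs is an illustrative sketch only; the helper name, its
// shape, and the assumption that a system-style scheduler evaluates nodes one
// at a time are not taken from this file. It shows how SystemStack differs in
// use from GenericStack: rather than picking one best node, the caller may
// attempt a placement on each eligible node independently.
func placeSystemAllocs(ctx Context, job *structs.Job, nodes []*structs.Node) map[string]*RankedNode {
	stack := NewSystemStack(ctx)
	stack.SetJob(job)

	placements := make(map[string]*RankedNode)
	for _, node := range nodes {
		// Evaluate a single node at a time so an infeasible node does not
		// affect placements on the others.
		stack.SetNodes([]*structs.Node{node})
		for _, tg := range job.TaskGroups {
			if option, _ := stack.Select(tg); option != nil {
				placements[node.ID+"/"+tg.Name] = option
			}
		}
	}
	return placements
}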