github.com/kardianos/nomad@v0.1.3-0.20151022182107-b13df73ee850/scheduler/stack.go

package scheduler

import (
	"math"
	"time"

	"github.com/hashicorp/nomad/nomad/structs"
)

const (
	// serviceJobAntiAffinityPenalty is the penalty applied
	// to the score for placing an alloc on a node that
	// already has an alloc for this job.
	serviceJobAntiAffinityPenalty = 10.0

	// batchJobAntiAffinityPenalty is the same as the
	// serviceJobAntiAffinityPenalty but for batch type jobs.
	batchJobAntiAffinityPenalty = 5.0
)

// Stack is a chained collection of iterators. The stack is used to
// make placement decisions. Different schedulers may customize the
// stack they use to vary the way placements are made.
type Stack interface {
	// SetNodes is used to set the base set of potential nodes
	SetNodes([]*structs.Node)

	// SetJob is used to set the job for selection
	SetJob(job *structs.Job)

	// Select is used to select a node for the task group
	Select(tg *structs.TaskGroup) (*RankedNode, *structs.Resources)
}

// GenericStack is the Stack used for the Generic scheduler. It is
// designed to make better placement decisions at the cost of performance.
type GenericStack struct {
	batch               bool
	ctx                 Context
	source              *StaticIterator
	jobConstraint       *ConstraintIterator
	taskGroupDrivers    *DriverIterator
	taskGroupConstraint *ConstraintIterator
	binPack             *BinPackIterator
	jobAntiAff          *JobAntiAffinityIterator
	limit               *LimitIterator
	maxScore            *MaxScoreIterator
}

// NewGenericStack constructs a stack used for selecting service or batch job placements
func NewGenericStack(batch bool, ctx Context) *GenericStack {
	// Create a new stack
	s := &GenericStack{
		batch: batch,
		ctx:   ctx,
	}

	// Create the source iterator. We randomize the order we visit nodes
	// to reduce collisions between schedulers and to do a basic load
	// balancing across eligible nodes.
	s.source = NewRandomIterator(ctx, nil)

	// Attach the job constraints. The job is filled in later.
	s.jobConstraint = NewConstraintIterator(ctx, s.source, nil)

	// Filter on task group drivers first as they are faster
	s.taskGroupDrivers = NewDriverIterator(ctx, s.jobConstraint, nil)

	// Filter on task group constraints second
	s.taskGroupConstraint = NewConstraintIterator(ctx, s.taskGroupDrivers, nil)

	// Upgrade from feasible to rank iterator
	rankSource := NewFeasibleRankIterator(ctx, s.taskGroupConstraint)

	// Apply the bin packing, this depends on the resources needed
	// by a particular task group. Only enable eviction for the service
	// scheduler as that logic is expensive.
	evict := !batch
	s.binPack = NewBinPackIterator(ctx, rankSource, evict, 0)

	// Apply the job anti-affinity iterator. This is to avoid placing
	// multiple allocations on the same node for this job. The penalty
	// is less for batch jobs as it matters less.
	penalty := serviceJobAntiAffinityPenalty
	if batch {
		penalty = batchJobAntiAffinityPenalty
	}
	s.jobAntiAff = NewJobAntiAffinityIterator(ctx, s.binPack, penalty, "")

	// Apply a limit function. This is to avoid scanning *every* possible node.
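	// The limit of 2 passed here is only a baseline; SetNodes recomputes the
	// limit from the number of eligible nodes before any selection is made.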
	s.limit = NewLimitIterator(ctx, s.jobAntiAff, 2)

	// Select the node with the maximum score for placement
	s.maxScore = NewMaxScoreIterator(ctx, s.limit)
	return s
}

func (s *GenericStack) SetNodes(baseNodes []*structs.Node) {
	// Shuffle base nodes
	shuffleNodes(baseNodes)

	// Update the set of base nodes
	s.source.SetNodes(baseNodes)

	// Apply a limit function. This is to avoid scanning *every* possible node.
	// For batch jobs we only need to evaluate 2 options and depend on the
	// power of two choices. For service jobs we need to visit "enough".
	// Using a log of the total number of nodes is a good restriction, with
	// at least 2 as the floor.
	limit := 2
	if n := len(baseNodes); !s.batch && n > 0 {
		logLimit := int(math.Ceil(math.Log2(float64(n))))
		if logLimit > limit {
			limit = logLimit
		}
	}
	s.limit.SetLimit(limit)
}

func (s *GenericStack) SetJob(job *structs.Job) {
	s.jobConstraint.SetConstraints(job.Constraints)
	s.binPack.SetPriority(job.Priority)
	s.jobAntiAff.SetJob(job.ID)
}

func (s *GenericStack) Select(tg *structs.TaskGroup) (*RankedNode, *structs.Resources) {
	// Reset the max selector and context
	s.maxScore.Reset()
	s.ctx.Reset()
	start := time.Now()

	// Get the task group's constraints.
	tgConstr := taskGroupConstraints(tg)

	// Update the parameters of iterators
	s.taskGroupDrivers.SetDrivers(tgConstr.drivers)
	s.taskGroupConstraint.SetConstraints(tgConstr.constraints)
	s.binPack.SetTasks(tg.Tasks)

	// Find the node with the max score
	option := s.maxScore.Next()

	// Ensure that the task resources were specified
	if option != nil && len(option.TaskResources) != len(tg.Tasks) {
		for _, task := range tg.Tasks {
			option.SetTaskResources(task, task.Resources)
		}
	}

	// Store the compute time
	s.ctx.Metrics().AllocationTime = time.Since(start)
	return option, tgConstr.size
}

// SystemStack is the Stack used for the System scheduler. It is designed to
// attempt to make placements on all nodes.
type SystemStack struct {
	ctx                 Context
	source              *StaticIterator
	jobConstraint       *ConstraintIterator
	taskGroupDrivers    *DriverIterator
	taskGroupConstraint *ConstraintIterator
	binPack             *BinPackIterator
}

// NewSystemStack constructs a stack used for selecting system job placements
func NewSystemStack(ctx Context) *SystemStack {
	// Create a new stack
	s := &SystemStack{ctx: ctx}

	// Create the source iterator. We visit nodes in a linear order because we
	// have to evaluate on all nodes.
	s.source = NewStaticIterator(ctx, nil)

	// Attach the job constraints. The job is filled in later.
	s.jobConstraint = NewConstraintIterator(ctx, s.source, nil)

	// Filter on task group drivers first as they are faster
	s.taskGroupDrivers = NewDriverIterator(ctx, s.jobConstraint, nil)

	// Filter on task group constraints second
	s.taskGroupConstraint = NewConstraintIterator(ctx, s.taskGroupDrivers, nil)

	// Upgrade from feasible to rank iterator
	rankSource := NewFeasibleRankIterator(ctx, s.taskGroupConstraint)

	// Apply the bin packing, this depends on the resources needed
	// by a particular task group. Enable eviction as system jobs are high
	// priority.
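	// The priority argument starts at 0; SetJob later supplies the real job
	// priority via SetPriority.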
	s.binPack = NewBinPackIterator(ctx, rankSource, true, 0)
	return s
}

func (s *SystemStack) SetNodes(baseNodes []*structs.Node) {
	// Update the set of base nodes
	s.source.SetNodes(baseNodes)
}

func (s *SystemStack) SetJob(job *structs.Job) {
	s.jobConstraint.SetConstraints(job.Constraints)
	s.binPack.SetPriority(job.Priority)
}

func (s *SystemStack) Select(tg *structs.TaskGroup) (*RankedNode, *structs.Resources) {
	// Reset the binpack selector and context
	s.binPack.Reset()
	s.ctx.Reset()
	start := time.Now()

	// Get the task group's constraints.
	tgConstr := taskGroupConstraints(tg)

	// Update the parameters of iterators
	s.taskGroupDrivers.SetDrivers(tgConstr.drivers)
	s.taskGroupConstraint.SetConstraints(tgConstr.constraints)
	s.binPack.SetTasks(tg.Tasks)

	// Get the next option that satisfies the constraints.
	option := s.binPack.Next()

	// Ensure that the task resources were specified
	if option != nil && len(option.TaskResources) != len(tg.Tasks) {
		for _, task := range tg.Tasks {
			option.SetTaskResources(task, task.Resources)
		}
	}

	// Store the compute time
	s.ctx.Metrics().AllocationTime = time.Since(start)
	return option, tgConstr.size
}
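
// placeTaskGroups is an illustrative sketch, not part of the scheduler's
// actual flow, showing how a Stack is driven: the candidate nodes and the job
// are set once, then Select is called once per task group. The helper name and
// the returned map of placements are hypothetical; it only assumes the Stack
// interface and structs types defined above.
func placeTaskGroups(stack Stack, nodes []*structs.Node, job *structs.Job) map[string]*RankedNode {
	// Provide the base node set and the job whose constraints apply to every
	// task group.
	stack.SetNodes(nodes)
	stack.SetJob(job)

	// Ask the stack for a node per task group. A nil option means no feasible
	// node satisfied the group's drivers, constraints, and resources.
	placements := make(map[string]*RankedNode)
	for _, tg := range job.TaskGroups {
		option, _ := stack.Select(tg)
		placements[tg.Name] = option
	}
	return placements
}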