github.com/zhizhiboom/nomad@v0.8.5-0.20180907175415-f28fd3a1a056/scheduler/rank.go (about) 1 package scheduler 2 3 import ( 4 "fmt" 5 6 "math" 7 8 "github.com/hashicorp/nomad/nomad/structs" 9 ) 10 11 const ( 12 // binPackingMaxFitScore is the maximum possible bin packing fitness score. 13 // This is used to normalize bin packing score to a value between 0 and 1 14 binPackingMaxFitScore = 18.0 15 ) 16 17 // Rank is used to provide a score and various ranking metadata 18 // along with a node when iterating. This state can be modified as 19 // various rank methods are applied. 20 type RankedNode struct { 21 Node *structs.Node 22 FinalScore float64 23 Scores []float64 24 TaskResources map[string]*structs.Resources 25 26 // Allocs is used to cache the proposed allocations on the 27 // node. This can be shared between iterators that require it. 28 Proposed []*structs.Allocation 29 } 30 31 func (r *RankedNode) GoString() string { 32 return fmt.Sprintf("<Node: %s Score: %0.3f>", r.Node.ID, r.FinalScore) 33 } 34 35 func (r *RankedNode) ProposedAllocs(ctx Context) ([]*structs.Allocation, error) { 36 if r.Proposed != nil { 37 return r.Proposed, nil 38 } 39 40 p, err := ctx.ProposedAllocs(r.Node.ID) 41 if err != nil { 42 return nil, err 43 } 44 r.Proposed = p 45 return p, nil 46 } 47 48 func (r *RankedNode) SetTaskResources(task *structs.Task, 49 resource *structs.Resources) { 50 if r.TaskResources == nil { 51 r.TaskResources = make(map[string]*structs.Resources) 52 } 53 r.TaskResources[task.Name] = resource 54 } 55 56 // RankFeasibleIterator is used to iteratively yield nodes along 57 // with ranking metadata. The iterators may manage some state for 58 // performance optimizations. 59 type RankIterator interface { 60 // Next yields a ranked option or nil if exhausted 61 Next() *RankedNode 62 63 // Reset is invoked when an allocation has been placed 64 // to reset any stale state. 65 Reset() 66 } 67 68 // FeasibleRankIterator is used to consume from a FeasibleIterator 69 // and return an unranked node with base ranking. 70 type FeasibleRankIterator struct { 71 ctx Context 72 source FeasibleIterator 73 } 74 75 // NewFeasibleRankIterator is used to return a new FeasibleRankIterator 76 // from a FeasibleIterator source. 77 func NewFeasibleRankIterator(ctx Context, source FeasibleIterator) *FeasibleRankIterator { 78 iter := &FeasibleRankIterator{ 79 ctx: ctx, 80 source: source, 81 } 82 return iter 83 } 84 85 func (iter *FeasibleRankIterator) Next() *RankedNode { 86 option := iter.source.Next() 87 if option == nil { 88 return nil 89 } 90 ranked := &RankedNode{ 91 Node: option, 92 } 93 return ranked 94 } 95 96 func (iter *FeasibleRankIterator) Reset() { 97 iter.source.Reset() 98 } 99 100 // StaticRankIterator is a RankIterator that returns a static set of results. 101 // This is largely only useful for testing. 102 type StaticRankIterator struct { 103 ctx Context 104 nodes []*RankedNode 105 offset int 106 seen int 107 } 108 109 // NewStaticRankIterator returns a new static rank iterator over the given nodes 110 func NewStaticRankIterator(ctx Context, nodes []*RankedNode) *StaticRankIterator { 111 iter := &StaticRankIterator{ 112 ctx: ctx, 113 nodes: nodes, 114 } 115 return iter 116 } 117 118 func (iter *StaticRankIterator) Next() *RankedNode { 119 // Check if exhausted 120 n := len(iter.nodes) 121 if iter.offset == n || iter.seen == n { 122 if iter.seen != n { 123 iter.offset = 0 124 } else { 125 return nil 126 } 127 } 128 129 // Return the next offset 130 offset := iter.offset 131 iter.offset += 1 132 iter.seen += 1 133 return iter.nodes[offset] 134 } 135 136 func (iter *StaticRankIterator) Reset() { 137 iter.seen = 0 138 } 139 140 // BinPackIterator is a RankIterator that scores potential options 141 // based on a bin-packing algorithm. 142 type BinPackIterator struct { 143 ctx Context 144 source RankIterator 145 evict bool 146 priority int 147 taskGroup *structs.TaskGroup 148 } 149 150 // NewBinPackIterator returns a BinPackIterator which tries to fit tasks 151 // potentially evicting other tasks based on a given priority. 152 func NewBinPackIterator(ctx Context, source RankIterator, evict bool, priority int) *BinPackIterator { 153 iter := &BinPackIterator{ 154 ctx: ctx, 155 source: source, 156 evict: evict, 157 priority: priority, 158 } 159 return iter 160 } 161 162 func (iter *BinPackIterator) SetPriority(p int) { 163 iter.priority = p 164 } 165 166 func (iter *BinPackIterator) SetTaskGroup(taskGroup *structs.TaskGroup) { 167 iter.taskGroup = taskGroup 168 } 169 170 func (iter *BinPackIterator) Next() *RankedNode { 171 OUTER: 172 for { 173 // Get the next potential option 174 option := iter.source.Next() 175 if option == nil { 176 return nil 177 } 178 179 // Get the proposed allocations 180 proposed, err := option.ProposedAllocs(iter.ctx) 181 if err != nil { 182 iter.ctx.Logger().Printf( 183 "[ERR] sched.binpack: failed to get proposed allocations: %v", 184 err) 185 continue 186 } 187 188 // Index the existing network usage 189 netIdx := structs.NewNetworkIndex() 190 netIdx.SetNode(option.Node) 191 netIdx.AddAllocs(proposed) 192 193 // Assign the resources for each task 194 total := &structs.Resources{ 195 DiskMB: iter.taskGroup.EphemeralDisk.SizeMB, 196 } 197 for _, task := range iter.taskGroup.Tasks { 198 taskResources := task.Resources.Copy() 199 200 // Check if we need a network resource 201 if len(taskResources.Networks) > 0 { 202 ask := taskResources.Networks[0] 203 offer, err := netIdx.AssignNetwork(ask) 204 if offer == nil { 205 iter.ctx.Metrics().ExhaustedNode(option.Node, 206 fmt.Sprintf("network: %s", err)) 207 netIdx.Release() 208 continue OUTER 209 } 210 211 // Reserve this to prevent another task from colliding 212 netIdx.AddReserved(offer) 213 214 // Update the network ask to the offer 215 taskResources.Networks = []*structs.NetworkResource{offer} 216 } 217 218 // Store the task resource 219 option.SetTaskResources(task, taskResources) 220 221 // Accumulate the total resource requirement 222 total.Add(taskResources) 223 } 224 225 // Add the resources we are trying to fit 226 proposed = append(proposed, &structs.Allocation{Resources: total}) 227 228 // Check if these allocations fit, if they do not, simply skip this node 229 fit, dim, util, _ := structs.AllocsFit(option.Node, proposed, netIdx) 230 netIdx.Release() 231 if !fit { 232 iter.ctx.Metrics().ExhaustedNode(option.Node, dim) 233 continue 234 } 235 236 // XXX: For now we completely ignore evictions. We should use that flag 237 // to determine if its possible to evict other lower priority allocations 238 // to make room. This explodes the search space, so it must be done 239 // carefully. 240 241 // Score the fit normally otherwise 242 fitness := structs.ScoreFit(option.Node, util) 243 normalizedFit := fitness / binPackingMaxFitScore 244 option.Scores = append(option.Scores, normalizedFit) 245 iter.ctx.Metrics().ScoreNode(option.Node, "binpack", normalizedFit) 246 return option 247 } 248 } 249 250 func (iter *BinPackIterator) Reset() { 251 iter.source.Reset() 252 } 253 254 // JobAntiAffinityIterator is used to apply an anti-affinity to allocating 255 // along side other allocations from this job. This is used to help distribute 256 // load across the cluster. 257 type JobAntiAffinityIterator struct { 258 ctx Context 259 source RankIterator 260 jobID string 261 taskGroup string 262 desiredCount int 263 } 264 265 // NewJobAntiAffinityIterator is used to create a JobAntiAffinityIterator that 266 // applies the given penalty for co-placement with allocs from this job. 267 func NewJobAntiAffinityIterator(ctx Context, source RankIterator, jobID string) *JobAntiAffinityIterator { 268 iter := &JobAntiAffinityIterator{ 269 ctx: ctx, 270 source: source, 271 jobID: jobID, 272 } 273 return iter 274 } 275 276 func (iter *JobAntiAffinityIterator) SetJob(job *structs.Job) { 277 iter.jobID = job.ID 278 } 279 280 func (iter *JobAntiAffinityIterator) SetTaskGroup(tg *structs.TaskGroup) { 281 iter.taskGroup = tg.Name 282 iter.desiredCount = tg.Count 283 } 284 285 func (iter *JobAntiAffinityIterator) Next() *RankedNode { 286 for { 287 option := iter.source.Next() 288 if option == nil { 289 return nil 290 } 291 292 // Get the proposed allocations 293 proposed, err := option.ProposedAllocs(iter.ctx) 294 if err != nil { 295 iter.ctx.Logger().Printf( 296 "[ERR] sched.job-anti-aff: failed to get proposed allocations: %v", 297 err) 298 continue 299 } 300 301 // Determine the number of collisions 302 collisions := 0 303 for _, alloc := range proposed { 304 if alloc.JobID == iter.jobID && alloc.TaskGroup == iter.taskGroup { 305 collisions += 1 306 } 307 } 308 309 // Calculate the penalty based on number of collisions 310 // TODO(preetha): Figure out if batch jobs need a different scoring penalty where collisions matter less 311 if collisions > 0 { 312 scorePenalty := -1 * float64(collisions+1) / float64(iter.desiredCount) 313 option.Scores = append(option.Scores, scorePenalty) 314 iter.ctx.Metrics().ScoreNode(option.Node, "job-anti-affinity", scorePenalty) 315 } 316 return option 317 } 318 } 319 320 func (iter *JobAntiAffinityIterator) Reset() { 321 iter.source.Reset() 322 } 323 324 // NodeReschedulingPenaltyIterator is used to apply a penalty to 325 // a node that had a previous failed allocation for the same job. 326 // This is used when attempting to reschedule a failed alloc 327 type NodeReschedulingPenaltyIterator struct { 328 ctx Context 329 source RankIterator 330 penaltyNodes map[string]struct{} 331 } 332 333 // NewNodeReschedulingPenaltyIterator is used to create a NodeReschedulingPenaltyIterator that 334 // applies the given scoring penalty for placement onto nodes in penaltyNodes 335 func NewNodeReschedulingPenaltyIterator(ctx Context, source RankIterator) *NodeReschedulingPenaltyIterator { 336 iter := &NodeReschedulingPenaltyIterator{ 337 ctx: ctx, 338 source: source, 339 } 340 return iter 341 } 342 343 func (iter *NodeReschedulingPenaltyIterator) SetPenaltyNodes(penaltyNodes map[string]struct{}) { 344 iter.penaltyNodes = penaltyNodes 345 } 346 347 func (iter *NodeReschedulingPenaltyIterator) Next() *RankedNode { 348 for { 349 option := iter.source.Next() 350 if option == nil { 351 return nil 352 } 353 354 _, ok := iter.penaltyNodes[option.Node.ID] 355 if ok { 356 option.Scores = append(option.Scores, -1) 357 iter.ctx.Metrics().ScoreNode(option.Node, "node-reschedule-penalty", -1) 358 } 359 return option 360 } 361 } 362 363 func (iter *NodeReschedulingPenaltyIterator) Reset() { 364 iter.penaltyNodes = make(map[string]struct{}) 365 iter.source.Reset() 366 } 367 368 // NodeAffinityIterator is used to resolve any affinity rules in the job or task group, 369 // and apply a weighted score to nodes if they match. 370 type NodeAffinityIterator struct { 371 ctx Context 372 source RankIterator 373 jobAffinities []*structs.Affinity 374 affinities []*structs.Affinity 375 } 376 377 // NewNodeAffinityIterator is used to create a NodeAffinityIterator that 378 // applies a weighted score according to whether nodes match any 379 // affinities in the job or task group. 380 func NewNodeAffinityIterator(ctx Context, source RankIterator) *NodeAffinityIterator { 381 return &NodeAffinityIterator{ 382 ctx: ctx, 383 source: source, 384 } 385 } 386 387 func (iter *NodeAffinityIterator) SetJob(job *structs.Job) { 388 iter.jobAffinities = job.Affinities 389 } 390 391 func (iter *NodeAffinityIterator) SetTaskGroup(tg *structs.TaskGroup) { 392 // Merge job affinities 393 if iter.jobAffinities != nil { 394 iter.affinities = append(iter.affinities, iter.jobAffinities...) 395 } 396 397 // Merge task group affinities and task affinities 398 if tg.Affinities != nil { 399 iter.affinities = append(iter.affinities, tg.Affinities...) 400 } 401 for _, task := range tg.Tasks { 402 if task.Affinities != nil { 403 iter.affinities = append(iter.affinities, task.Affinities...) 404 } 405 } 406 } 407 408 func (iter *NodeAffinityIterator) Reset() { 409 iter.source.Reset() 410 // This method is called between each task group, so only reset the merged list 411 iter.affinities = nil 412 } 413 414 func (iter *NodeAffinityIterator) hasAffinities() bool { 415 return len(iter.affinities) > 0 416 } 417 418 func (iter *NodeAffinityIterator) Next() *RankedNode { 419 option := iter.source.Next() 420 if option == nil { 421 return nil 422 } 423 if !iter.hasAffinities() { 424 return option 425 } 426 // TODO(preetha): we should calculate normalized weights once and reuse it here 427 sumWeight := 0.0 428 for _, affinity := range iter.affinities { 429 sumWeight += math.Abs(affinity.Weight) 430 } 431 432 totalAffinityScore := 0.0 433 for _, affinity := range iter.affinities { 434 if matchesAffinity(iter.ctx, affinity, option.Node) { 435 totalAffinityScore += affinity.Weight 436 } 437 } 438 normScore := totalAffinityScore / sumWeight 439 if totalAffinityScore != 0.0 { 440 option.Scores = append(option.Scores, normScore) 441 iter.ctx.Metrics().ScoreNode(option.Node, "node-affinity", normScore) 442 } 443 return option 444 } 445 446 func matchesAffinity(ctx Context, affinity *structs.Affinity, option *structs.Node) bool { 447 //TODO(preetha): Add a step here that filters based on computed node class for potential speedup 448 // Resolve the targets 449 lVal, ok := resolveTarget(affinity.LTarget, option) 450 if !ok { 451 return false 452 } 453 rVal, ok := resolveTarget(affinity.RTarget, option) 454 if !ok { 455 return false 456 } 457 458 // Check if satisfied 459 return checkAffinity(ctx, affinity.Operand, lVal, rVal) 460 } 461 462 // ScoreNormalizationIterator is used to combine scores from various prior 463 // iterators and combine them into one final score. The current implementation 464 // averages the scores together. 465 type ScoreNormalizationIterator struct { 466 ctx Context 467 source RankIterator 468 } 469 470 // NewScoreNormalizationIterator is used to create a ScoreNormalizationIterator that 471 // averages scores from various iterators into a final score. 472 func NewScoreNormalizationIterator(ctx Context, source RankIterator) *ScoreNormalizationIterator { 473 return &ScoreNormalizationIterator{ 474 ctx: ctx, 475 source: source} 476 } 477 478 func (iter *ScoreNormalizationIterator) Reset() { 479 iter.source.Reset() 480 } 481 482 func (iter *ScoreNormalizationIterator) Next() *RankedNode { 483 option := iter.source.Next() 484 if option == nil || len(option.Scores) == 0 { 485 return option 486 } 487 numScorers := len(option.Scores) 488 sum := 0.0 489 for _, score := range option.Scores { 490 sum += score 491 } 492 option.FinalScore = sum / float64(numScorers) 493 //TODO(preetha): Turn map in allocmetrics into a heap of topK scores 494 iter.ctx.Metrics().ScoreNode(option.Node, "normalized-score", option.FinalScore) 495 return option 496 }