github.com/zhizhiboom/nomad@v0.8.5-0.20180907175415-f28fd3a1a056/scheduler/rank.go

package scheduler

import (
	"fmt"
	"math"

	"github.com/hashicorp/nomad/nomad/structs"
)

const (
	// binPackingMaxFitScore is the maximum possible bin packing fitness score.
	// It is used to normalize the bin packing score to a value between 0 and 1.
	binPackingMaxFitScore = 18.0
)
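
// Illustrative sketch only (not used by the scheduler): it restates the
// arithmetic performed later in BinPackIterator.Next. A raw fitness from
// structs.ScoreFit is at most binPackingMaxFitScore, so dividing by the
// constant yields a normalized score no greater than 1; a raw fitness of
// 13.5, for example, normalizes to 0.75.
func normalizeBinPackFitness(rawFitness float64) float64 {
	return rawFitness / binPackingMaxFitScore
}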

// RankedNode is used to provide a score and various ranking metadata
// along with a node when iterating. This state can be modified as
// various rank methods are applied.
type RankedNode struct {
	Node          *structs.Node
	FinalScore    float64
	Scores        []float64
	TaskResources map[string]*structs.Resources

	// Proposed is used to cache the proposed allocations on the
	// node. This can be shared between iterators that require it.
	Proposed []*structs.Allocation
}

func (r *RankedNode) GoString() string {
	return fmt.Sprintf("<Node: %s Score: %0.3f>", r.Node.ID, r.FinalScore)
}

// ProposedAllocs returns the proposed allocations for the node, fetching them
// from the context on first use and caching them for later iterators.
func (r *RankedNode) ProposedAllocs(ctx Context) ([]*structs.Allocation, error) {
	if r.Proposed != nil {
		return r.Proposed, nil
	}

	p, err := ctx.ProposedAllocs(r.Node.ID)
	if err != nil {
		return nil, err
	}
	r.Proposed = p
	return p, nil
}

// SetTaskResources records the resources assigned to the given task on this
// node, initializing the map lazily.
func (r *RankedNode) SetTaskResources(task *structs.Task,
	resource *structs.Resources) {
	if r.TaskResources == nil {
		r.TaskResources = make(map[string]*structs.Resources)
	}
	r.TaskResources[task.Name] = resource
}

// RankIterator is used to iteratively yield nodes along
// with ranking metadata. The iterators may manage some state for
// performance optimizations.
type RankIterator interface {
	// Next yields a ranked option or nil if exhausted
	Next() *RankedNode

	// Reset is invoked when an allocation has been placed
	// to reset any stale state.
	Reset()
}
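
// A minimal consumption sketch (not part of the scheduler): a RankIterator is
// drained by calling Next until it returns nil; Reset is expected to be called
// once an allocation has been placed so that stale per-placement state is
// cleared before the next round of ranking.
func drainRankIterator(iter RankIterator) []*RankedNode {
	var options []*RankedNode
	for option := iter.Next(); option != nil; option = iter.Next() {
		options = append(options, option)
	}
	return options
}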

// FeasibleRankIterator is used to consume from a FeasibleIterator
// and return an unranked node with base ranking.
type FeasibleRankIterator struct {
	ctx    Context
	source FeasibleIterator
}

// NewFeasibleRankIterator is used to return a new FeasibleRankIterator
// from a FeasibleIterator source.
func NewFeasibleRankIterator(ctx Context, source FeasibleIterator) *FeasibleRankIterator {
	iter := &FeasibleRankIterator{
		ctx:    ctx,
		source: source,
	}
	return iter
}

func (iter *FeasibleRankIterator) Next() *RankedNode {
	option := iter.source.Next()
	if option == nil {
		return nil
	}
	ranked := &RankedNode{
		Node: option,
	}
	return ranked
}

func (iter *FeasibleRankIterator) Reset() {
	iter.source.Reset()
}

// StaticRankIterator is a RankIterator that returns a static set of results.
// This is largely only useful for testing.
type StaticRankIterator struct {
	ctx    Context
	nodes  []*RankedNode
	offset int
	seen   int
}

// NewStaticRankIterator returns a new static rank iterator over the given nodes.
func NewStaticRankIterator(ctx Context, nodes []*RankedNode) *StaticRankIterator {
	iter := &StaticRankIterator{
		ctx:   ctx,
		nodes: nodes,
	}
	return iter
}

func (iter *StaticRankIterator) Next() *RankedNode {
	// Check if exhausted. If the offset has reached the end of the slice but
	// not every node has been seen since the last Reset, wrap around instead.
	n := len(iter.nodes)
	if iter.offset == n || iter.seen == n {
		if iter.seen != n {
			iter.offset = 0
		} else {
			return nil
		}
	}

	// Return the next offset
	offset := iter.offset
	iter.offset += 1
	iter.seen += 1
	return iter.nodes[offset]
}

func (iter *StaticRankIterator) Reset() {
	iter.seen = 0
}
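
// A hypothetical test-style sketch (the helper name is illustrative): wrapping
// a fixed node list in a StaticRankIterator seeds a ranking pipeline without a
// real feasibility source. Passing a nil Context is only safe here because
// StaticRankIterator never dereferences it.
func staticSourceFromNodes(nodes []*structs.Node) RankIterator {
	ranked := make([]*RankedNode, 0, len(nodes))
	for _, node := range nodes {
		ranked = append(ranked, &RankedNode{Node: node})
	}
	return NewStaticRankIterator(nil, ranked)
}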

// BinPackIterator is a RankIterator that scores potential options
// based on a bin-packing algorithm.
type BinPackIterator struct {
	ctx       Context
	source    RankIterator
	evict     bool
	priority  int
	taskGroup *structs.TaskGroup
}

// NewBinPackIterator returns a BinPackIterator which tries to fit tasks,
// potentially evicting other tasks based on a given priority.
func NewBinPackIterator(ctx Context, source RankIterator, evict bool, priority int) *BinPackIterator {
	iter := &BinPackIterator{
		ctx:      ctx,
		source:   source,
		evict:    evict,
		priority: priority,
	}
	return iter
}

// SetPriority sets the priority used by the iterator.
func (iter *BinPackIterator) SetPriority(p int) {
	iter.priority = p
}

// SetTaskGroup sets the task group that is being placed.
func (iter *BinPackIterator) SetTaskGroup(taskGroup *structs.TaskGroup) {
	iter.taskGroup = taskGroup
}

func (iter *BinPackIterator) Next() *RankedNode {
OUTER:
	for {
		// Get the next potential option
		option := iter.source.Next()
		if option == nil {
			return nil
		}

		// Get the proposed allocations
		proposed, err := option.ProposedAllocs(iter.ctx)
		if err != nil {
			iter.ctx.Logger().Printf(
				"[ERR] sched.binpack: failed to get proposed allocations: %v",
				err)
			continue
		}

		// Index the existing network usage
		netIdx := structs.NewNetworkIndex()
		netIdx.SetNode(option.Node)
		netIdx.AddAllocs(proposed)

		// Assign the resources for each task
		total := &structs.Resources{
			DiskMB: iter.taskGroup.EphemeralDisk.SizeMB,
		}
		for _, task := range iter.taskGroup.Tasks {
			taskResources := task.Resources.Copy()

			// Check if we need a network resource
			if len(taskResources.Networks) > 0 {
				ask := taskResources.Networks[0]
				offer, err := netIdx.AssignNetwork(ask)
				if offer == nil {
					iter.ctx.Metrics().ExhaustedNode(option.Node,
						fmt.Sprintf("network: %s", err))
					netIdx.Release()
					continue OUTER
				}

				// Reserve this to prevent another task from colliding
				netIdx.AddReserved(offer)

				// Update the network ask to the offer
				taskResources.Networks = []*structs.NetworkResource{offer}
			}

			// Store the task resource
			option.SetTaskResources(task, taskResources)

			// Accumulate the total resource requirement
			total.Add(taskResources)
		}

		// Add the resources we are trying to fit
		proposed = append(proposed, &structs.Allocation{Resources: total})

		// Check if these allocations fit, if they do not, simply skip this node
		fit, dim, util, _ := structs.AllocsFit(option.Node, proposed, netIdx)
		netIdx.Release()
		if !fit {
			iter.ctx.Metrics().ExhaustedNode(option.Node, dim)
			continue
		}

		// XXX: For now we completely ignore evictions. We should use that flag
		// to determine if it's possible to evict other lower priority allocations
		// to make room. This explodes the search space, so it must be done
		// carefully.

		// Score the fit normally otherwise
		fitness := structs.ScoreFit(option.Node, util)
		normalizedFit := fitness / binPackingMaxFitScore
		option.Scores = append(option.Scores, normalizedFit)
		iter.ctx.Metrics().ScoreNode(option.Node, "binpack", normalizedFit)
		return option
	}
}

func (iter *BinPackIterator) Reset() {
	iter.source.Reset()
}
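
// A hedged wiring sketch (the helper name is illustrative, not part of Nomad's
// API): the bin packing stage is layered on top of any RankIterator source and
// must be told which task group to score before Next is called.
func newBinPackStage(ctx Context, source RankIterator, tg *structs.TaskGroup, priority int) RankIterator {
	binPack := NewBinPackIterator(ctx, source, false, priority)
	binPack.SetTaskGroup(tg)
	return binPack
}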

// JobAntiAffinityIterator is used to apply an anti-affinity penalty to placing
// an allocation alongside other allocations from this job. This is used to
// help distribute load across the cluster.
type JobAntiAffinityIterator struct {
	ctx          Context
	source       RankIterator
	jobID        string
	taskGroup    string
	desiredCount int
}

// NewJobAntiAffinityIterator is used to create a JobAntiAffinityIterator that
// applies a penalty for co-placement with allocs from this job.
func NewJobAntiAffinityIterator(ctx Context, source RankIterator, jobID string) *JobAntiAffinityIterator {
	iter := &JobAntiAffinityIterator{
		ctx:    ctx,
		source: source,
		jobID:  jobID,
	}
	return iter
}

func (iter *JobAntiAffinityIterator) SetJob(job *structs.Job) {
	iter.jobID = job.ID
}

func (iter *JobAntiAffinityIterator) SetTaskGroup(tg *structs.TaskGroup) {
	iter.taskGroup = tg.Name
	iter.desiredCount = tg.Count
}

func (iter *JobAntiAffinityIterator) Next() *RankedNode {
	for {
		option := iter.source.Next()
		if option == nil {
			return nil
		}

		// Get the proposed allocations
		proposed, err := option.ProposedAllocs(iter.ctx)
		if err != nil {
			iter.ctx.Logger().Printf(
				"[ERR] sched.job-anti-aff: failed to get proposed allocations: %v",
				err)
			continue
		}

		// Determine the number of collisions
		collisions := 0
		for _, alloc := range proposed {
			if alloc.JobID == iter.jobID && alloc.TaskGroup == iter.taskGroup {
				collisions += 1
			}
		}

		// Calculate the penalty based on number of collisions
		// TODO(preetha): Figure out if batch jobs need a different scoring penalty where collisions matter less
		if collisions > 0 {
			scorePenalty := -1 * float64(collisions+1) / float64(iter.desiredCount)
			option.Scores = append(option.Scores, scorePenalty)
			iter.ctx.Metrics().ScoreNode(option.Node, "job-anti-affinity", scorePenalty)
		}
		return option
	}
}

func (iter *JobAntiAffinityIterator) Reset() {
	iter.source.Reset()
}
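
// Illustrative only (not used by the scheduler): this restates the penalty
// formula above. With a task group count of 4 and one colliding allocation
// already proposed on a node, the penalty is -1 * (1 + 1) / 4 = -0.5.
func jobAntiAffinityPenalty(collisions, desiredCount int) float64 {
	return -1 * float64(collisions+1) / float64(desiredCount)
}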

// NodeReschedulingPenaltyIterator is used to apply a penalty to
// a node that had a previous failed allocation for the same job.
// This is used when attempting to reschedule a failed alloc.
type NodeReschedulingPenaltyIterator struct {
	ctx          Context
	source       RankIterator
	penaltyNodes map[string]struct{}
}

// NewNodeReschedulingPenaltyIterator is used to create a NodeReschedulingPenaltyIterator
// that applies a scoring penalty for placement onto nodes in penaltyNodes.
func NewNodeReschedulingPenaltyIterator(ctx Context, source RankIterator) *NodeReschedulingPenaltyIterator {
	iter := &NodeReschedulingPenaltyIterator{
		ctx:    ctx,
		source: source,
	}
	return iter
}

// SetPenaltyNodes sets the set of node IDs that should be penalized.
func (iter *NodeReschedulingPenaltyIterator) SetPenaltyNodes(penaltyNodes map[string]struct{}) {
	iter.penaltyNodes = penaltyNodes
}

func (iter *NodeReschedulingPenaltyIterator) Next() *RankedNode {
	for {
		option := iter.source.Next()
		if option == nil {
			return nil
		}

		_, ok := iter.penaltyNodes[option.Node.ID]
		if ok {
			option.Scores = append(option.Scores, -1)
			iter.ctx.Metrics().ScoreNode(option.Node, "node-reschedule-penalty", -1)
		}
		return option
	}
}

func (iter *NodeReschedulingPenaltyIterator) Reset() {
	iter.penaltyNodes = make(map[string]struct{})
	iter.source.Reset()
}
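
// A minimal sketch, assuming the caller has already collected the IDs of nodes
// that hosted previously failed allocations for this job; those nodes receive
// the fixed -1 score from the iterator above. The helper name is illustrative.
func withReschedulePenalty(ctx Context, source RankIterator, failedNodeIDs []string) RankIterator {
	penalty := NewNodeReschedulingPenaltyIterator(ctx, source)
	penaltyNodes := make(map[string]struct{}, len(failedNodeIDs))
	for _, id := range failedNodeIDs {
		penaltyNodes[id] = struct{}{}
	}
	penalty.SetPenaltyNodes(penaltyNodes)
	return penalty
}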

// NodeAffinityIterator is used to resolve any affinity rules in the job or task group,
// and apply a weighted score to nodes if they match.
type NodeAffinityIterator struct {
	ctx           Context
	source        RankIterator
	jobAffinities []*structs.Affinity
	affinities    []*structs.Affinity
}

// NewNodeAffinityIterator is used to create a NodeAffinityIterator that
// applies a weighted score according to whether nodes match any
// affinities in the job or task group.
func NewNodeAffinityIterator(ctx Context, source RankIterator) *NodeAffinityIterator {
	return &NodeAffinityIterator{
		ctx:    ctx,
		source: source,
	}
}

func (iter *NodeAffinityIterator) SetJob(job *structs.Job) {
	iter.jobAffinities = job.Affinities
}

func (iter *NodeAffinityIterator) SetTaskGroup(tg *structs.TaskGroup) {
	// Merge job affinities
	if iter.jobAffinities != nil {
		iter.affinities = append(iter.affinities, iter.jobAffinities...)
	}

	// Merge task group affinities and task affinities
	if tg.Affinities != nil {
		iter.affinities = append(iter.affinities, tg.Affinities...)
	}
	for _, task := range tg.Tasks {
		if task.Affinities != nil {
			iter.affinities = append(iter.affinities, task.Affinities...)
		}
	}
}

func (iter *NodeAffinityIterator) Reset() {
	iter.source.Reset()
	// This method is called between each task group, so only reset the merged list
	iter.affinities = nil
}

func (iter *NodeAffinityIterator) hasAffinities() bool {
	return len(iter.affinities) > 0
}

func (iter *NodeAffinityIterator) Next() *RankedNode {
	option := iter.source.Next()
	if option == nil {
		return nil
	}
	if !iter.hasAffinities() {
		return option
	}
	// TODO(preetha): we should calculate normalized weights once and reuse them here
	sumWeight := 0.0
	for _, affinity := range iter.affinities {
		sumWeight += math.Abs(affinity.Weight)
	}

	totalAffinityScore := 0.0
	for _, affinity := range iter.affinities {
		if matchesAffinity(iter.ctx, affinity, option.Node) {
			totalAffinityScore += affinity.Weight
		}
	}
	normScore := totalAffinityScore / sumWeight
	if totalAffinityScore != 0.0 {
		option.Scores = append(option.Scores, normScore)
		iter.ctx.Metrics().ScoreNode(option.Node, "node-affinity", normScore)
	}
	return option
}

func matchesAffinity(ctx Context, affinity *structs.Affinity, option *structs.Node) bool {
	// TODO(preetha): Add a step here that filters based on computed node class for potential speedup
	// Resolve the targets
	lVal, ok := resolveTarget(affinity.LTarget, option)
	if !ok {
		return false
	}
	rVal, ok := resolveTarget(affinity.RTarget, option)
	if !ok {
		return false
	}

	// Check if satisfied
	return checkAffinity(ctx, affinity.Operand, lVal, rVal)
}
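
// Illustrative only (not used by the scheduler): this restates the score
// normalization in NodeAffinityIterator.Next. With affinities of weight 50 and
// 100 and a node matching only the weight-100 affinity, the score is
// 100 / (50 + 100) ≈ 0.667. A matched anti-affinity (negative weight) lowers
// the numerator while its absolute value still counts in the denominator.
func normalizedAffinityScore(matchedWeights, allWeights []float64) float64 {
	sumWeight := 0.0
	for _, w := range allWeights {
		sumWeight += math.Abs(w)
	}
	total := 0.0
	for _, w := range matchedWeights {
		total += w
	}
	return total / sumWeight
}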

// ScoreNormalizationIterator is used to combine the scores from prior
// iterators into one final score. The current implementation averages the
// scores together.
type ScoreNormalizationIterator struct {
	ctx    Context
	source RankIterator
}

// NewScoreNormalizationIterator is used to create a ScoreNormalizationIterator that
// averages scores from various iterators into a final score.
func NewScoreNormalizationIterator(ctx Context, source RankIterator) *ScoreNormalizationIterator {
	return &ScoreNormalizationIterator{
		ctx:    ctx,
		source: source}
}

func (iter *ScoreNormalizationIterator) Reset() {
	iter.source.Reset()
}

func (iter *ScoreNormalizationIterator) Next() *RankedNode {
	option := iter.source.Next()
	if option == nil || len(option.Scores) == 0 {
		return option
	}
	numScorers := len(option.Scores)
	sum := 0.0
	for _, score := range option.Scores {
		sum += score
	}
	option.FinalScore = sum / float64(numScorers)
	// TODO(preetha): Turn map in allocmetrics into a heap of topK scores
	iter.ctx.Metrics().ScoreNode(option.Node, "normalized-score", option.FinalScore)
	return option
}
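
// A hedged end-to-end sketch of how the iterators in this file are meant to be
// stacked; the feasibility source and Context come from the rest of the
// scheduler and are assumed here, and the helper name is illustrative. The
// normalization stage at the end averages whatever scores earlier stages
// appended, e.g. a binpack score of 0.75 and a job anti-affinity penalty of
// -0.5 produce a FinalScore of 0.125.
func buildRankingPipeline(ctx Context, feasible FeasibleIterator, job *structs.Job, tg *structs.TaskGroup) RankIterator {
	base := NewFeasibleRankIterator(ctx, feasible)

	binPack := NewBinPackIterator(ctx, base, false, job.Priority)
	binPack.SetTaskGroup(tg)

	jobAntiAff := NewJobAntiAffinityIterator(ctx, binPack, job.ID)
	jobAntiAff.SetTaskGroup(tg)

	nodeAffinity := NewNodeAffinityIterator(ctx, jobAntiAff)
	nodeAffinity.SetJob(job)
	nodeAffinity.SetTaskGroup(tg)

	return NewScoreNormalizationIterator(ctx, nodeAffinity)
}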