github.com/djenriquez/nomad-1@v0.8.1/scheduler/rank.go (about) 1 package scheduler 2 3 import ( 4 "fmt" 5 6 "github.com/hashicorp/nomad/nomad/structs" 7 ) 8 9 // Rank is used to provide a score and various ranking metadata 10 // along with a node when iterating. This state can be modified as 11 // various rank methods are applied. 12 type RankedNode struct { 13 Node *structs.Node 14 Score float64 15 TaskResources map[string]*structs.Resources 16 17 // Allocs is used to cache the proposed allocations on the 18 // node. This can be shared between iterators that require it. 19 Proposed []*structs.Allocation 20 } 21 22 func (r *RankedNode) GoString() string { 23 return fmt.Sprintf("<Node: %s Score: %0.3f>", r.Node.ID, r.Score) 24 } 25 26 func (r *RankedNode) ProposedAllocs(ctx Context) ([]*structs.Allocation, error) { 27 if r.Proposed != nil { 28 return r.Proposed, nil 29 } 30 31 p, err := ctx.ProposedAllocs(r.Node.ID) 32 if err != nil { 33 return nil, err 34 } 35 r.Proposed = p 36 return p, nil 37 } 38 39 func (r *RankedNode) SetTaskResources(task *structs.Task, 40 resource *structs.Resources) { 41 if r.TaskResources == nil { 42 r.TaskResources = make(map[string]*structs.Resources) 43 } 44 r.TaskResources[task.Name] = resource 45 } 46 47 // RankFeasibleIterator is used to iteratively yield nodes along 48 // with ranking metadata. The iterators may manage some state for 49 // performance optimizations. 50 type RankIterator interface { 51 // Next yields a ranked option or nil if exhausted 52 Next() *RankedNode 53 54 // Reset is invoked when an allocation has been placed 55 // to reset any stale state. 56 Reset() 57 } 58 59 // FeasibleRankIterator is used to consume from a FeasibleIterator 60 // and return an unranked node with base ranking. 61 type FeasibleRankIterator struct { 62 ctx Context 63 source FeasibleIterator 64 } 65 66 // NewFeasibleRankIterator is used to return a new FeasibleRankIterator 67 // from a FeasibleIterator source. 
68 func NewFeasibleRankIterator(ctx Context, source FeasibleIterator) *FeasibleRankIterator { 69 iter := &FeasibleRankIterator{ 70 ctx: ctx, 71 source: source, 72 } 73 return iter 74 } 75 76 func (iter *FeasibleRankIterator) Next() *RankedNode { 77 option := iter.source.Next() 78 if option == nil { 79 return nil 80 } 81 ranked := &RankedNode{ 82 Node: option, 83 } 84 return ranked 85 } 86 87 func (iter *FeasibleRankIterator) Reset() { 88 iter.source.Reset() 89 } 90 91 // StaticRankIterator is a RankIterator that returns a static set of results. 92 // This is largely only useful for testing. 93 type StaticRankIterator struct { 94 ctx Context 95 nodes []*RankedNode 96 offset int 97 seen int 98 } 99 100 // NewStaticRankIterator returns a new static rank iterator over the given nodes 101 func NewStaticRankIterator(ctx Context, nodes []*RankedNode) *StaticRankIterator { 102 iter := &StaticRankIterator{ 103 ctx: ctx, 104 nodes: nodes, 105 } 106 return iter 107 } 108 109 func (iter *StaticRankIterator) Next() *RankedNode { 110 // Check if exhausted 111 n := len(iter.nodes) 112 if iter.offset == n || iter.seen == n { 113 if iter.seen != n { 114 iter.offset = 0 115 } else { 116 return nil 117 } 118 } 119 120 // Return the next offset 121 offset := iter.offset 122 iter.offset += 1 123 iter.seen += 1 124 return iter.nodes[offset] 125 } 126 127 func (iter *StaticRankIterator) Reset() { 128 iter.seen = 0 129 } 130 131 // BinPackIterator is a RankIterator that scores potential options 132 // based on a bin-packing algorithm. 133 type BinPackIterator struct { 134 ctx Context 135 source RankIterator 136 evict bool 137 priority int 138 taskGroup *structs.TaskGroup 139 } 140 141 // NewBinPackIterator returns a BinPackIterator which tries to fit tasks 142 // potentially evicting other tasks based on a given priority. 
func NewBinPackIterator(ctx Context, source RankIterator, evict bool, priority int) *BinPackIterator {
	iter := &BinPackIterator{
		ctx:      ctx,
		source:   source,
		evict:    evict,
		priority: priority,
	}
	return iter
}

// SetPriority updates the priority used when considering potential evictions.
func (iter *BinPackIterator) SetPriority(p int) {
	iter.priority = p
}

// SetTaskGroup sets the task group whose combined task resources are
// fitted onto candidate nodes by Next.
func (iter *BinPackIterator) SetTaskGroup(taskGroup *structs.TaskGroup) {
	iter.taskGroup = taskGroup
}

// Next returns the next node from the source that can fit the task group's
// total resource ask, with a bin-packing fitness added to the node's score.
// Nodes whose network or other resources are exhausted are recorded in the
// metrics and skipped.
func (iter *BinPackIterator) Next() *RankedNode {
OUTER:
	for {
		// Get the next potential option
		option := iter.source.Next()
		if option == nil {
			return nil
		}

		// Get the proposed allocations
		proposed, err := option.ProposedAllocs(iter.ctx)
		if err != nil {
			iter.ctx.Logger().Printf(
				"[ERR] sched.binpack: failed to get proposed allocations: %v",
				err)
			continue
		}

		// Index the existing network usage on this node so offers below
		// account for what is already reserved.
		netIdx := structs.NewNetworkIndex()
		netIdx.SetNode(option.Node)
		netIdx.AddAllocs(proposed)

		// Assign the resources for each task
		total := &structs.Resources{
			DiskMB: iter.taskGroup.EphemeralDisk.SizeMB,
		}
		for _, task := range iter.taskGroup.Tasks {
			// Copy so the network offer below does not mutate the task's
			// declared resources.
			taskResources := task.Resources.Copy()

			// Check if we need a network resource
			if len(taskResources.Networks) > 0 {
				ask := taskResources.Networks[0]
				offer, err := netIdx.AssignNetwork(ask)
				if offer == nil {
					iter.ctx.Metrics().ExhaustedNode(option.Node,
						fmt.Sprintf("network: %s", err))
					// Release the index before abandoning this node.
					netIdx.Release()
					continue OUTER
				}

				// Reserve this to prevent another task from colliding
				netIdx.AddReserved(offer)

				// Update the network ask to the offer
				taskResources.Networks = []*structs.NetworkResource{offer}
			}

			// Store the task resource
			option.SetTaskResources(task, taskResources)

			// Accumulate the total resource requirement
			total.Add(taskResources)
		}

		// Add the resources we are trying to fit as a synthetic allocation
		// so AllocsFit evaluates the node including the new placement.
		proposed = append(proposed, &structs.Allocation{Resources: total})

		// Check if these allocations fit, if they do not, simply skip this node
		fit, dim, util, _ := structs.AllocsFit(option.Node, proposed, netIdx)
		netIdx.Release()
		if !fit {
			iter.ctx.Metrics().ExhaustedNode(option.Node, dim)
			continue
		}

		// XXX: For now we completely ignore evictions. We should use that flag
		// to determine if its possible to evict other lower priority allocations
		// to make room. This explodes the search space, so it must be done
		// carefully.

		// Score the fit normally otherwise
		fitness := structs.ScoreFit(option.Node, util)
		option.Score += fitness
		iter.ctx.Metrics().ScoreNode(option.Node, "binpack", fitness)
		return option
	}
}

// Reset propagates the reset to the source iterator.
func (iter *BinPackIterator) Reset() {
	iter.source.Reset()
}

// JobAntiAffinityIterator is used to apply an anti-affinity to allocating
// along side other allocations from this job. This is used to help distribute
// load across the cluster.
type JobAntiAffinityIterator struct {
	ctx     Context
	source  RankIterator
	penalty float64
	jobID   string
}

// NewJobAntiAffinityIterator is used to create a JobAntiAffinityIterator that
// applies the given penalty for co-placement with allocs from this job.
256 func NewJobAntiAffinityIterator(ctx Context, source RankIterator, penalty float64, jobID string) *JobAntiAffinityIterator { 257 iter := &JobAntiAffinityIterator{ 258 ctx: ctx, 259 source: source, 260 penalty: penalty, 261 jobID: jobID, 262 } 263 return iter 264 } 265 266 func (iter *JobAntiAffinityIterator) SetJob(jobID string) { 267 iter.jobID = jobID 268 } 269 270 func (iter *JobAntiAffinityIterator) Next() *RankedNode { 271 for { 272 option := iter.source.Next() 273 if option == nil { 274 return nil 275 } 276 277 // Get the proposed allocations 278 proposed, err := option.ProposedAllocs(iter.ctx) 279 if err != nil { 280 iter.ctx.Logger().Printf( 281 "[ERR] sched.job-anti-aff: failed to get proposed allocations: %v", 282 err) 283 continue 284 } 285 286 // Determine the number of collisions 287 collisions := 0 288 for _, alloc := range proposed { 289 if alloc.JobID == iter.jobID { 290 collisions += 1 291 } 292 } 293 294 // Apply a penalty if there are collisions 295 if collisions > 0 { 296 scorePenalty := -1 * float64(collisions) * iter.penalty 297 option.Score += scorePenalty 298 iter.ctx.Metrics().ScoreNode(option.Node, "job-anti-affinity", scorePenalty) 299 } 300 return option 301 } 302 } 303 304 func (iter *JobAntiAffinityIterator) Reset() { 305 iter.source.Reset() 306 } 307 308 // NodeAntiAffinityIterator is used to apply a penalty to 309 // a node that had a previous failed allocation for the same job. 
310 // This is used when attempting to reschedule a failed alloc 311 type NodeAntiAffinityIterator struct { 312 ctx Context 313 source RankIterator 314 penalty float64 315 penaltyNodes map[string]struct{} 316 } 317 318 // NewNodeAntiAffinityIterator is used to create a NodeAntiAffinityIterator that 319 // applies the given penalty for placement onto nodes in penaltyNodes 320 func NewNodeAntiAffinityIterator(ctx Context, source RankIterator, penalty float64) *NodeAntiAffinityIterator { 321 iter := &NodeAntiAffinityIterator{ 322 ctx: ctx, 323 source: source, 324 penalty: penalty, 325 } 326 return iter 327 } 328 329 func (iter *NodeAntiAffinityIterator) SetPenaltyNodes(penaltyNodes map[string]struct{}) { 330 iter.penaltyNodes = penaltyNodes 331 } 332 333 func (iter *NodeAntiAffinityIterator) Next() *RankedNode { 334 for { 335 option := iter.source.Next() 336 if option == nil { 337 return nil 338 } 339 340 _, ok := iter.penaltyNodes[option.Node.ID] 341 if ok { 342 option.Score -= iter.penalty 343 iter.ctx.Metrics().ScoreNode(option.Node, "node-anti-affinity", iter.penalty) 344 } 345 return option 346 } 347 } 348 349 func (iter *NodeAntiAffinityIterator) Reset() { 350 iter.penaltyNodes = make(map[string]struct{}) 351 iter.source.Reset() 352 }