github.com/maier/nomad@v0.4.1-0.20161110003312-a9e3d0b8549d/scheduler/stack.go

package scheduler

import (
	"math"
	"time"

	"github.com/hashicorp/nomad/nomad/structs"
)

const (
	// serviceJobAntiAffinityPenalty is the penalty applied
	// to the score for placing an alloc on a node that
	// already has an alloc for this job.
	serviceJobAntiAffinityPenalty = 10.0

	// batchJobAntiAffinityPenalty is the same as the
	// serviceJobAntiAffinityPenalty but for batch type jobs.
	batchJobAntiAffinityPenalty = 5.0
)

// Stack is a chained collection of iterators. The stack is used to
// make placement decisions. Different schedulers may customize the
// stack they use to vary the way placements are made.
type Stack interface {
	// SetNodes is used to set the base set of potential nodes
	SetNodes([]*structs.Node)

	// SetJob is used to set the job for selection
	SetJob(job *structs.Job)

	// Select is used to select a node for the task group
	Select(tg *structs.TaskGroup) (*RankedNode, *structs.Resources)
}
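
// The sketch below is not part of the original file; it is a minimal, hedged
// illustration of how a scheduler is expected to drive a Stack: set the
// candidate nodes and the job once, then call Select once per task group.
// The ctx, nodes, and job arguments are assumed to be prepared by the caller.
func exampleStackUsage(ctx Context, nodes []*structs.Node, job *structs.Job) {
	var stack Stack = NewGenericStack(false, ctx)
	stack.SetNodes(nodes)
	stack.SetJob(job)

	for _, tg := range job.TaskGroups {
		option, size := stack.Select(tg)
		if option == nil {
			// No feasible node was found for this task group.
			continue
		}
		// option.Node is the chosen node and size is the combined resource
		// ask computed from the task group's tasks.
		_ = option
		_ = size
	}
}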

// GenericStack is the Stack used for the Generic scheduler. It is
// designed to make better placement decisions at the cost of performance.
type GenericStack struct {
	batch  bool
	ctx    Context
	source *StaticIterator

	wrappedChecks       *FeasibilityWrapper
	jobConstraint       *ConstraintChecker
	taskGroupDrivers    *DriverChecker
	taskGroupConstraint *ConstraintChecker

	proposedAllocConstraint *ProposedAllocConstraintIterator
	binPack                 *BinPackIterator
	jobAntiAff              *JobAntiAffinityIterator
	limit                   *LimitIterator
	maxScore                *MaxScoreIterator
}

// NewGenericStack constructs a stack used for selecting service and batch
// job placements.
func NewGenericStack(batch bool, ctx Context) *GenericStack {
	// Create a new stack
	s := &GenericStack{
		batch: batch,
		ctx:   ctx,
	}

	// Create the source iterator. We randomize the order we visit nodes
	// to reduce collisions between schedulers and to do a basic load
	// balancing across eligible nodes.
	s.source = NewRandomIterator(ctx, nil)

	// Attach the job constraints. The job is filled in later.
	s.jobConstraint = NewConstraintChecker(ctx, nil)

	// Filter on task group drivers first as they are faster
	s.taskGroupDrivers = NewDriverChecker(ctx, nil)

	// Filter on task group constraints second
	s.taskGroupConstraint = NewConstraintChecker(ctx, nil)

	// Create the feasibility wrapper, which wraps all feasibility checks so
	// that checking can be skipped if the computed node class has previously
	// been marked as eligible or ineligible. Generally these will be checks
	// that only need to examine a single node to determine feasibility.
	jobs := []FeasibilityChecker{s.jobConstraint}
	tgs := []FeasibilityChecker{s.taskGroupDrivers, s.taskGroupConstraint}
	s.wrappedChecks = NewFeasibilityWrapper(ctx, s.source, jobs, tgs)

	// Filter on constraints that are affected by proposed allocations.
	s.proposedAllocConstraint = NewProposedAllocConstraintIterator(ctx, s.wrappedChecks)

	// Upgrade from feasible to rank iterator
	rankSource := NewFeasibleRankIterator(ctx, s.proposedAllocConstraint)

	// Apply the bin packing; this depends on the resources needed by a
	// particular task group. Only enable eviction for the service
	// scheduler as that logic is expensive.
	evict := !batch
	s.binPack = NewBinPackIterator(ctx, rankSource, evict, 0)

	// Apply the job anti-affinity iterator. This is to avoid placing
	// multiple allocations on the same node for this job. The penalty
	// is lower for batch jobs as co-location matters less for them.
	penalty := serviceJobAntiAffinityPenalty
	if batch {
		penalty = batchJobAntiAffinityPenalty
	}
	s.jobAntiAff = NewJobAntiAffinityIterator(ctx, s.binPack, penalty, "")

	// Apply a limit function. This is to avoid scanning *every* possible node.
	s.limit = NewLimitIterator(ctx, s.jobAntiAff, 2)

	// Select the node with the maximum score for placement
	s.maxScore = NewMaxScoreIterator(ctx, s.limit)
	return s
}

func (s *GenericStack) SetNodes(baseNodes []*structs.Node) {
	// Shuffle base nodes
	shuffleNodes(baseNodes)

	// Update the set of base nodes
	s.source.SetNodes(baseNodes)

	// Apply a limit function. This is to avoid scanning *every* possible node.
	// For batch jobs we only need to evaluate 2 options and depend on the
	// power of two choices. For service jobs we need to visit "enough".
	// Using the log of the total number of nodes is a good restriction, with
	// at least 2 as the floor.
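	// For example, with 100 eligible nodes, ceil(log2(100)) = 7 options are
	// considered before the limit is hit.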
	limit := 2
	if n := len(baseNodes); !s.batch && n > 0 {
		logLimit := int(math.Ceil(math.Log2(float64(n))))
		if logLimit > limit {
			limit = logLimit
		}
	}
	s.limit.SetLimit(limit)
}

func (s *GenericStack) SetJob(job *structs.Job) {
	s.jobConstraint.SetConstraints(job.Constraints)
	s.proposedAllocConstraint.SetJob(job)
	s.binPack.SetPriority(job.Priority)
	s.jobAntiAff.SetJob(job.ID)
	s.ctx.Eligibility().SetJob(job)
}

func (s *GenericStack) Select(tg *structs.TaskGroup) (*RankedNode, *structs.Resources) {
	// Reset the max selector and context
	s.maxScore.Reset()
	s.ctx.Reset()
	start := time.Now()

	// Get the task group's constraints.
	tgConstr := taskGroupConstraints(tg)

	// Update the parameters of the iterators
	s.taskGroupDrivers.SetDrivers(tgConstr.drivers)
	s.taskGroupConstraint.SetConstraints(tgConstr.constraints)
	s.proposedAllocConstraint.SetTaskGroup(tg)
	s.wrappedChecks.SetTaskGroup(tg.Name)
	s.binPack.SetTaskGroup(tg)

	// Find the node with the max score
	option := s.maxScore.Next()

	// Ensure that the task resources were specified
	if option != nil && len(option.TaskResources) != len(tg.Tasks) {
		for _, task := range tg.Tasks {
			option.SetTaskResources(task, task.Resources)
		}
	}

	// Store the compute time
	s.ctx.Metrics().AllocationTime = time.Since(start)
	return option, tgConstr.size
}

// SelectPreferringNodes returns a node where an allocation of the task group
// can be placed. The nodes passed in are preferred over the other available
// nodes.
func (s *GenericStack) SelectPreferringNodes(tg *structs.TaskGroup, nodes []*structs.Node) (*RankedNode, *structs.Resources) {
	originalNodes := s.source.nodes
	s.source.SetNodes(nodes)
	if option, resources := s.Select(tg); option != nil {
		s.source.SetNodes(originalNodes)
		return option, resources
	}
	s.source.SetNodes(originalNodes)
	return s.Select(tg)
}
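
// The helper below is not part of the original file; it is a hedged sketch of
// one way SelectPreferringNodes might be used: try to keep a task group on a
// specific node (for example, the node that held its previous allocation)
// while still falling back to the full node set. The prevNode argument is a
// hypothetical input supplied by the caller.
func examplePreferPreviousNode(stack *GenericStack, tg *structs.TaskGroup, prevNode *structs.Node) (*RankedNode, *structs.Resources) {
	if prevNode != nil {
		// Restrict the first selection pass to the preferred node; the
		// method restores the original node set and retries against all
		// nodes if no feasible option is found there.
		return stack.SelectPreferringNodes(tg, []*structs.Node{prevNode})
	}
	return stack.Select(tg)
}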

// SystemStack is the Stack used for the System scheduler. It is designed to
// attempt to make placements on all nodes.
type SystemStack struct {
	ctx                 Context
	source              *StaticIterator
	wrappedChecks       *FeasibilityWrapper
	jobConstraint       *ConstraintChecker
	taskGroupDrivers    *DriverChecker
	taskGroupConstraint *ConstraintChecker
	binPack             *BinPackIterator
}

// NewSystemStack constructs a stack used for selecting system job placements
func NewSystemStack(ctx Context) *SystemStack {
	// Create a new stack
	s := &SystemStack{ctx: ctx}

	// Create the source iterator. We visit nodes in a linear order because we
	// have to evaluate on all nodes.
	s.source = NewStaticIterator(ctx, nil)

	// Attach the job constraints. The job is filled in later.
	s.jobConstraint = NewConstraintChecker(ctx, nil)

	// Filter on task group drivers first as they are faster
	s.taskGroupDrivers = NewDriverChecker(ctx, nil)

	// Filter on task group constraints second
	s.taskGroupConstraint = NewConstraintChecker(ctx, nil)

	// Create the feasibility wrapper, which wraps all feasibility checks so
	// that checking can be skipped if the computed node class has previously
	// been marked as eligible or ineligible. Generally these will be checks
	// that only need to examine a single node to determine feasibility.
	jobs := []FeasibilityChecker{s.jobConstraint}
	tgs := []FeasibilityChecker{s.taskGroupDrivers, s.taskGroupConstraint}
	s.wrappedChecks = NewFeasibilityWrapper(ctx, s.source, jobs, tgs)

	// Upgrade from feasible to rank iterator
	rankSource := NewFeasibleRankIterator(ctx, s.wrappedChecks)

	// Apply the bin packing; this depends on the resources needed by a
	// particular task group. Enable eviction as system jobs are high
	// priority.
	s.binPack = NewBinPackIterator(ctx, rankSource, true, 0)
	return s
}
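
// The sketch below is not part of the original file; it is a hedged
// illustration of how the SystemStack could be driven. Since the system
// scheduler tries to place the job on every node, one plausible pattern
// (assumed here, not taken from the original source) is to evaluate nodes
// one at a time and collect the feasible options.
func exampleSystemPlacements(ctx Context, nodes []*structs.Node, job *structs.Job, tg *structs.TaskGroup) []*RankedNode {
	stack := NewSystemStack(ctx)
	stack.SetJob(job)

	var placements []*RankedNode
	for _, node := range nodes {
		// Evaluate a single candidate node per Select call.
		stack.SetNodes([]*structs.Node{node})
		if option, _ := stack.Select(tg); option != nil {
			placements = append(placements, option)
		}
	}
	return placements
}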

func (s *SystemStack) SetNodes(baseNodes []*structs.Node) {
	// Update the set of base nodes
	s.source.SetNodes(baseNodes)
}

func (s *SystemStack) SetJob(job *structs.Job) {
	s.jobConstraint.SetConstraints(job.Constraints)
	s.binPack.SetPriority(job.Priority)
	s.ctx.Eligibility().SetJob(job)
}

func (s *SystemStack) Select(tg *structs.TaskGroup) (*RankedNode, *structs.Resources) {
	// Reset the binpack selector and context
	s.binPack.Reset()
	s.ctx.Reset()
	start := time.Now()

	// Get the task group's constraints.
	tgConstr := taskGroupConstraints(tg)

	// Update the parameters of the iterators
	s.taskGroupDrivers.SetDrivers(tgConstr.drivers)
	s.taskGroupConstraint.SetConstraints(tgConstr.constraints)
	s.binPack.SetTaskGroup(tg)
	s.wrappedChecks.SetTaskGroup(tg.Name)

	// Get the next option that satisfies the constraints.
	option := s.binPack.Next()

	// Ensure that the task resources were specified
	if option != nil && len(option.TaskResources) != len(tg.Tasks) {
		for _, task := range tg.Tasks {
			option.SetTaskResources(task, task.Resources)
		}
	}

	// Store the compute time
	s.ctx.Metrics().AllocationTime = time.Since(start)
	return option, tgConstr.size
}