github.com/diptanu/nomad@v0.5.7-0.20170516172507-d72e86cbe3d9/scheduler/stack.go

package scheduler

import (
	"math"
	"time"

	"github.com/hashicorp/nomad/nomad/structs"
)

const (
	// serviceJobAntiAffinityPenalty is the penalty applied
	// to the score for placing an alloc on a node that
	// already has an alloc for this job.
	serviceJobAntiAffinityPenalty = 20.0

	// batchJobAntiAffinityPenalty is the same as the
	// serviceJobAntiAffinityPenalty but for batch type jobs.
	batchJobAntiAffinityPenalty = 10.0
)

// Stack is a chained collection of iterators. The stack is used to
// make placement decisions. Different schedulers may customize the
// stack they use to vary the way placements are made.
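//
// A minimal caller-side sketch (assumed usage, not taken from this file; the
// names eligibleNodes, job, and taskGroup are illustrative): a scheduler seeds
// the stack with candidate nodes and the job, then asks it to pick a node for
// each task group.
//
//	stack.SetNodes(eligibleNodes)
//	stack.SetJob(job)
//	option, resources := stack.Select(taskGroup)
//	if option == nil {
//		// no feasible node was found for this task group
//	}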
type Stack interface {
	// SetNodes is used to set the base set of potential nodes
	SetNodes([]*structs.Node)

	// SetJob is used to set the job for selection
	SetJob(job *structs.Job)

	// Select is used to select a node for the task group
	Select(tg *structs.TaskGroup) (*RankedNode, *structs.Resources)
}

// GenericStack is the Stack used for the Generic scheduler. It is
// designed to make better placement decisions at the cost of performance.
type GenericStack struct {
	batch  bool
	ctx    Context
	source *StaticIterator

	wrappedChecks       *FeasibilityWrapper
	jobConstraint       *ConstraintChecker
	taskGroupDrivers    *DriverChecker
	taskGroupConstraint *ConstraintChecker

	distinctHostsConstraint    *DistinctHostsIterator
	distinctPropertyConstraint *DistinctPropertyIterator
	binPack                    *BinPackIterator
	jobAntiAff                 *JobAntiAffinityIterator
	limit                      *LimitIterator
	maxScore                   *MaxScoreIterator
}

// NewGenericStack constructs a stack used for selecting service and batch
// placements.
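// The iterators are chained in the following order: random node source,
// feasibility checks (job constraints, task group drivers, task group
// constraints), distinct_hosts and distinct_property filtering, bin packing,
// job anti-affinity, a scan limit, and finally max-score selection.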
func NewGenericStack(batch bool, ctx Context) *GenericStack {
	// Create a new stack
	s := &GenericStack{
		batch: batch,
		ctx:   ctx,
	}

	// Create the source iterator. We randomize the order we visit nodes
	// to reduce collisions between schedulers and to do basic load
	// balancing across eligible nodes.
	s.source = NewRandomIterator(ctx, nil)

	// Attach the job constraints. The job is filled in later.
	s.jobConstraint = NewConstraintChecker(ctx, nil)

	// Filter on task group drivers first as they are faster
	s.taskGroupDrivers = NewDriverChecker(ctx, nil)

	// Filter on task group constraints second
	s.taskGroupConstraint = NewConstraintChecker(ctx, nil)

	// Create the feasibility wrapper, which wraps the feasibility checks so
	// that checking can be skipped if the computed node class has previously
	// been marked as eligible or ineligible. Generally these are checks that
	// only need to examine a single node to determine feasibility.
	jobs := []FeasibilityChecker{s.jobConstraint}
	tgs := []FeasibilityChecker{s.taskGroupDrivers, s.taskGroupConstraint}
	s.wrappedChecks = NewFeasibilityWrapper(ctx, s.source, jobs, tgs)

	// Filter on distinct host constraints.
	s.distinctHostsConstraint = NewDistinctHostsIterator(ctx, s.wrappedChecks)

	// Filter on distinct property constraints.
	s.distinctPropertyConstraint = NewDistinctPropertyIterator(ctx, s.distinctHostsConstraint)

	// Upgrade from feasible to rank iterator
	rankSource := NewFeasibleRankIterator(ctx, s.distinctPropertyConstraint)

	// Apply the bin packing; this depends on the resources needed by a
	// particular task group. Only enable eviction for the service scheduler,
	// as that logic is expensive.
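	// The priority passed to the bin packer below starts at zero; SetJob
	// later supplies the job's real priority via SetPriority.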
	evict := !batch
	s.binPack = NewBinPackIterator(ctx, rankSource, evict, 0)

	// Apply the job anti-affinity iterator. This is to avoid placing
	// multiple allocations on the same node for this job. The penalty
	// is less for batch jobs as it matters less.
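	// The penalty is applied by the JobAntiAffinityIterator to lower the
	// score of nodes that already run allocations of this job; the larger
	// the value, the stronger the spread across nodes.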
	penalty := serviceJobAntiAffinityPenalty
	if batch {
		penalty = batchJobAntiAffinityPenalty
	}
	s.jobAntiAff = NewJobAntiAffinityIterator(ctx, s.binPack, penalty, "")

	// Apply a limit function. This is to avoid scanning *every* possible node.
	s.limit = NewLimitIterator(ctx, s.jobAntiAff, 2)

	// Select the node with the maximum score for placement
	s.maxScore = NewMaxScoreIterator(ctx, s.limit)
	return s
}

func (s *GenericStack) SetNodes(baseNodes []*structs.Node) {
	// Shuffle base nodes
	shuffleNodes(baseNodes)

	// Update the set of base nodes
	s.source.SetNodes(baseNodes)

	// Apply a limit function. This is to avoid scanning *every* possible node.
	// For batch jobs we only need to evaluate 2 options and depend on the
	// power of two choices. For service jobs we need to visit "enough".
	// Using a log of the total number of nodes is a good restriction, with
	// at least 2 as the floor.
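	// For example, in a cluster with 1,000 eligible nodes,
	// ceil(log2(1000)) = 10, so at most 10 ranked options are examined.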
	limit := 2
	if n := len(baseNodes); !s.batch && n > 0 {
		logLimit := int(math.Ceil(math.Log2(float64(n))))
		if logLimit > limit {
			limit = logLimit
		}
	}
	s.limit.SetLimit(limit)
}

func (s *GenericStack) SetJob(job *structs.Job) {
	s.jobConstraint.SetConstraints(job.Constraints)
	s.distinctHostsConstraint.SetJob(job)
	s.distinctPropertyConstraint.SetJob(job)
	s.binPack.SetPriority(job.Priority)
	s.jobAntiAff.SetJob(job.ID)
	s.ctx.Eligibility().SetJob(job)
}

func (s *GenericStack) Select(tg *structs.TaskGroup) (*RankedNode, *structs.Resources) {
	// Reset the max selector and context
	s.maxScore.Reset()
	s.ctx.Reset()
	start := time.Now()

	// Get the task group's constraints.
	tgConstr := taskGroupConstraints(tg)

	// Update the parameters of iterators
	s.taskGroupDrivers.SetDrivers(tgConstr.drivers)
	s.taskGroupConstraint.SetConstraints(tgConstr.constraints)
	s.distinctHostsConstraint.SetTaskGroup(tg)
	s.distinctPropertyConstraint.SetTaskGroup(tg)
	s.wrappedChecks.SetTaskGroup(tg.Name)
	s.binPack.SetTaskGroup(tg)

	// Find the node with the max score
	option := s.maxScore.Next()

	// Ensure that the task resources were specified
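	// If per-task resources were not recorded on the option, fall back to
	// the resources declared on each task.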
	if option != nil && len(option.TaskResources) != len(tg.Tasks) {
		for _, task := range tg.Tasks {
			option.SetTaskResources(task, task.Resources)
		}
	}

	// Store the compute time
	s.ctx.Metrics().AllocationTime = time.Since(start)
	return option, tgConstr.size
}

// SelectPreferringNodes returns a node where an allocation of the task group
// can be placed; the given nodes are preferred over the other available nodes.
// If none of the preferred nodes yields a placement, selection falls back to
// the full node set.
func (s *GenericStack) SelectPreferringNodes(tg *structs.TaskGroup, nodes []*structs.Node) (*RankedNode, *structs.Resources) {
	originalNodes := s.source.nodes
	s.source.SetNodes(nodes)
	if option, resources := s.Select(tg); option != nil {
		s.source.SetNodes(originalNodes)
		return option, resources
	}
	s.source.SetNodes(originalNodes)
	return s.Select(tg)
}

// SystemStack is the Stack used for the System scheduler. It is designed to
// attempt to make placements on all nodes.
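// Unlike GenericStack, it has no limit or max-score stage, since a system job
// is expected to be placed on every feasible node.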
type SystemStack struct {
	ctx                        Context
	source                     *StaticIterator
	wrappedChecks              *FeasibilityWrapper
	jobConstraint              *ConstraintChecker
	taskGroupDrivers           *DriverChecker
	taskGroupConstraint        *ConstraintChecker
	distinctPropertyConstraint *DistinctPropertyIterator
	binPack                    *BinPackIterator
}

// NewSystemStack constructs a stack used for selecting system job placements.
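// The iterators are chained in the following order: static node source,
// feasibility checks (job constraints, task group drivers, task group
// constraints), distinct_property filtering, and bin packing.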
func NewSystemStack(ctx Context) *SystemStack {
	// Create a new stack
	s := &SystemStack{ctx: ctx}

	// Create the source iterator. We visit nodes in a linear order because we
	// have to evaluate all nodes.
	s.source = NewStaticIterator(ctx, nil)

	// Attach the job constraints. The job is filled in later.
	s.jobConstraint = NewConstraintChecker(ctx, nil)

	// Filter on task group drivers first as they are faster
	s.taskGroupDrivers = NewDriverChecker(ctx, nil)

	// Filter on task group constraints second
	s.taskGroupConstraint = NewConstraintChecker(ctx, nil)

	// Create the feasibility wrapper, which wraps the feasibility checks so
	// that checking can be skipped if the computed node class has previously
	// been marked as eligible or ineligible. Generally these are checks that
	// only need to examine a single node to determine feasibility.
	jobs := []FeasibilityChecker{s.jobConstraint}
	tgs := []FeasibilityChecker{s.taskGroupDrivers, s.taskGroupConstraint}
	s.wrappedChecks = NewFeasibilityWrapper(ctx, s.source, jobs, tgs)

	// Filter on distinct property constraints.
	s.distinctPropertyConstraint = NewDistinctPropertyIterator(ctx, s.wrappedChecks)

	// Upgrade from feasible to rank iterator
	rankSource := NewFeasibleRankIterator(ctx, s.distinctPropertyConstraint)

	// Apply the bin packing; this depends on the resources needed by a
	// particular task group. Enable eviction, as system jobs are high
	// priority.
	s.binPack = NewBinPackIterator(ctx, rankSource, true, 0)
	return s
}

func (s *SystemStack) SetNodes(baseNodes []*structs.Node) {
	// Update the set of base nodes
	s.source.SetNodes(baseNodes)
}

func (s *SystemStack) SetJob(job *structs.Job) {
	s.jobConstraint.SetConstraints(job.Constraints)
	s.distinctPropertyConstraint.SetJob(job)
	s.binPack.SetPriority(job.Priority)
	s.ctx.Eligibility().SetJob(job)
}

func (s *SystemStack) Select(tg *structs.TaskGroup) (*RankedNode, *structs.Resources) {
	// Reset the binpack selector and context
	s.binPack.Reset()
	s.ctx.Reset()
	start := time.Now()

	// Get the task group's constraints.
	tgConstr := taskGroupConstraints(tg)

	// Update the parameters of iterators
	s.taskGroupDrivers.SetDrivers(tgConstr.drivers)
	s.taskGroupConstraint.SetConstraints(tgConstr.constraints)
	s.wrappedChecks.SetTaskGroup(tg.Name)
	s.distinctPropertyConstraint.SetTaskGroup(tg)
	s.binPack.SetTaskGroup(tg)

	// Get the next option that satisfies the constraints.
	option := s.binPack.Next()

	// Ensure that the task resources were specified
	if option != nil && len(option.TaskResources) != len(tg.Tasks) {
		for _, task := range tg.Tasks {
			option.SetTaskResources(task, task.Resources)
		}
	}

	// Store the compute time
	s.ctx.Metrics().AllocationTime = time.Since(start)
	return option, tgConstr.size
}