github.com/kardianos/nomad@v0.1.3-0.20151022182107-b13df73ee850/scheduler/stack.go

package scheduler

import (
	"math"
	"time"

	"github.com/hashicorp/nomad/nomad/structs"
)

const (
	// serviceJobAntiAffinityPenalty is the penalty applied
	// to the score for placing an alloc on a node that
	// already has an alloc for this job.
	serviceJobAntiAffinityPenalty = 10.0

	// batchJobAntiAffinityPenalty is the same as the
	// serviceJobAntiAffinityPenalty but for batch type jobs.
	batchJobAntiAffinityPenalty = 5.0
)

// Stack is a chained collection of iterators. The stack is used to
// make placement decisions. Different schedulers may customize the
// stack they use to vary the way placements are made.
type Stack interface {
	// SetNodes is used to set the base set of potential nodes
	SetNodes([]*structs.Node)

	// SetJob is used to set the job for selection
	SetJob(job *structs.Job)

	// Select is used to select a node for the task group
	Select(tg *structs.TaskGroup) (*RankedNode, *structs.Resources)
}

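// The sketch below is illustrative only (not from the original source) and
// assumes ctx, nodes, and job are already in scope. It shows the call pattern
// the Stack implementations in this file expect: fix the candidate nodes and
// the job once, then call Select once per task group.
//
//	stack := NewGenericStack(false, ctx) // false => service-style placement
//	stack.SetNodes(nodes)
//	stack.SetJob(job)
//	for _, tg := range job.TaskGroups {
//		option, size := stack.Select(tg)
//		if option == nil {
//			continue // no feasible node for this task group
//		}
//		_ = size // aggregate resources computed for the task group
//	}
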
// GenericStack is the Stack used for the Generic scheduler. It is
// designed to make better placement decisions at the cost of performance.
type GenericStack struct {
	batch               bool
	ctx                 Context
	source              *StaticIterator
	jobConstraint       *ConstraintIterator
	taskGroupDrivers    *DriverIterator
	taskGroupConstraint *ConstraintIterator
	binPack             *BinPackIterator
	jobAntiAff          *JobAntiAffinityIterator
	limit               *LimitIterator
	maxScore            *MaxScoreIterator
}

// NewGenericStack constructs a stack used for selecting service and batch
// job placements.
func NewGenericStack(batch bool, ctx Context) *GenericStack {
	// Create a new stack
	s := &GenericStack{
		batch: batch,
		ctx:   ctx,
	}

	// Create the source iterator. We randomize the order we visit nodes
	// to reduce collisions between schedulers and to do basic load
	// balancing across eligible nodes.
	s.source = NewRandomIterator(ctx, nil)

	// Attach the job constraints. The job is filled in later.
	s.jobConstraint = NewConstraintIterator(ctx, s.source, nil)

	// Filter on task group drivers first as they are faster
	s.taskGroupDrivers = NewDriverIterator(ctx, s.jobConstraint, nil)

	// Filter on task group constraints second
	s.taskGroupConstraint = NewConstraintIterator(ctx, s.taskGroupDrivers, nil)

	// Upgrade from a feasibility iterator to a rank iterator
	rankSource := NewFeasibleRankIterator(ctx, s.taskGroupConstraint)

	// Apply the bin packing; this depends on the resources needed
	// by a particular task group. Only enable eviction for the service
	// scheduler as that logic is expensive.
	evict := !batch
	s.binPack = NewBinPackIterator(ctx, rankSource, evict, 0)

	// Apply the job anti-affinity iterator. This is to avoid placing
	// multiple allocations for the same job on a single node. The penalty
	// is lower for batch jobs since collocation matters less for them.
	penalty := serviceJobAntiAffinityPenalty
	if batch {
		penalty = batchJobAntiAffinityPenalty
	}
	s.jobAntiAff = NewJobAntiAffinityIterator(ctx, s.binPack, penalty, "")

	// Apply a limit function. This is to avoid scanning *every* possible node.
	s.limit = NewLimitIterator(ctx, s.jobAntiAff, 2)

	// Select the node with the maximum score for placement
	s.maxScore = NewMaxScoreIterator(ctx, s.limit)
	return s
}

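// For reference, the iterator chain assembled above runs, from source to
// final selector:
//
//	RandomIterator (source) -> ConstraintIterator (job) -> DriverIterator ->
//	ConstraintIterator (task group) -> FeasibleRankIterator ->
//	BinPackIterator -> JobAntiAffinityIterator -> LimitIterator ->
//	MaxScoreIterator
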
func (s *GenericStack) SetNodes(baseNodes []*structs.Node) {
	// Shuffle base nodes
	shuffleNodes(baseNodes)

	// Update the set of base nodes
	s.source.SetNodes(baseNodes)

	// Set the limit so that we avoid scanning *every* possible node.
	// For batch jobs we only need to evaluate 2 options and depend on the
	// power of two choices. For service jobs we need to visit "enough"
	// nodes: the log of the total number of nodes is a good restriction,
	// with at least 2 as the floor.
	limit := 2
	if n := len(baseNodes); !s.batch && n > 0 {
		logLimit := int(math.Ceil(math.Log2(float64(n))))
		if logLimit > limit {
			limit = logLimit
		}
	}
	s.limit.SetLimit(limit)
}

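// As a concrete illustration of the limit heuristic above: a service job
// evaluated against 100 eligible nodes is scored on at most
// ceil(log2(100)) = 7 candidates, 1000 nodes yields 10, while a batch job
// always stops after 2 candidates (the power of two choices).
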
func (s *GenericStack) SetJob(job *structs.Job) {
	s.jobConstraint.SetConstraints(job.Constraints)
	s.binPack.SetPriority(job.Priority)
	s.jobAntiAff.SetJob(job.ID)
}

func (s *GenericStack) Select(tg *structs.TaskGroup) (*RankedNode, *structs.Resources) {
	// Reset the max selector and context
	s.maxScore.Reset()
	s.ctx.Reset()
	start := time.Now()

	// Get the task group's constraints.
	tgConstr := taskGroupConstraints(tg)

	// Update the parameters of the iterators
	s.taskGroupDrivers.SetDrivers(tgConstr.drivers)
	s.taskGroupConstraint.SetConstraints(tgConstr.constraints)
	s.binPack.SetTasks(tg.Tasks)

	// Find the node with the max score
	option := s.maxScore.Next()

	// Ensure that the task resources were specified
	if option != nil && len(option.TaskResources) != len(tg.Tasks) {
		for _, task := range tg.Tasks {
			option.SetTaskResources(task, task.Resources)
		}
	}

	// Store the compute time
	s.ctx.Metrics().AllocationTime = time.Since(start)
	return option, tgConstr.size
}

// SystemStack is the Stack used for the System scheduler. It is designed to
// attempt to make placements on all nodes.
type SystemStack struct {
	ctx                 Context
	source              *StaticIterator
	jobConstraint       *ConstraintIterator
	taskGroupDrivers    *DriverIterator
	taskGroupConstraint *ConstraintIterator
	binPack             *BinPackIterator
}

// NewSystemStack constructs a stack used for selecting system job placements
func NewSystemStack(ctx Context) *SystemStack {
	// Create a new stack
	s := &SystemStack{ctx: ctx}

	// Create the source iterator. We visit nodes in a linear order because we
	// have to evaluate on all nodes.
	s.source = NewStaticIterator(ctx, nil)

	// Attach the job constraints. The job is filled in later.
	s.jobConstraint = NewConstraintIterator(ctx, s.source, nil)

	// Filter on task group drivers first as they are faster
	s.taskGroupDrivers = NewDriverIterator(ctx, s.jobConstraint, nil)

	// Filter on task group constraints second
	s.taskGroupConstraint = NewConstraintIterator(ctx, s.taskGroupDrivers, nil)

	// Upgrade from a feasibility iterator to a rank iterator
	rankSource := NewFeasibleRankIterator(ctx, s.taskGroupConstraint)

	// Apply the bin packing; this depends on the resources needed
	// by a particular task group. Enable eviction as system jobs are high
	// priority.
	s.binPack = NewBinPackIterator(ctx, rankSource, true, 0)
	return s
}

func (s *SystemStack) SetNodes(baseNodes []*structs.Node) {
	// Update the set of base nodes
	s.source.SetNodes(baseNodes)
}

func (s *SystemStack) SetJob(job *structs.Job) {
	s.jobConstraint.SetConstraints(job.Constraints)
	s.binPack.SetPriority(job.Priority)
}

func (s *SystemStack) Select(tg *structs.TaskGroup) (*RankedNode, *structs.Resources) {
	// Reset the binpack selector and context
	s.binPack.Reset()
	s.ctx.Reset()
	start := time.Now()

	// Get the task group's constraints.
	tgConstr := taskGroupConstraints(tg)

	// Update the parameters of the iterators
	s.taskGroupDrivers.SetDrivers(tgConstr.drivers)
	s.taskGroupConstraint.SetConstraints(tgConstr.constraints)
	s.binPack.SetTasks(tg.Tasks)

	// Get the next option that satisfies the constraints.
	option := s.binPack.Next()

	// Ensure that the task resources were specified
	if option != nil && len(option.TaskResources) != len(tg.Tasks) {
		for _, task := range tg.Tasks {
			option.SetTaskResources(task, task.Resources)
		}
	}

	// Store the compute time
	s.ctx.Metrics().AllocationTime = time.Since(start)
	return option, tgConstr.size
}
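
// exampleSystemSelect is an illustrative sketch, not part of the original
// scheduler: it shows one plausible way a caller could drive a SystemStack,
// picking a ranked option for every task group of a job against a fixed set
// of nodes. Real schedulers also handle allocation updates, metrics, and plan
// submission, all of which are omitted here.
func exampleSystemSelect(ctx Context, nodes []*structs.Node, job *structs.Job) map[string]*RankedNode {
	stack := NewSystemStack(ctx)
	stack.SetNodes(nodes)
	stack.SetJob(job)

	picks := make(map[string]*RankedNode, len(job.TaskGroups))
	for _, tg := range job.TaskGroups {
		// Select returns nil when no node satisfies the task group's
		// drivers, constraints, and resource ask.
		if option, _ := stack.Select(tg); option != nil {
			picks[tg.Name] = option
		}
	}
	return picks
}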