github.com/dkerwin/nomad@v0.3.3-0.20160525181927-74554135514b/nomad/blocked_evals.go

github.com/dkerwin/nomad@v0.3.3-0.20160525181927-74554135514b/nomad/blocked_evals.go (about)

     1  package nomad
     2  
     3  import (
     4  	"sync"
     5  	"time"
     6  
     7  	"github.com/armon/go-metrics"
     8  	"github.com/hashicorp/nomad/nomad/structs"
     9  )
    10  
    11  const (
    12  	// unblockBuffer is the buffer size for the unblock channel. The buffer
    13  	// should be large to ensure that the FSM doesn't block when calling Unblock
    14  	// as this would apply back-pressure on Raft.
    15  	unblockBuffer = 8096
    16  )
    17  
// BlockedEvals is used to track evaluations that shouldn't be queued until a
// certain class of nodes becomes available. An evaluation is put into the
// blocked state when it is run through the scheduler and produced failed
// allocations. It is unblocked when the capacity of a node that could run the
// failed allocation becomes available.
type BlockedEvals struct {
	// evalBroker is the broker that unblocked evaluations are enqueued into.
	evalBroker *EvalBroker
	// enabled records whether the tracker is active; it is toggled by
	// SetEnabled and checked by the methods before they do any work.
	enabled    bool
	// stats holds the running counters reported by Stats.
	stats      *BlockedStats
	// l is the lock the methods take around reads and writes of the
	// tracker's state.
	l          sync.RWMutex

	// captured is the set of evaluations that are captured by computed node
	// classes.
	captured map[string]*structs.Evaluation

	// escaped is the set of evaluations that have escaped computed node
	// classes.
	escaped map[string]*structs.Evaluation

	// capacityChangeCh is used to buffer capacity updates sent by Unblock
	// until the watchCapacity goroutine processes them.
	capacityChangeCh chan *capacityUpdate

	// jobs is the set of job IDs that currently have a blocked eval and is
	// used to ensure that only one blocked eval exists for each job.
	jobs map[string]struct{}

	// unblockIndexes maps computed node classes to the index in which they were
	// unblocked. This is used to check if an evaluation could have been
	// unblocked between the time they were in the scheduler and the time they
	// are being blocked.
	unblockIndexes map[string]uint64

	// duplicates is the set of evaluations for jobs that had pre-existing
	// blocked evaluations. These should be marked as cancelled since only one
	// blocked eval is needed per job.
	duplicates []*structs.Evaluation

	// duplicateCh is used to signal that a duplicate eval was added to the
	// duplicate set. It can be used to unblock waiting callers looking for
	// duplicates.
	duplicateCh chan struct{}

	// stopCh is used to stop any created goroutines.
	stopCh chan struct{}
}
    63  
// capacityUpdate stores the arguments of a single Unblock call so they can be
// passed over capacityChangeCh to the watcher goroutine.
type capacityUpdate struct {
	// computedClass is the computed node class whose capacity changed.
	computedClass string
	// index is the index that was passed to Unblock for this change.
	index         uint64
}
    69  
// BlockedStats holds the counters describing the state of the blocked eval
// tracker.
type BlockedStats struct {
	// TotalEscaped is the total number of blocked evaluations that have escaped
	// computed node classes.
	TotalEscaped int

	// TotalBlocked is the total number of blocked evaluations.
	TotalBlocked int
}
    79  
    80  // NewBlockedEvals creates a new blocked eval tracker that will enqueue
    81  // unblocked evals into the passed broker.
    82  func NewBlockedEvals(evalBroker *EvalBroker) *BlockedEvals {
    83  	return &BlockedEvals{
    84  		evalBroker:       evalBroker,
    85  		captured:         make(map[string]*structs.Evaluation),
    86  		escaped:          make(map[string]*structs.Evaluation),
    87  		jobs:             make(map[string]struct{}),
    88  		unblockIndexes:   make(map[string]uint64),
    89  		capacityChangeCh: make(chan *capacityUpdate, unblockBuffer),
    90  		duplicateCh:      make(chan struct{}, 1),
    91  		stopCh:           make(chan struct{}),
    92  		stats:            new(BlockedStats),
    93  	}
    94  }
    95  
    96  // Enabled is used to check if the broker is enabled.
    97  func (b *BlockedEvals) Enabled() bool {
    98  	b.l.RLock()
    99  	defer b.l.RUnlock()
   100  	return b.enabled
   101  }
   102  
   103  // SetEnabled is used to control if the broker is enabled. The broker
   104  // should only be enabled on the active leader.
   105  func (b *BlockedEvals) SetEnabled(enabled bool) {
   106  	b.l.Lock()
   107  	if b.enabled == enabled {
   108  		// No-op
   109  		return
   110  	} else if enabled {
   111  		go b.watchCapacity()
   112  	} else {
   113  		close(b.stopCh)
   114  	}
   115  	b.enabled = enabled
   116  	b.l.Unlock()
   117  	if !enabled {
   118  		b.Flush()
   119  	}
   120  }
   121  
   122  // Block tracks the passed evaluation and enqueues it into the eval broker when
   123  // a suitable node calls unblock.
   124  func (b *BlockedEvals) Block(eval *structs.Evaluation) {
   125  	b.l.Lock()
   126  	defer b.l.Unlock()
   127  
   128  	// Do nothing if not enabled
   129  	if !b.enabled {
   130  		return
   131  	}
   132  
   133  	// Check if the job already has a blocked evaluation. If it does add it to
   134  	// the list of duplicates. We omly ever want one blocked evaluation per job,
   135  	// otherwise we would create unnecessary work for the scheduler as multiple
   136  	// evals for the same job would be run, all producing the same outcome.
   137  	if _, existing := b.jobs[eval.JobID]; existing {
   138  		b.duplicates = append(b.duplicates, eval)
   139  
   140  		// Unblock any waiter.
   141  		select {
   142  		case b.duplicateCh <- struct{}{}:
   143  		default:
   144  		}
   145  
   146  		return
   147  	}
   148  
   149  	// Check if the eval missed an unblock while it was in the scheduler at an
   150  	// older index. The scheduler could have been invoked with a snapshot of
   151  	// state that was prior to additional capacity being added or allocations
   152  	// becoming terminal.
   153  	if b.missedUnblock(eval) {
   154  		// Just re-enqueue the eval immediately
   155  		b.evalBroker.Enqueue(eval)
   156  		return
   157  	}
   158  
   159  	// Mark the job as tracked.
   160  	b.stats.TotalBlocked++
   161  	b.jobs[eval.JobID] = struct{}{}
   162  
   163  	// If the eval has escaped, meaning computed node classes could not capture
   164  	// the constraints of the job, we store the eval separately as we have to
   165  	// unblock it whenever node capacity changes. This is because we don't know
   166  	// what node class is feasible for the jobs constraints.
   167  	if eval.EscapedComputedClass {
   168  		b.escaped[eval.ID] = eval
   169  		b.stats.TotalEscaped++
   170  		return
   171  	}
   172  
   173  	// Add the eval to the set of blocked evals whose jobs constraints are
   174  	// captured by computed node class.
   175  	b.captured[eval.ID] = eval
   176  }
   177  
   178  // missedUnblock returns whether an evaluation missed an unblock while it was in
   179  // the scheduler. Since the scheduler can operate at an index in the past, the
   180  // evaluation may have been processed missing data that would allow it to
   181  // complete. This method returns if that is the case and should be called with
   182  // the lock held.
   183  func (b *BlockedEvals) missedUnblock(eval *structs.Evaluation) bool {
   184  	var max uint64 = 0
   185  	for class, index := range b.unblockIndexes {
   186  		// Calculate the max unblock index
   187  		if max < index {
   188  			max = index
   189  		}
   190  
   191  		elig, ok := eval.ClassEligibility[class]
   192  		if !ok {
   193  			// The evaluation was processed and did not encounter this class.
   194  			// Thus for correctness we need to unblock it.
   195  			return true
   196  		}
   197  
   198  		// The evaluation could use the computed node class and the eval was
   199  		// processed before the last unblock.
   200  		if elig && eval.SnapshotIndex < index {
   201  			return true
   202  		}
   203  	}
   204  
   205  	// If the evaluation has escaped, and the map contains an index older than
   206  	// the evaluations, it should be unblocked.
   207  	if eval.EscapedComputedClass && eval.SnapshotIndex < max {
   208  		return true
   209  	}
   210  
   211  	// The evaluation is ahead of all recent unblocks.
   212  	return false
   213  }
   214  
   215  // Unblock causes any evaluation that could potentially make progress on a
   216  // capacity change on the passed computed node class to be enqueued into the
   217  // eval broker.
   218  func (b *BlockedEvals) Unblock(computedClass string, index uint64) {
   219  	b.l.Lock()
   220  
   221  	// Do nothing if not enabled
   222  	if !b.enabled {
   223  		b.l.Unlock()
   224  		return
   225  	}
   226  
   227  	// Store the index in which the unblock happened. We use this on subsequent
   228  	// block calls in case the evaluation was in the scheduler when a trigger
   229  	// occured.
   230  	b.unblockIndexes[computedClass] = index
   231  	b.l.Unlock()
   232  
   233  	b.capacityChangeCh <- &capacityUpdate{
   234  		computedClass: computedClass,
   235  		index:         index,
   236  	}
   237  }
   238  
   239  // watchCapacity is a long lived function that watches for capacity changes in
   240  // nodes and unblocks the correct set of evals.
   241  func (b *BlockedEvals) watchCapacity() {
   242  	for {
   243  		select {
   244  		case <-b.stopCh:
   245  			return
   246  		case update := <-b.capacityChangeCh:
   247  			b.unblock(update.computedClass, update.index)
   248  		}
   249  	}
   250  }
   251  
   252  // unblock unblocks all blocked evals that could run on the passed computed node
   253  // class.
   254  func (b *BlockedEvals) unblock(computedClass string, index uint64) {
   255  	b.l.Lock()
   256  	defer b.l.Unlock()
   257  
   258  	// Protect against the case of a flush.
   259  	if !b.enabled {
   260  		return
   261  	}
   262  
   263  	// Every eval that has escaped computed node class has to be unblocked
   264  	// because any node could potentially be feasible.
   265  	var unblocked []*structs.Evaluation
   266  	if l := len(b.escaped); l != 0 {
   267  		unblocked = make([]*structs.Evaluation, 0, l)
   268  		for id, eval := range b.escaped {
   269  			unblocked = append(unblocked, eval)
   270  			delete(b.escaped, id)
   271  			delete(b.jobs, eval.JobID)
   272  		}
   273  	}
   274  
   275  	// We unblock any eval that is explicitly eligible for the computed class
   276  	// and also any eval that is not eligible or uneligible. This signifies that
   277  	// when the evaluation was originally run through the scheduler, that it
   278  	// never saw a node with the given computed class and thus needs to be
   279  	// unblocked for correctness.
   280  	for id, eval := range b.captured {
   281  		if elig, ok := eval.ClassEligibility[computedClass]; ok && !elig {
   282  			// Can skip because the eval has explicitly marked the node class
   283  			// as ineligible.
   284  			continue
   285  		}
   286  
   287  		// The computed node class has never been seen by the eval so we unblock
   288  		// it.
   289  		unblocked = append(unblocked, eval)
   290  		delete(b.jobs, eval.JobID)
   291  		delete(b.captured, id)
   292  	}
   293  
   294  	if l := len(unblocked); l != 0 {
   295  		// Update the counters
   296  		b.stats.TotalEscaped = 0
   297  		b.stats.TotalBlocked -= l
   298  
   299  		// Enqueue all the unblocked evals into the broker.
   300  		b.evalBroker.EnqueueAll(unblocked)
   301  	}
   302  }
   303  
   304  // UnblockFailed unblocks all blocked evaluation that were due to scheduler
   305  // failure.
   306  func (b *BlockedEvals) UnblockFailed() {
   307  	b.l.Lock()
   308  	defer b.l.Unlock()
   309  
   310  	// Do nothing if not enabled
   311  	if !b.enabled {
   312  		return
   313  	}
   314  
   315  	var unblock []*structs.Evaluation
   316  	for id, eval := range b.captured {
   317  		if eval.TriggeredBy == structs.EvalTriggerMaxPlans {
   318  			unblock = append(unblock, eval)
   319  			delete(b.captured, id)
   320  		}
   321  	}
   322  
   323  	for id, eval := range b.escaped {
   324  		if eval.TriggeredBy == structs.EvalTriggerMaxPlans {
   325  			unblock = append(unblock, eval)
   326  			delete(b.escaped, id)
   327  		}
   328  	}
   329  
   330  	b.evalBroker.EnqueueAll(unblock)
   331  }
   332  
   333  // GetDuplicates returns all the duplicate evaluations and blocks until the
   334  // passed timeout.
   335  func (b *BlockedEvals) GetDuplicates(timeout time.Duration) []*structs.Evaluation {
   336  	var timeoutTimer *time.Timer
   337  	var timeoutCh <-chan time.Time
   338  SCAN:
   339  	b.l.Lock()
   340  	if len(b.duplicates) != 0 {
   341  		dups := b.duplicates
   342  		b.duplicates = nil
   343  		b.l.Unlock()
   344  		return dups
   345  	}
   346  	b.l.Unlock()
   347  
   348  	// Create the timer
   349  	if timeoutTimer == nil && timeout != 0 {
   350  		timeoutTimer = time.NewTimer(timeout)
   351  		timeoutCh = timeoutTimer.C
   352  		defer timeoutTimer.Stop()
   353  	}
   354  
   355  	select {
   356  	case <-b.stopCh:
   357  		return nil
   358  	case <-timeoutCh:
   359  		return nil
   360  	case <-b.duplicateCh:
   361  		goto SCAN
   362  	}
   363  }
   364  
   365  // Flush is used to clear the state of blocked evaluations.
   366  func (b *BlockedEvals) Flush() {
   367  	b.l.Lock()
   368  	defer b.l.Unlock()
   369  
   370  	// Reset the blocked eval tracker.
   371  	b.stats.TotalEscaped = 0
   372  	b.stats.TotalBlocked = 0
   373  	b.captured = make(map[string]*structs.Evaluation)
   374  	b.escaped = make(map[string]*structs.Evaluation)
   375  	b.jobs = make(map[string]struct{})
   376  	b.duplicates = nil
   377  	b.capacityChangeCh = make(chan *capacityUpdate, unblockBuffer)
   378  	b.stopCh = make(chan struct{})
   379  	b.duplicateCh = make(chan struct{}, 1)
   380  }
   381  
   382  // Stats is used to query the state of the blocked eval tracker.
   383  func (b *BlockedEvals) Stats() *BlockedStats {
   384  	// Allocate a new stats struct
   385  	stats := new(BlockedStats)
   386  
   387  	b.l.RLock()
   388  	defer b.l.RUnlock()
   389  
   390  	// Copy all the stats
   391  	stats.TotalEscaped = b.stats.TotalEscaped
   392  	stats.TotalBlocked = b.stats.TotalBlocked
   393  	return stats
   394  }
   395  
   396  // EmitStats is used to export metrics about the blocked eval tracker while enabled
   397  func (b *BlockedEvals) EmitStats(period time.Duration, stopCh chan struct{}) {
   398  	for {
   399  		select {
   400  		case <-time.After(period):
   401  			stats := b.Stats()
   402  			metrics.SetGauge([]string{"nomad", "blocked_evals", "total_blocked"}, float32(stats.TotalBlocked))
   403  			metrics.SetGauge([]string{"nomad", "blocked_evals", "total_escaped"}, float32(stats.TotalEscaped))
   404  		case <-stopCh:
   405  			return
   406  		}
   407  	}
   408  }