github.com/maier/nomad@v0.4.1-0.20161110003312-a9e3d0b8549d/nomad/blocked_evals.go (about)

     1  package nomad
     2  
     3  import (
     4  	"sync"
     5  	"time"
     6  
     7  	"github.com/armon/go-metrics"
     8  	"github.com/hashicorp/consul/lib"
     9  	"github.com/hashicorp/nomad/nomad/structs"
    10  )
    11  
    12  const (
    13  	// unblockBuffer is the capacity of the buffered capacityChangeCh channel
    14  	// that Unblock sends on. The buffer should be large to ensure that the FSM
    15  	// doesn't block when calling Unblock, as this would apply back-pressure on Raft.
    16  	unblockBuffer = 8096
    17  )
    18  
    19  // BlockedEvals is used to track evaluations that shouldn't be queued until a
    20  // certain class of nodes becomes available. An evaluation is put into the
    21  // blocked state when it is run through the scheduler and produced failed
    22  // allocations. It is unblocked when the capacity of a node that could run the
    23  // failed allocation becomes available.
    24  type BlockedEvals struct {
    25  	evalBroker *EvalBroker   // broker that unblocked evaluations are enqueued into
    26  	enabled    bool          // toggled by SetEnabled; most methods do nothing while false
    27  	stats      *BlockedStats // running counters; Stats returns a copy
    28  	l          sync.RWMutex  // taken by the methods of this type before touching tracker state
    29  
    30  	// captured is the set of evaluations that are captured by computed node
    31  	// classes, keyed by evaluation ID.
    32  	captured map[string]wrappedEval
    33  
    34  	// escaped is the set of evaluations that have escaped computed node
    35  	// classes, keyed by evaluation ID.
    36  	escaped map[string]wrappedEval
    37  
    38  	// capacityChangeCh buffers the capacity updates sent by Unblock until watchCapacity processes them.
    39  	capacityChangeCh chan *capacityUpdate
    40  
    41  	// jobs is the set of job IDs that have a blocked eval and is used to ensure
    42  	// that only one blocked eval exists for each job.
    43  	jobs map[string]struct{}
    44  
    45  	// unblockIndexes maps computed node classes to the index in which they were
    46  	// unblocked. This is used to check if an evaluation could have been
    47  	// unblocked between the time they were in the scheduler and the time they
    48  	// are being blocked.
    49  	unblockIndexes map[string]uint64
    50  
    51  	// duplicates is the set of evaluations for jobs that had pre-existing
    52  	// blocked evaluations. These should be marked as cancelled since only one
    53  	// blocked eval is needed per job.
    54  	duplicates []*structs.Evaluation
    55  
    56  	// duplicateCh is used to signal that a duplicate eval was added to the
    57  	// duplicate set. It can be used to unblock waiting callers looking for
    58  	// duplicates.
    59  	duplicateCh chan struct{}
    60  
    61  	// stopCh is closed by SetEnabled(false) to stop any created goroutines.
    62  	stopCh chan struct{}
    63  }
    64  
    65  // capacityUpdate is the message Unblock sends on capacityChangeCh for watchCapacity to process.
    66  type capacityUpdate struct {
    67  	computedClass string // computed node class passed to Unblock
    68  	index         uint64 // index passed to Unblock alongside the class
    69  }
    70  
    71  // wrappedEval captures both the evaluation and the optional token.
    72  type wrappedEval struct {
    73  	eval  *structs.Evaluation // the blocked evaluation
    74  	token string              // empty when tracked via Block; the caller-supplied token when tracked via Reblock
    75  }
    76  
    77  // BlockedStats holds the counters maintained by the blocked eval tracker.
    78  type BlockedStats struct {
    79  	// TotalEscaped is the total number of blocked evaluations that have escaped
    80  	// computed node classes.
    81  	TotalEscaped int
    82  
    83  	// TotalBlocked is the total number of blocked evaluations.
    84  	TotalBlocked int
    85  }
    86  
    87  // NewBlockedEvals creates a new blocked eval tracker that will enqueue
    88  // unblocked evals into the passed broker.
    89  func NewBlockedEvals(evalBroker *EvalBroker) *BlockedEvals {
    90  	return &BlockedEvals{
    91  		evalBroker:       evalBroker,
    92  		captured:         make(map[string]wrappedEval),
    93  		escaped:          make(map[string]wrappedEval),
    94  		jobs:             make(map[string]struct{}),
    95  		unblockIndexes:   make(map[string]uint64),
    96  		capacityChangeCh: make(chan *capacityUpdate, unblockBuffer),
    97  		duplicateCh:      make(chan struct{}, 1),
    98  		stopCh:           make(chan struct{}),
    99  		stats:            new(BlockedStats),
   100  	}
   101  }
   102  
   103  // Enabled is used to check if the broker is enabled.
   104  func (b *BlockedEvals) Enabled() bool {
   105  	b.l.RLock()
   106  	defer b.l.RUnlock()
   107  	return b.enabled
   108  }
   109  
   110  // SetEnabled is used to control if the blocked eval tracker is enabled. The
   111  // tracker should only be enabled on the active leader.
   112  func (b *BlockedEvals) SetEnabled(enabled bool) {
   113  	b.l.Lock()
   114  	if b.enabled == enabled {
   115  		// No-op
   116  		b.l.Unlock()
   117  		return
   118  	} else if enabled {
   119  		go b.watchCapacity()
   120  	} else {
   121  		close(b.stopCh)
   122  	}
   123  	b.enabled = enabled
   124  	b.l.Unlock()
   125  	if !enabled {
   126  		b.Flush()
   127  	}
   128  }
   129  
   130  // Block tracks the passed evaluation and enqueues it into the eval broker when
   131  // a suitable node calls unblock.
   132  func (b *BlockedEvals) Block(eval *structs.Evaluation) {
   133  	b.processBlock(eval, "")
   134  }
   135  
   136  // Reblock tracks the passed evaluation and enqueues it into the eval broker when
   137  // a suitable node calls unblock. Reblock should be used over Block when the
   138  // blocking is occurring by an outstanding evaluation. The token is the
   139  // evaluation's token.
   140  func (b *BlockedEvals) Reblock(eval *structs.Evaluation, token string) {
   141  	b.processBlock(eval, token)
   142  }
   143  
   144  // processBlock is the implementation of blocking an evaluation. It supports
   145  // taking an optional evaluation token to use when reblocking an evaluation that
   146  // may be outstanding.
   147  func (b *BlockedEvals) processBlock(eval *structs.Evaluation, token string) {
   148  	b.l.Lock()
   149  	defer b.l.Unlock()
   150  
   151  	// Do nothing if not enabled
   152  	if !b.enabled {
   153  		return
   154  	}
   155  
   156  	// Check if the job already has a blocked evaluation. If it does add it to
   157  	// the list of duplicates. We omly ever want one blocked evaluation per job,
   158  	// otherwise we would create unnecessary work for the scheduler as multiple
   159  	// evals for the same job would be run, all producing the same outcome.
   160  	if _, existing := b.jobs[eval.JobID]; existing {
   161  		b.duplicates = append(b.duplicates, eval)
   162  
   163  		// Unblock any waiter.
   164  		select {
   165  		case b.duplicateCh <- struct{}{}:
   166  		default:
   167  		}
   168  
   169  		return
   170  	}
   171  
   172  	// Check if the eval missed an unblock while it was in the scheduler at an
   173  	// older index. The scheduler could have been invoked with a snapshot of
   174  	// state that was prior to additional capacity being added or allocations
   175  	// becoming terminal.
   176  	if b.missedUnblock(eval) {
   177  		// Just re-enqueue the eval immediately. We pass the token so that the
   178  		// eval_broker can properly handle the case in which the evaluation is
   179  		// still outstanding.
   180  		b.evalBroker.EnqueueAll(map[*structs.Evaluation]string{eval: token})
   181  		return
   182  	}
   183  
   184  	// Mark the job as tracked.
   185  	b.stats.TotalBlocked++
   186  	b.jobs[eval.JobID] = struct{}{}
   187  
   188  	// Wrap the evaluation, capturing its token.
   189  	wrapped := wrappedEval{
   190  		eval:  eval,
   191  		token: token,
   192  	}
   193  
   194  	// If the eval has escaped, meaning computed node classes could not capture
   195  	// the constraints of the job, we store the eval separately as we have to
   196  	// unblock it whenever node capacity changes. This is because we don't know
   197  	// what node class is feasible for the jobs constraints.
   198  	if eval.EscapedComputedClass {
   199  		b.escaped[eval.ID] = wrapped
   200  		b.stats.TotalEscaped++
   201  		return
   202  	}
   203  
   204  	// Add the eval to the set of blocked evals whose jobs constraints are
   205  	// captured by computed node class.
   206  	b.captured[eval.ID] = wrapped
   207  }
   208  
   209  // missedUnblock returns whether an evaluation missed an unblock while it was in
   210  // the scheduler. Since the scheduler can operate at an index in the past, the
   211  // evaluation may have been processed missing data that would allow it to
   212  // complete. This method returns if that is the case and should be called with
   213  // the lock held.
   214  func (b *BlockedEvals) missedUnblock(eval *structs.Evaluation) bool {
   215  	var max uint64 = 0
   216  	for class, index := range b.unblockIndexes {
   217  		// Calculate the max unblock index
   218  		if max < index {
   219  			max = index
   220  		}
   221  
   222  		elig, ok := eval.ClassEligibility[class]
   223  		if !ok && eval.SnapshotIndex < index {
   224  			// The evaluation was processed and did not encounter this class
   225  			// because it was added after it was processed. Thus for correctness
   226  			// we need to unblock it.
   227  			return true
   228  		}
   229  
   230  		// The evaluation could use the computed node class and the eval was
   231  		// processed before the last unblock.
   232  		if elig && eval.SnapshotIndex < index {
   233  			return true
   234  		}
   235  	}
   236  
   237  	// If the evaluation has escaped, and the map contains an index older than
   238  	// the evaluations, it should be unblocked.
   239  	if eval.EscapedComputedClass && eval.SnapshotIndex < max {
   240  		return true
   241  	}
   242  
   243  	// The evaluation is ahead of all recent unblocks.
   244  	return false
   245  }
   246  
   247  // Unblock causes any evaluation that could potentially make progress on a
   248  // capacity change on the passed computed node class to be enqueued into the
   249  // eval broker.
   250  func (b *BlockedEvals) Unblock(computedClass string, index uint64) {
   251  	b.l.Lock()
   252  
   253  	// Do nothing if not enabled
   254  	if !b.enabled {
   255  		b.l.Unlock()
   256  		return
   257  	}
   258  
   259  	// Store the index in which the unblock happened. We use this on subsequent
   260  	// block calls in case the evaluation was in the scheduler when a trigger
   261  	// occurred.
   262  	b.unblockIndexes[computedClass] = index
   263  	b.l.Unlock()
   264  
   265  	b.capacityChangeCh <- &capacityUpdate{
   266  		computedClass: computedClass,
   267  		index:         index,
   268  	}
   269  }
   270  
   271  // watchCapacity is a long lived function that watches for capacity changes in
   272  // nodes and unblocks the correct set of evals.
   273  func (b *BlockedEvals) watchCapacity() {
   274  	for {
   275  		select {
   276  		case <-b.stopCh:
   277  			return
   278  		case update := <-b.capacityChangeCh:
   279  			b.unblock(update.computedClass, update.index)
   280  		}
   281  	}
   282  }
   283  
   284  // unblock unblocks all blocked evals that could run on the passed computed node
   285  // class.
   286  func (b *BlockedEvals) unblock(computedClass string, index uint64) {
   287  	b.l.Lock()
   288  	defer b.l.Unlock()
   289  
   290  	// Protect against the case of a flush.
   291  	if !b.enabled {
   292  		return
   293  	}
   294  
   295  	// Every eval that has escaped computed node class has to be unblocked
   296  	// because any node could potentially be feasible.
   297  	numEscaped := len(b.escaped)
   298  	unblocked := make(map[*structs.Evaluation]string, lib.MaxInt(numEscaped, 4))
   299  	if numEscaped != 0 {
   300  		for id, wrapped := range b.escaped {
   301  			unblocked[wrapped.eval] = wrapped.token
   302  			delete(b.escaped, id)
   303  			delete(b.jobs, wrapped.eval.JobID)
   304  		}
   305  	}
   306  
   307  	// We unblock any eval that is explicitly eligible for the computed class
   308  	// and also any eval that is not eligible or uneligible. This signifies that
   309  	// when the evaluation was originally run through the scheduler, that it
   310  	// never saw a node with the given computed class and thus needs to be
   311  	// unblocked for correctness.
   312  	for id, wrapped := range b.captured {
   313  		if elig, ok := wrapped.eval.ClassEligibility[computedClass]; ok && !elig {
   314  			// Can skip because the eval has explicitly marked the node class
   315  			// as ineligible.
   316  			continue
   317  		}
   318  
   319  		// The computed node class has never been seen by the eval so we unblock
   320  		// it.
   321  		unblocked[wrapped.eval] = wrapped.token
   322  		delete(b.jobs, wrapped.eval.JobID)
   323  		delete(b.captured, id)
   324  	}
   325  
   326  	if l := len(unblocked); l != 0 {
   327  		// Update the counters
   328  		b.stats.TotalEscaped = 0
   329  		b.stats.TotalBlocked -= l
   330  
   331  		// Enqueue all the unblocked evals into the broker.
   332  		b.evalBroker.EnqueueAll(unblocked)
   333  	}
   334  }
   335  
   336  // UnblockFailed unblocks all blocked evaluation that were due to scheduler
   337  // failure.
   338  func (b *BlockedEvals) UnblockFailed() {
   339  	b.l.Lock()
   340  	defer b.l.Unlock()
   341  
   342  	// Do nothing if not enabled
   343  	if !b.enabled {
   344  		return
   345  	}
   346  
   347  	unblocked := make(map[*structs.Evaluation]string, 4)
   348  	for id, wrapped := range b.captured {
   349  		if wrapped.eval.TriggeredBy == structs.EvalTriggerMaxPlans {
   350  			unblocked[wrapped.eval] = wrapped.token
   351  			delete(b.captured, id)
   352  			delete(b.jobs, wrapped.eval.JobID)
   353  		}
   354  	}
   355  
   356  	for id, wrapped := range b.escaped {
   357  		if wrapped.eval.TriggeredBy == structs.EvalTriggerMaxPlans {
   358  			unblocked[wrapped.eval] = wrapped.token
   359  			delete(b.escaped, id)
   360  			delete(b.jobs, wrapped.eval.JobID)
   361  			b.stats.TotalEscaped -= 1
   362  		}
   363  	}
   364  
   365  	if l := len(unblocked); l > 0 {
   366  		b.stats.TotalBlocked -= l
   367  		b.evalBroker.EnqueueAll(unblocked)
   368  	}
   369  }
   370  
   371  // GetDuplicates returns all the duplicate evaluations and blocks until the
   372  // passed timeout.
   373  func (b *BlockedEvals) GetDuplicates(timeout time.Duration) []*structs.Evaluation {
   374  	var timeoutTimer *time.Timer
   375  	var timeoutCh <-chan time.Time
   376  SCAN:
   377  	b.l.Lock()
   378  	if len(b.duplicates) != 0 {
   379  		dups := b.duplicates
   380  		b.duplicates = nil
   381  		b.l.Unlock()
   382  		return dups
   383  	}
   384  	b.l.Unlock()
   385  
   386  	// Create the timer
   387  	if timeoutTimer == nil && timeout != 0 {
   388  		timeoutTimer = time.NewTimer(timeout)
   389  		timeoutCh = timeoutTimer.C
   390  		defer timeoutTimer.Stop()
   391  	}
   392  
   393  	select {
   394  	case <-b.stopCh:
   395  		return nil
   396  	case <-timeoutCh:
   397  		return nil
   398  	case <-b.duplicateCh:
   399  		goto SCAN
   400  	}
   401  }
   402  
   403  // Flush is used to clear the state of blocked evaluations.
   404  func (b *BlockedEvals) Flush() {
   405  	b.l.Lock()
   406  	defer b.l.Unlock()
   407  
   408  	// Reset the blocked eval tracker.
   409  	b.stats.TotalEscaped = 0
   410  	b.stats.TotalBlocked = 0
   411  	b.captured = make(map[string]wrappedEval)
   412  	b.escaped = make(map[string]wrappedEval)
   413  	b.jobs = make(map[string]struct{})
   414  	b.duplicates = nil
   415  	b.capacityChangeCh = make(chan *capacityUpdate, unblockBuffer)
   416  	b.stopCh = make(chan struct{})
   417  	b.duplicateCh = make(chan struct{}, 1)
   418  }
   419  
   420  // Stats is used to query the state of the blocked eval tracker.
   421  func (b *BlockedEvals) Stats() *BlockedStats {
   422  	// Allocate a new stats struct
   423  	stats := new(BlockedStats)
   424  
   425  	b.l.RLock()
   426  	defer b.l.RUnlock()
   427  
   428  	// Copy all the stats
   429  	stats.TotalEscaped = b.stats.TotalEscaped
   430  	stats.TotalBlocked = b.stats.TotalBlocked
   431  	return stats
   432  }
   433  
   434  // EmitStats is used to export metrics about the blocked eval tracker while enabled
   435  func (b *BlockedEvals) EmitStats(period time.Duration, stopCh chan struct{}) {
   436  	for {
   437  		select {
   438  		case <-time.After(period):
   439  			stats := b.Stats()
   440  			metrics.SetGauge([]string{"nomad", "blocked_evals", "total_blocked"}, float32(stats.TotalBlocked))
   441  			metrics.SetGauge([]string{"nomad", "blocked_evals", "total_escaped"}, float32(stats.TotalEscaped))
   442  		case <-stopCh:
   443  			return
   444  		}
   445  	}
   446  }