github.com/diptanu/nomad@v0.5.7-0.20170516172507-d72e86cbe3d9/nomad/blocked_evals.go

github.com/diptanu/nomad@v0.5.7-0.20170516172507-d72e86cbe3d9/nomad/blocked_evals.go (about)

     1  package nomad
     2  
     3  import (
     4  	"sync"
     5  	"time"
     6  
     7  	"github.com/armon/go-metrics"
     8  	"github.com/hashicorp/consul/lib"
     9  	"github.com/hashicorp/nomad/nomad/structs"
    10  )
    11  
    12  const (
    13  	// unblockBuffer is the buffer size for the unblock channel. The buffer
    14  	// should be large to ensure that the FSM doesn't block when calling Unblock
    15  	// as this would apply back-pressure on Raft.
    16  	unblockBuffer = 8096
    17  )
    18  
    19  // BlockedEvals is used to track evaluations that shouldn't be queued until a
    20  // certain class of nodes becomes available. An evaluation is put into the
    21  // blocked state when it is run through the scheduler and produced failed
    22  // allocations. It is unblocked when the capacity of a node that could run the
    23  // failed allocation becomes available.
    24  type BlockedEvals struct {
    25  	evalBroker *EvalBroker
    26  	enabled    bool
    27  	stats      *BlockedStats
    28  	l          sync.RWMutex
    29  
    30  	// captured is the set of evaluations that are captured by computed node
    31  	// classes.
    32  	captured map[string]wrappedEval
    33  
    34  	// escaped is the set of evaluations that have escaped computed node
    35  	// classes.
    36  	escaped map[string]wrappedEval
    37  
    38  	// unblockCh is used to buffer unblocking of evaluations.
    39  	capacityChangeCh chan *capacityUpdate
    40  
    41  	// jobs is the map of blocked job and is used to ensure that only one
    42  	// blocked eval exists for each job. The value is the blocked evaluation ID.
    43  	jobs map[string]string
    44  
    45  	// unblockIndexes maps computed node classes to the index in which they were
    46  	// unblocked. This is used to check if an evaluation could have been
    47  	// unblocked between the time they were in the scheduler and the time they
    48  	// are being blocked.
    49  	unblockIndexes map[string]uint64
    50  
    51  	// duplicates is the set of evaluations for jobs that had pre-existing
    52  	// blocked evaluations. These should be marked as cancelled since only one
    53  	// blocked eval is neeeded per job.
    54  	duplicates []*structs.Evaluation
    55  
    56  	// duplicateCh is used to signal that a duplicate eval was added to the
    57  	// duplicate set. It can be used to unblock waiting callers looking for
    58  	// duplicates.
    59  	duplicateCh chan struct{}
    60  
    61  	// stopCh is used to stop any created goroutines.
    62  	stopCh chan struct{}
    63  }
    64  
    65  // capacityUpdate stores unblock data.
    66  type capacityUpdate struct {
    67  	computedClass string
    68  	index         uint64
    69  }
    70  
    71  // wrappedEval captures both the evaluation and the optional token
    72  type wrappedEval struct {
    73  	eval  *structs.Evaluation
    74  	token string
    75  }
    76  
    77  // BlockedStats returns all the stats about the blocked eval tracker.
    78  type BlockedStats struct {
    79  	// TotalEscaped is the total number of blocked evaluations that have escaped
    80  	// computed node classes.
    81  	TotalEscaped int
    82  
    83  	// TotalBlocked is the total number of blocked evaluations.
    84  	TotalBlocked int
    85  }
    86  
    87  // NewBlockedEvals creates a new blocked eval tracker that will enqueue
    88  // unblocked evals into the passed broker.
    89  func NewBlockedEvals(evalBroker *EvalBroker) *BlockedEvals {
    90  	return &BlockedEvals{
    91  		evalBroker:       evalBroker,
    92  		captured:         make(map[string]wrappedEval),
    93  		escaped:          make(map[string]wrappedEval),
    94  		jobs:             make(map[string]string),
    95  		unblockIndexes:   make(map[string]uint64),
    96  		capacityChangeCh: make(chan *capacityUpdate, unblockBuffer),
    97  		duplicateCh:      make(chan struct{}, 1),
    98  		stopCh:           make(chan struct{}),
    99  		stats:            new(BlockedStats),
   100  	}
   101  }
   102  
   103  // Enabled is used to check if the broker is enabled.
   104  func (b *BlockedEvals) Enabled() bool {
   105  	b.l.RLock()
   106  	defer b.l.RUnlock()
   107  	return b.enabled
   108  }
   109  
   110  // SetEnabled is used to control if the blocked eval tracker is enabled. The
   111  // tracker should only be enabled on the active leader.
   112  func (b *BlockedEvals) SetEnabled(enabled bool) {
   113  	b.l.Lock()
   114  	if b.enabled == enabled {
   115  		// No-op
   116  		b.l.Unlock()
   117  		return
   118  	} else if enabled {
   119  		go b.watchCapacity()
   120  	} else {
   121  		close(b.stopCh)
   122  	}
   123  	b.enabled = enabled
   124  	b.l.Unlock()
   125  	if !enabled {
   126  		b.Flush()
   127  	}
   128  }
   129  
   130  // Block tracks the passed evaluation and enqueues it into the eval broker when
   131  // a suitable node calls unblock.
   132  func (b *BlockedEvals) Block(eval *structs.Evaluation) {
   133  	b.processBlock(eval, "")
   134  }
   135  
   136  // Reblock tracks the passed evaluation and enqueues it into the eval broker when
   137  // a suitable node calls unblock. Reblock should be used over Block when the
   138  // blocking is occurring by an outstanding evaluation. The token is the
   139  // evaluation's token.
   140  func (b *BlockedEvals) Reblock(eval *structs.Evaluation, token string) {
   141  	b.processBlock(eval, token)
   142  }
   143  
   144  // processBlock is the implementation of blocking an evaluation. It supports
   145  // taking an optional evaluation token to use when reblocking an evaluation that
   146  // may be outstanding.
   147  func (b *BlockedEvals) processBlock(eval *structs.Evaluation, token string) {
   148  	b.l.Lock()
   149  	defer b.l.Unlock()
   150  
   151  	// Do nothing if not enabled
   152  	if !b.enabled {
   153  		return
   154  	}
   155  
   156  	// Check if the job already has a blocked evaluation. If it does add it to
   157  	// the list of duplicates. We omly ever want one blocked evaluation per job,
   158  	// otherwise we would create unnecessary work for the scheduler as multiple
   159  	// evals for the same job would be run, all producing the same outcome.
   160  	if _, existing := b.jobs[eval.JobID]; existing {
   161  		b.duplicates = append(b.duplicates, eval)
   162  
   163  		// Unblock any waiter.
   164  		select {
   165  		case b.duplicateCh <- struct{}{}:
   166  		default:
   167  		}
   168  
   169  		return
   170  	}
   171  
   172  	// Check if the eval missed an unblock while it was in the scheduler at an
   173  	// older index. The scheduler could have been invoked with a snapshot of
   174  	// state that was prior to additional capacity being added or allocations
   175  	// becoming terminal.
   176  	if b.missedUnblock(eval) {
   177  		// Just re-enqueue the eval immediately. We pass the token so that the
   178  		// eval_broker can properly handle the case in which the evaluation is
   179  		// still outstanding.
   180  		b.evalBroker.EnqueueAll(map[*structs.Evaluation]string{eval: token})
   181  		return
   182  	}
   183  
   184  	// Mark the job as tracked.
   185  	b.stats.TotalBlocked++
   186  	b.jobs[eval.JobID] = eval.ID
   187  
   188  	// Wrap the evaluation, capturing its token.
   189  	wrapped := wrappedEval{
   190  		eval:  eval,
   191  		token: token,
   192  	}
   193  
   194  	// If the eval has escaped, meaning computed node classes could not capture
   195  	// the constraints of the job, we store the eval separately as we have to
   196  	// unblock it whenever node capacity changes. This is because we don't know
   197  	// what node class is feasible for the jobs constraints.
   198  	if eval.EscapedComputedClass {
   199  		b.escaped[eval.ID] = wrapped
   200  		b.stats.TotalEscaped++
   201  		return
   202  	}
   203  
   204  	// Add the eval to the set of blocked evals whose jobs constraints are
   205  	// captured by computed node class.
   206  	b.captured[eval.ID] = wrapped
   207  }
   208  
   209  // missedUnblock returns whether an evaluation missed an unblock while it was in
   210  // the scheduler. Since the scheduler can operate at an index in the past, the
   211  // evaluation may have been processed missing data that would allow it to
   212  // complete. This method returns if that is the case and should be called with
   213  // the lock held.
   214  func (b *BlockedEvals) missedUnblock(eval *structs.Evaluation) bool {
   215  	var max uint64 = 0
   216  	for class, index := range b.unblockIndexes {
   217  		// Calculate the max unblock index
   218  		if max < index {
   219  			max = index
   220  		}
   221  
   222  		elig, ok := eval.ClassEligibility[class]
   223  		if !ok && eval.SnapshotIndex < index {
   224  			// The evaluation was processed and did not encounter this class
   225  			// because it was added after it was processed. Thus for correctness
   226  			// we need to unblock it.
   227  			return true
   228  		}
   229  
   230  		// The evaluation could use the computed node class and the eval was
   231  		// processed before the last unblock.
   232  		if elig && eval.SnapshotIndex < index {
   233  			return true
   234  		}
   235  	}
   236  
   237  	// If the evaluation has escaped, and the map contains an index older than
   238  	// the evaluations, it should be unblocked.
   239  	if eval.EscapedComputedClass && eval.SnapshotIndex < max {
   240  		return true
   241  	}
   242  
   243  	// The evaluation is ahead of all recent unblocks.
   244  	return false
   245  }
   246  
   247  // Untrack causes any blocked evaluation for the passed job to be no longer
   248  // tracked. Untrack is called when there is a successful evaluation for the job
   249  // and a blocked evaluation is no longer needed.
   250  func (b *BlockedEvals) Untrack(jobID string) {
   251  	b.l.Lock()
   252  	defer b.l.Unlock()
   253  
   254  	// Do nothing if not enabled
   255  	if !b.enabled {
   256  		return
   257  	}
   258  
   259  	// Get the evaluation ID to cancel
   260  	evalID, ok := b.jobs[jobID]
   261  	if !ok {
   262  		// No blocked evaluation so exit
   263  		return
   264  	}
   265  
   266  	// Attempt to delete the evaluation
   267  	if w, ok := b.captured[evalID]; ok {
   268  		delete(b.jobs, w.eval.JobID)
   269  		delete(b.captured, evalID)
   270  		b.stats.TotalBlocked--
   271  	}
   272  
   273  	if w, ok := b.escaped[evalID]; ok {
   274  		delete(b.jobs, w.eval.JobID)
   275  		delete(b.escaped, evalID)
   276  		b.stats.TotalEscaped--
   277  		b.stats.TotalBlocked--
   278  	}
   279  }
   280  
   281  // Unblock causes any evaluation that could potentially make progress on a
   282  // capacity change on the passed computed node class to be enqueued into the
   283  // eval broker.
   284  func (b *BlockedEvals) Unblock(computedClass string, index uint64) {
   285  	b.l.Lock()
   286  
   287  	// Do nothing if not enabled
   288  	if !b.enabled {
   289  		b.l.Unlock()
   290  		return
   291  	}
   292  
   293  	// Store the index in which the unblock happened. We use this on subsequent
   294  	// block calls in case the evaluation was in the scheduler when a trigger
   295  	// occurred.
   296  	b.unblockIndexes[computedClass] = index
   297  	b.l.Unlock()
   298  
   299  	b.capacityChangeCh <- &capacityUpdate{
   300  		computedClass: computedClass,
   301  		index:         index,
   302  	}
   303  }
   304  
   305  // watchCapacity is a long lived function that watches for capacity changes in
   306  // nodes and unblocks the correct set of evals.
   307  func (b *BlockedEvals) watchCapacity() {
   308  	for {
   309  		select {
   310  		case <-b.stopCh:
   311  			return
   312  		case update := <-b.capacityChangeCh:
   313  			b.unblock(update.computedClass, update.index)
   314  		}
   315  	}
   316  }
   317  
   318  // unblock unblocks all blocked evals that could run on the passed computed node
   319  // class.
   320  func (b *BlockedEvals) unblock(computedClass string, index uint64) {
   321  	b.l.Lock()
   322  	defer b.l.Unlock()
   323  
   324  	// Protect against the case of a flush.
   325  	if !b.enabled {
   326  		return
   327  	}
   328  
   329  	// Every eval that has escaped computed node class has to be unblocked
   330  	// because any node could potentially be feasible.
   331  	numEscaped := len(b.escaped)
   332  	unblocked := make(map[*structs.Evaluation]string, lib.MaxInt(numEscaped, 4))
   333  	if numEscaped != 0 {
   334  		for id, wrapped := range b.escaped {
   335  			unblocked[wrapped.eval] = wrapped.token
   336  			delete(b.escaped, id)
   337  			delete(b.jobs, wrapped.eval.JobID)
   338  		}
   339  	}
   340  
   341  	// We unblock any eval that is explicitly eligible for the computed class
   342  	// and also any eval that is not eligible or uneligible. This signifies that
   343  	// when the evaluation was originally run through the scheduler, that it
   344  	// never saw a node with the given computed class and thus needs to be
   345  	// unblocked for correctness.
   346  	for id, wrapped := range b.captured {
   347  		if elig, ok := wrapped.eval.ClassEligibility[computedClass]; ok && !elig {
   348  			// Can skip because the eval has explicitly marked the node class
   349  			// as ineligible.
   350  			continue
   351  		}
   352  
   353  		// The computed node class has never been seen by the eval so we unblock
   354  		// it.
   355  		unblocked[wrapped.eval] = wrapped.token
   356  		delete(b.jobs, wrapped.eval.JobID)
   357  		delete(b.captured, id)
   358  	}
   359  
   360  	if l := len(unblocked); l != 0 {
   361  		// Update the counters
   362  		b.stats.TotalEscaped = 0
   363  		b.stats.TotalBlocked -= l
   364  
   365  		// Enqueue all the unblocked evals into the broker.
   366  		b.evalBroker.EnqueueAll(unblocked)
   367  	}
   368  }
   369  
   370  // UnblockFailed unblocks all blocked evaluation that were due to scheduler
   371  // failure.
   372  func (b *BlockedEvals) UnblockFailed() {
   373  	b.l.Lock()
   374  	defer b.l.Unlock()
   375  
   376  	// Do nothing if not enabled
   377  	if !b.enabled {
   378  		return
   379  	}
   380  
   381  	unblocked := make(map[*structs.Evaluation]string, 4)
   382  	for id, wrapped := range b.captured {
   383  		if wrapped.eval.TriggeredBy == structs.EvalTriggerMaxPlans {
   384  			unblocked[wrapped.eval] = wrapped.token
   385  			delete(b.captured, id)
   386  			delete(b.jobs, wrapped.eval.JobID)
   387  		}
   388  	}
   389  
   390  	for id, wrapped := range b.escaped {
   391  		if wrapped.eval.TriggeredBy == structs.EvalTriggerMaxPlans {
   392  			unblocked[wrapped.eval] = wrapped.token
   393  			delete(b.escaped, id)
   394  			delete(b.jobs, wrapped.eval.JobID)
   395  			b.stats.TotalEscaped -= 1
   396  		}
   397  	}
   398  
   399  	if l := len(unblocked); l > 0 {
   400  		b.stats.TotalBlocked -= l
   401  		b.evalBroker.EnqueueAll(unblocked)
   402  	}
   403  }
   404  
   405  // GetDuplicates returns all the duplicate evaluations and blocks until the
   406  // passed timeout.
   407  func (b *BlockedEvals) GetDuplicates(timeout time.Duration) []*structs.Evaluation {
   408  	var timeoutTimer *time.Timer
   409  	var timeoutCh <-chan time.Time
   410  SCAN:
   411  	b.l.Lock()
   412  	if len(b.duplicates) != 0 {
   413  		dups := b.duplicates
   414  		b.duplicates = nil
   415  		b.l.Unlock()
   416  		return dups
   417  	}
   418  	b.l.Unlock()
   419  
   420  	// Create the timer
   421  	if timeoutTimer == nil && timeout != 0 {
   422  		timeoutTimer = time.NewTimer(timeout)
   423  		timeoutCh = timeoutTimer.C
   424  		defer timeoutTimer.Stop()
   425  	}
   426  
   427  	select {
   428  	case <-b.stopCh:
   429  		return nil
   430  	case <-timeoutCh:
   431  		return nil
   432  	case <-b.duplicateCh:
   433  		goto SCAN
   434  	}
   435  }
   436  
   437  // Flush is used to clear the state of blocked evaluations.
   438  func (b *BlockedEvals) Flush() {
   439  	b.l.Lock()
   440  	defer b.l.Unlock()
   441  
   442  	// Reset the blocked eval tracker.
   443  	b.stats.TotalEscaped = 0
   444  	b.stats.TotalBlocked = 0
   445  	b.captured = make(map[string]wrappedEval)
   446  	b.escaped = make(map[string]wrappedEval)
   447  	b.jobs = make(map[string]string)
   448  	b.duplicates = nil
   449  	b.capacityChangeCh = make(chan *capacityUpdate, unblockBuffer)
   450  	b.stopCh = make(chan struct{})
   451  	b.duplicateCh = make(chan struct{}, 1)
   452  }
   453  
   454  // Stats is used to query the state of the blocked eval tracker.
   455  func (b *BlockedEvals) Stats() *BlockedStats {
   456  	// Allocate a new stats struct
   457  	stats := new(BlockedStats)
   458  
   459  	b.l.RLock()
   460  	defer b.l.RUnlock()
   461  
   462  	// Copy all the stats
   463  	stats.TotalEscaped = b.stats.TotalEscaped
   464  	stats.TotalBlocked = b.stats.TotalBlocked
   465  	return stats
   466  }
   467  
   468  // EmitStats is used to export metrics about the blocked eval tracker while enabled
   469  func (b *BlockedEvals) EmitStats(period time.Duration, stopCh chan struct{}) {
   470  	for {
   471  		select {
   472  		case <-time.After(period):
   473  			stats := b.Stats()
   474  			metrics.SetGauge([]string{"nomad", "blocked_evals", "total_blocked"}, float32(stats.TotalBlocked))
   475  			metrics.SetGauge([]string{"nomad", "blocked_evals", "total_escaped"}, float32(stats.TotalEscaped))
   476  		case <-stopCh:
   477  			return
   478  		}
   479  	}
   480  }