github.com/manicqin/nomad@v0.9.5/nomad/blocked_evals.go (about)

     1  package nomad
     2  
     3  import (
     4  	"sync"
     5  	"time"
     6  
     7  	metrics "github.com/armon/go-metrics"
     8  	"github.com/hashicorp/consul/lib"
     9  	log "github.com/hashicorp/go-hclog"
    10  	"github.com/hashicorp/nomad/helper"
    11  	"github.com/hashicorp/nomad/nomad/structs"
    12  )
    13  
const (
	// unblockBuffer is the buffer size for the unblock channel. The buffer
	// should be large to ensure that the FSM doesn't block when calling Unblock
	// as this would apply back-pressure on Raft. It is the capacity given to
	// capacityChangeCh when the tracker is created and when it is flushed.
	unblockBuffer = 8096

	// pruneInterval is the interval at which we prune objects from the
	// BlockedEvals tracker. It is the period of the ticker driving prune.
	pruneInterval = 5 * time.Minute

	// pruneThreshold is the threshold after which objects will be pruned.
	// Unblock indexes older than the index nearest to (now - pruneThreshold)
	// are removed by pruneUnblockIndexes.
	pruneThreshold = 15 * time.Minute
)
    27  
// BlockedEvals is used to track evaluations that shouldn't be queued until a
// certain class of nodes becomes available. An evaluation is put into the
// blocked state when it is run through the scheduler and produced failed
// allocations. It is unblocked when the capacity of a node that could run the
// failed allocation becomes available.
type BlockedEvals struct {
	// logger is the logger to use by the blocked eval tracker.
	logger log.Logger

	// evalBroker is the broker that unblocked evaluations are enqueued into.
	evalBroker *EvalBroker

	// enabled reports whether the tracker is active; it is toggled by
	// SetEnabled and most operations are no-ops while it is false.
	enabled bool

	// stats holds the counters reported by Stats.
	stats *BlockedStats

	// l is the lock taken by the tracker's methods before touching the
	// fields of this struct.
	l sync.RWMutex

	// captured is the set of evaluations that are captured by computed node
	// classes.
	captured map[string]wrappedEval

	// escaped is the set of evaluations that have escaped computed node
	// classes.
	escaped map[string]wrappedEval

	// system is the set of system evaluations that failed to start on nodes because of
	// resource constraints.
	system *systemEvals

	// capacityChangeCh is used to buffer unblocking of evaluations. The
	// Unblock* methods send capacity updates on it and watchCapacity consumes
	// them. It is replaced with a fresh channel by Flush.
	capacityChangeCh chan *capacityUpdate

	// jobs is the map of blocked job and is used to ensure that only one
	// blocked eval exists for each job. The value is the blocked evaluation ID.
	jobs map[structs.NamespacedID]string

	// unblockIndexes maps computed node classes or quota name to the index in
	// which they were unblocked. This is used to check if an evaluation could
	// have been unblocked between the time they were in the scheduler and the
	// time they are being blocked.
	unblockIndexes map[string]uint64

	// duplicates is the set of evaluations for jobs that had pre-existing
	// blocked evaluations. These should be marked as cancelled since only one
	// blocked eval is needed per job.
	duplicates []*structs.Evaluation

	// duplicateCh is used to signal that a duplicate eval was added to the
	// duplicate set. It can be used to unblock waiting callers looking for
	// duplicates.
	duplicateCh chan struct{}

	// timetable is used to correlate indexes with their insertion time. This
	// allows us to prune based on time.
	timetable *TimeTable

	// stopCh is used to stop any created goroutines. It is closed when the
	// tracker is disabled and replaced with a fresh channel by Flush.
	stopCh chan struct{}
}
    84  
// capacityUpdate stores unblock data. It is the message sent over
// capacityChangeCh and consumed by watchCapacity.
type capacityUpdate struct {
	// computedClass is the computed node class whose capacity changed; it is
	// left empty by UnblockQuota.
	computedClass string

	// quotaChange is the name of the quota that changed; it is left empty by
	// Unblock.
	quotaChange string

	// index is the index passed by the caller that reported the change.
	index uint64
}
    91  
// wrappedEval captures both the evaluation and the optional token
type wrappedEval struct {
	// eval is the blocked evaluation being tracked.
	eval *structs.Evaluation

	// token is the evaluation's token as supplied to Reblock; it is the empty
	// string when the evaluation was added through Block.
	token string
}
    97  
// BlockedStats holds all the stats about the blocked eval tracker. A copy of
// the tracker's current values is returned by BlockedEvals.Stats.
type BlockedStats struct {
	// TotalEscaped is the total number of blocked evaluations that have escaped
	// computed node classes.
	TotalEscaped int

	// TotalBlocked is the total number of blocked evaluations.
	TotalBlocked int

	// TotalQuotaLimit is the total number of blocked evaluations that are due
	// to the quota limit being reached.
	TotalQuotaLimit int
}
   111  
   112  // NewBlockedEvals creates a new blocked eval tracker that will enqueue
   113  // unblocked evals into the passed broker.
   114  func NewBlockedEvals(evalBroker *EvalBroker, logger log.Logger) *BlockedEvals {
   115  	return &BlockedEvals{
   116  		logger:           logger.Named("blocked_evals"),
   117  		evalBroker:       evalBroker,
   118  		captured:         make(map[string]wrappedEval),
   119  		escaped:          make(map[string]wrappedEval),
   120  		system:           newSystemEvals(),
   121  		jobs:             make(map[structs.NamespacedID]string),
   122  		unblockIndexes:   make(map[string]uint64),
   123  		capacityChangeCh: make(chan *capacityUpdate, unblockBuffer),
   124  		duplicateCh:      make(chan struct{}, 1),
   125  		stopCh:           make(chan struct{}),
   126  		stats:            new(BlockedStats),
   127  	}
   128  }
   129  
   130  // Enabled is used to check if the broker is enabled.
   131  func (b *BlockedEvals) Enabled() bool {
   132  	b.l.RLock()
   133  	defer b.l.RUnlock()
   134  	return b.enabled
   135  }
   136  
   137  // SetEnabled is used to control if the blocked eval tracker is enabled. The
   138  // tracker should only be enabled on the active leader.
   139  func (b *BlockedEvals) SetEnabled(enabled bool) {
   140  	b.l.Lock()
   141  	if b.enabled == enabled {
   142  		// No-op
   143  		b.l.Unlock()
   144  		return
   145  	} else if enabled {
   146  		go b.watchCapacity(b.stopCh, b.capacityChangeCh)
   147  		go b.prune(b.stopCh)
   148  	} else {
   149  		close(b.stopCh)
   150  	}
   151  	b.enabled = enabled
   152  	b.l.Unlock()
   153  	if !enabled {
   154  		b.Flush()
   155  	}
   156  }
   157  
   158  func (b *BlockedEvals) SetTimetable(timetable *TimeTable) {
   159  	b.l.Lock()
   160  	b.timetable = timetable
   161  	b.l.Unlock()
   162  }
   163  
   164  // Block tracks the passed evaluation and enqueues it into the eval broker when
   165  // a suitable node calls unblock.
   166  func (b *BlockedEvals) Block(eval *structs.Evaluation) {
   167  	b.processBlock(eval, "")
   168  }
   169  
   170  // Reblock tracks the passed evaluation and enqueues it into the eval broker when
   171  // a suitable node calls unblock. Reblock should be used over Block when the
   172  // blocking is occurring by an outstanding evaluation. The token is the
   173  // evaluation's token.
   174  func (b *BlockedEvals) Reblock(eval *structs.Evaluation, token string) {
   175  	b.processBlock(eval, token)
   176  }
   177  
   178  // processBlock is the implementation of blocking an evaluation. It supports
   179  // taking an optional evaluation token to use when reblocking an evaluation that
   180  // may be outstanding.
   181  func (b *BlockedEvals) processBlock(eval *structs.Evaluation, token string) {
   182  	b.l.Lock()
   183  	defer b.l.Unlock()
   184  
   185  	// Do nothing if not enabled
   186  	if !b.enabled {
   187  		return
   188  	}
   189  
   190  	// Handle the new evaluation being for a job we are already tracking.
   191  	if b.processBlockJobDuplicate(eval) {
   192  		// If process block job duplicate returns true, the new evaluation has
   193  		// been marked as a duplicate and we have nothing to do, so return
   194  		// early.
   195  		return
   196  	}
   197  
   198  	// Check if the eval missed an unblock while it was in the scheduler at an
   199  	// older index. The scheduler could have been invoked with a snapshot of
   200  	// state that was prior to additional capacity being added or allocations
   201  	// becoming terminal.
   202  	if b.missedUnblock(eval) {
   203  		// Just re-enqueue the eval immediately. We pass the token so that the
   204  		// eval_broker can properly handle the case in which the evaluation is
   205  		// still outstanding.
   206  		b.evalBroker.EnqueueAll(map[*structs.Evaluation]string{eval: token})
   207  		return
   208  	}
   209  
   210  	// Mark the job as tracked.
   211  	b.jobs[structs.NewNamespacedID(eval.JobID, eval.Namespace)] = eval.ID
   212  	b.stats.TotalBlocked++
   213  
   214  	// Track that the evaluation is being added due to reaching the quota limit
   215  	if eval.QuotaLimitReached != "" {
   216  		b.stats.TotalQuotaLimit++
   217  	}
   218  
   219  	// Wrap the evaluation, capturing its token.
   220  	wrapped := wrappedEval{
   221  		eval:  eval,
   222  		token: token,
   223  	}
   224  
   225  	// If the eval has escaped, meaning computed node classes could not capture
   226  	// the constraints of the job, we store the eval separately as we have to
   227  	// unblock it whenever node capacity changes. This is because we don't know
   228  	// what node class is feasible for the jobs constraints.
   229  	if eval.EscapedComputedClass {
   230  		b.escaped[eval.ID] = wrapped
   231  		b.stats.TotalEscaped++
   232  		return
   233  	}
   234  
   235  	// System evals are indexed by node and re-processed on utilization changes in
   236  	// existing nodes
   237  	if eval.Type == structs.JobTypeSystem {
   238  		b.system.Add(eval, token)
   239  	}
   240  
   241  	// Add the eval to the set of blocked evals whose jobs constraints are
   242  	// captured by computed node class.
   243  	b.captured[eval.ID] = wrapped
   244  }
   245  
   246  // processBlockJobDuplicate handles the case where the new eval is for a job
   247  // that we are already tracking. If the eval is a duplicate, we add the older
   248  // evaluation by Raft index to the list of duplicates such that it can be
   249  // cancelled. We only ever want one blocked evaluation per job, otherwise we
   250  // would create unnecessary work for the scheduler as multiple evals for the
   251  // same job would be run, all producing the same outcome. It is critical to
   252  // prefer the newer evaluation, since it will contain the most up to date set of
   253  // class eligibility. The return value is set to true, if the passed evaluation
   254  // is cancelled. This should be called with the lock held.
   255  func (b *BlockedEvals) processBlockJobDuplicate(eval *structs.Evaluation) (newCancelled bool) {
   256  	existingID, hasExisting := b.jobs[structs.NewNamespacedID(eval.JobID, eval.Namespace)]
   257  	if !hasExisting {
   258  		return
   259  	}
   260  
   261  	var dup *structs.Evaluation
   262  	existingW, ok := b.captured[existingID]
   263  	if ok {
   264  		if latestEvalIndex(existingW.eval) <= latestEvalIndex(eval) {
   265  			delete(b.captured, existingID)
   266  			b.stats.TotalBlocked--
   267  			dup = existingW.eval
   268  		} else {
   269  			dup = eval
   270  			newCancelled = true
   271  		}
   272  	} else {
   273  		existingW, ok = b.escaped[existingID]
   274  		if !ok {
   275  			// This is a programming error
   276  			b.logger.Error("existing blocked evaluation is neither tracked as captured or escaped", "existing_id", existingID)
   277  			delete(b.jobs, structs.NewNamespacedID(eval.JobID, eval.Namespace))
   278  			return
   279  		}
   280  
   281  		if latestEvalIndex(existingW.eval) <= latestEvalIndex(eval) {
   282  			delete(b.escaped, existingID)
   283  			b.stats.TotalEscaped--
   284  			dup = existingW.eval
   285  		} else {
   286  			dup = eval
   287  			newCancelled = true
   288  		}
   289  	}
   290  
   291  	b.duplicates = append(b.duplicates, dup)
   292  
   293  	// Unblock any waiter.
   294  	select {
   295  	case b.duplicateCh <- struct{}{}:
   296  	default:
   297  	}
   298  
   299  	return
   300  }
   301  
   302  // latestEvalIndex returns the max of the evaluations create and snapshot index
   303  func latestEvalIndex(eval *structs.Evaluation) uint64 {
   304  	if eval == nil {
   305  		return 0
   306  	}
   307  
   308  	return helper.Uint64Max(eval.CreateIndex, eval.SnapshotIndex)
   309  }
   310  
   311  // missedUnblock returns whether an evaluation missed an unblock while it was in
   312  // the scheduler. Since the scheduler can operate at an index in the past, the
   313  // evaluation may have been processed missing data that would allow it to
   314  // complete. This method returns if that is the case and should be called with
   315  // the lock held.
   316  func (b *BlockedEvals) missedUnblock(eval *structs.Evaluation) bool {
   317  	var max uint64 = 0
   318  	for id, index := range b.unblockIndexes {
   319  		// Calculate the max unblock index
   320  		if max < index {
   321  			max = index
   322  		}
   323  
   324  		// The evaluation is blocked because it has hit a quota limit not class
   325  		// eligibility
   326  		if eval.QuotaLimitReached != "" {
   327  			if eval.QuotaLimitReached != id {
   328  				// Not a match
   329  				continue
   330  			} else if eval.SnapshotIndex < index {
   331  				// The evaluation was processed before the quota specification was
   332  				// updated, so unblock the evaluation.
   333  				return true
   334  			}
   335  
   336  			// The evaluation was processed having seen all changes to the quota
   337  			return false
   338  		}
   339  
   340  		elig, ok := eval.ClassEligibility[id]
   341  		if !ok && eval.SnapshotIndex < index {
   342  			// The evaluation was processed and did not encounter this class
   343  			// because it was added after it was processed. Thus for correctness
   344  			// we need to unblock it.
   345  			return true
   346  		}
   347  
   348  		// The evaluation could use the computed node class and the eval was
   349  		// processed before the last unblock.
   350  		if elig && eval.SnapshotIndex < index {
   351  			return true
   352  		}
   353  	}
   354  
   355  	// If the evaluation has escaped, and the map contains an index older than
   356  	// the evaluations, it should be unblocked.
   357  	if eval.EscapedComputedClass && eval.SnapshotIndex < max {
   358  		return true
   359  	}
   360  
   361  	// The evaluation is ahead of all recent unblocks.
   362  	return false
   363  }
   364  
   365  // Untrack causes any blocked evaluation for the passed job to be no longer
   366  // tracked. Untrack is called when there is a successful evaluation for the job
   367  // and a blocked evaluation is no longer needed.
   368  func (b *BlockedEvals) Untrack(jobID, namespace string) {
   369  	b.l.Lock()
   370  	defer b.l.Unlock()
   371  
   372  	// Do nothing if not enabled
   373  	if !b.enabled {
   374  		return
   375  	}
   376  
   377  	nsID := structs.NewNamespacedID(jobID, namespace)
   378  
   379  	if evals, ok := b.system.JobEvals(nsID); ok {
   380  		for _, e := range evals {
   381  			b.system.Remove(e)
   382  			b.stats.TotalBlocked--
   383  		}
   384  		return
   385  	}
   386  
   387  	// Get the evaluation ID to cancel
   388  	evalID, ok := b.jobs[nsID]
   389  	if !ok {
   390  		// No blocked evaluation so exit
   391  		return
   392  	}
   393  
   394  	// Attempt to delete the evaluation
   395  	if w, ok := b.captured[evalID]; ok {
   396  		delete(b.jobs, nsID)
   397  		delete(b.captured, evalID)
   398  		b.stats.TotalBlocked--
   399  		if w.eval.QuotaLimitReached != "" {
   400  			b.stats.TotalQuotaLimit--
   401  		}
   402  	}
   403  
   404  	if w, ok := b.escaped[evalID]; ok {
   405  		delete(b.jobs, nsID)
   406  		delete(b.escaped, evalID)
   407  		b.stats.TotalEscaped--
   408  		b.stats.TotalBlocked--
   409  		if w.eval.QuotaLimitReached != "" {
   410  			b.stats.TotalQuotaLimit--
   411  		}
   412  	}
   413  }
   414  
   415  // Unblock causes any evaluation that could potentially make progress on a
   416  // capacity change on the passed computed node class to be enqueued into the
   417  // eval broker.
   418  func (b *BlockedEvals) Unblock(computedClass string, index uint64) {
   419  	b.l.Lock()
   420  
   421  	// Do nothing if not enabled
   422  	if !b.enabled {
   423  		b.l.Unlock()
   424  		return
   425  	}
   426  
   427  	// Store the index in which the unblock happened. We use this on subsequent
   428  	// block calls in case the evaluation was in the scheduler when a trigger
   429  	// occurred.
   430  	b.unblockIndexes[computedClass] = index
   431  	b.l.Unlock()
   432  
   433  	b.capacityChangeCh <- &capacityUpdate{
   434  		computedClass: computedClass,
   435  		index:         index,
   436  	}
   437  }
   438  
   439  // UnblockQuota causes any evaluation that could potentially make progress on a
   440  // capacity change on the passed quota to be enqueued into the eval broker.
   441  func (b *BlockedEvals) UnblockQuota(quota string, index uint64) {
   442  	// Nothing to do
   443  	if quota == "" {
   444  		return
   445  	}
   446  
   447  	b.l.Lock()
   448  
   449  	// Do nothing if not enabled
   450  	if !b.enabled {
   451  		b.l.Unlock()
   452  		return
   453  	}
   454  
   455  	// Store the index in which the unblock happened. We use this on subsequent
   456  	// block calls in case the evaluation was in the scheduler when a trigger
   457  	// occurred.
   458  	b.unblockIndexes[quota] = index
   459  	b.l.Unlock()
   460  
   461  	b.capacityChangeCh <- &capacityUpdate{
   462  		quotaChange: quota,
   463  		index:       index,
   464  	}
   465  }
   466  
   467  // UnblockClassAndQuota causes any evaluation that could potentially make
   468  // progress on a capacity change on the passed computed node class or quota to
   469  // be enqueued into the eval broker.
   470  func (b *BlockedEvals) UnblockClassAndQuota(class, quota string, index uint64) {
   471  	b.l.Lock()
   472  
   473  	// Do nothing if not enabled
   474  	if !b.enabled {
   475  		b.l.Unlock()
   476  		return
   477  	}
   478  
   479  	// Store the index in which the unblock happened. We use this on subsequent
   480  	// block calls in case the evaluation was in the scheduler when a trigger
   481  	// occurred.
   482  	if quota != "" {
   483  		b.unblockIndexes[quota] = index
   484  	}
   485  	b.unblockIndexes[class] = index
   486  
   487  	// Capture chan inside the lock to prevent a race with it getting reset
   488  	// in Flush.
   489  	ch := b.capacityChangeCh
   490  	b.l.Unlock()
   491  
   492  	ch <- &capacityUpdate{
   493  		computedClass: class,
   494  		quotaChange:   quota,
   495  		index:         index,
   496  	}
   497  }
   498  
   499  // UnblockNode finds any blocked evalution that's node specific (system jobs) and enqueues
   500  // it on the eval broker
   501  func (b *BlockedEvals) UnblockNode(nodeID string, index uint64) {
   502  	b.l.Lock()
   503  	defer b.l.Unlock()
   504  
   505  	evals, ok := b.system.NodeEvals(nodeID)
   506  
   507  	// Do nothing if not enabled
   508  	if !b.enabled || !ok || len(evals) == 0 {
   509  		return
   510  	}
   511  
   512  	for e := range evals {
   513  		b.system.Remove(e)
   514  		b.stats.TotalBlocked--
   515  	}
   516  
   517  	b.evalBroker.EnqueueAll(evals)
   518  }
   519  
   520  // watchCapacity is a long lived function that watches for capacity changes in
   521  // nodes and unblocks the correct set of evals.
   522  func (b *BlockedEvals) watchCapacity(stopCh <-chan struct{}, changeCh <-chan *capacityUpdate) {
   523  	for {
   524  		select {
   525  		case <-stopCh:
   526  			return
   527  		case update := <-changeCh:
   528  			b.unblock(update.computedClass, update.quotaChange, update.index)
   529  		}
   530  	}
   531  }
   532  
   533  func (b *BlockedEvals) unblock(computedClass, quota string, index uint64) {
   534  	b.l.Lock()
   535  	defer b.l.Unlock()
   536  
   537  	// Protect against the case of a flush.
   538  	if !b.enabled {
   539  		return
   540  	}
   541  
   542  	// Every eval that has escaped computed node class has to be unblocked
   543  	// because any node could potentially be feasible.
   544  	numEscaped := len(b.escaped)
   545  	numQuotaLimit := 0
   546  	unblocked := make(map[*structs.Evaluation]string, lib.MaxInt(numEscaped, 4))
   547  
   548  	if numEscaped != 0 && computedClass != "" {
   549  		for id, wrapped := range b.escaped {
   550  			unblocked[wrapped.eval] = wrapped.token
   551  			delete(b.escaped, id)
   552  			delete(b.jobs, structs.NewNamespacedID(wrapped.eval.JobID, wrapped.eval.Namespace))
   553  
   554  			if wrapped.eval.QuotaLimitReached != "" {
   555  				numQuotaLimit++
   556  			}
   557  		}
   558  	}
   559  
   560  	// We unblock any eval that is explicitly eligible for the computed class
   561  	// and also any eval that is not eligible or uneligible. This signifies that
   562  	// when the evaluation was originally run through the scheduler, that it
   563  	// never saw a node with the given computed class and thus needs to be
   564  	// unblocked for correctness.
   565  	for id, wrapped := range b.captured {
   566  		if quota != "" && wrapped.eval.QuotaLimitReached != quota {
   567  			// We are unblocking based on quota and this eval doesn't match
   568  			continue
   569  		} else if elig, ok := wrapped.eval.ClassEligibility[computedClass]; ok && !elig {
   570  			// Can skip because the eval has explicitly marked the node class
   571  			// as ineligible.
   572  			continue
   573  		}
   574  
   575  		// Unblock the evaluation because it is either for the matching quota,
   576  		// is eligible based on the computed node class, or never seen the
   577  		// computed node class.
   578  		unblocked[wrapped.eval] = wrapped.token
   579  		delete(b.jobs, structs.NewNamespacedID(wrapped.eval.JobID, wrapped.eval.Namespace))
   580  		delete(b.captured, id)
   581  		if wrapped.eval.QuotaLimitReached != "" {
   582  			numQuotaLimit++
   583  		}
   584  	}
   585  
   586  	if l := len(unblocked); l != 0 {
   587  		// Update the counters
   588  		b.stats.TotalEscaped = 0
   589  		b.stats.TotalBlocked -= l
   590  		b.stats.TotalQuotaLimit -= numQuotaLimit
   591  
   592  		// Enqueue all the unblocked evals into the broker.
   593  		b.evalBroker.EnqueueAll(unblocked)
   594  	}
   595  }
   596  
   597  // UnblockFailed unblocks all blocked evaluation that were due to scheduler
   598  // failure.
   599  func (b *BlockedEvals) UnblockFailed() {
   600  	b.l.Lock()
   601  	defer b.l.Unlock()
   602  
   603  	// Do nothing if not enabled
   604  	if !b.enabled {
   605  		return
   606  	}
   607  
   608  	quotaLimit := 0
   609  	unblocked := make(map[*structs.Evaluation]string, 4)
   610  	for id, wrapped := range b.captured {
   611  		if wrapped.eval.TriggeredBy == structs.EvalTriggerMaxPlans {
   612  			unblocked[wrapped.eval] = wrapped.token
   613  			delete(b.captured, id)
   614  			delete(b.jobs, structs.NewNamespacedID(wrapped.eval.JobID, wrapped.eval.Namespace))
   615  			if wrapped.eval.QuotaLimitReached != "" {
   616  				quotaLimit++
   617  			}
   618  		}
   619  	}
   620  
   621  	for id, wrapped := range b.escaped {
   622  		if wrapped.eval.TriggeredBy == structs.EvalTriggerMaxPlans {
   623  			unblocked[wrapped.eval] = wrapped.token
   624  			delete(b.escaped, id)
   625  			delete(b.jobs, structs.NewNamespacedID(wrapped.eval.JobID, wrapped.eval.Namespace))
   626  			b.stats.TotalEscaped -= 1
   627  			if wrapped.eval.QuotaLimitReached != "" {
   628  				quotaLimit++
   629  			}
   630  		}
   631  	}
   632  
   633  	if l := len(unblocked); l > 0 {
   634  		b.stats.TotalBlocked -= l
   635  		b.stats.TotalQuotaLimit -= quotaLimit
   636  		b.evalBroker.EnqueueAll(unblocked)
   637  	}
   638  }
   639  
   640  // GetDuplicates returns all the duplicate evaluations and blocks until the
   641  // passed timeout.
   642  func (b *BlockedEvals) GetDuplicates(timeout time.Duration) []*structs.Evaluation {
   643  	var timeoutTimer *time.Timer
   644  	var timeoutCh <-chan time.Time
   645  SCAN:
   646  	b.l.Lock()
   647  	if len(b.duplicates) != 0 {
   648  		dups := b.duplicates
   649  		b.duplicates = nil
   650  		b.l.Unlock()
   651  		return dups
   652  	}
   653  
   654  	// Capture chans inside the lock to prevent a race with them getting
   655  	// reset in Flush
   656  	dupCh := b.duplicateCh
   657  	stopCh := b.stopCh
   658  	b.l.Unlock()
   659  
   660  	// Create the timer
   661  	if timeoutTimer == nil && timeout != 0 {
   662  		timeoutTimer = time.NewTimer(timeout)
   663  		timeoutCh = timeoutTimer.C
   664  		defer timeoutTimer.Stop()
   665  	}
   666  
   667  	select {
   668  	case <-stopCh:
   669  		return nil
   670  	case <-timeoutCh:
   671  		return nil
   672  	case <-dupCh:
   673  		goto SCAN
   674  	}
   675  }
   676  
   677  // Flush is used to clear the state of blocked evaluations.
   678  func (b *BlockedEvals) Flush() {
   679  	b.l.Lock()
   680  	defer b.l.Unlock()
   681  
   682  	// Reset the blocked eval tracker.
   683  	b.stats.TotalEscaped = 0
   684  	b.stats.TotalBlocked = 0
   685  	b.stats.TotalQuotaLimit = 0
   686  	b.captured = make(map[string]wrappedEval)
   687  	b.escaped = make(map[string]wrappedEval)
   688  	b.jobs = make(map[structs.NamespacedID]string)
   689  	b.unblockIndexes = make(map[string]uint64)
   690  	b.timetable = nil
   691  	b.duplicates = nil
   692  	b.capacityChangeCh = make(chan *capacityUpdate, unblockBuffer)
   693  	b.stopCh = make(chan struct{})
   694  	b.duplicateCh = make(chan struct{}, 1)
   695  	b.system = newSystemEvals()
   696  }
   697  
   698  // Stats is used to query the state of the blocked eval tracker.
   699  func (b *BlockedEvals) Stats() *BlockedStats {
   700  	// Allocate a new stats struct
   701  	stats := new(BlockedStats)
   702  
   703  	b.l.RLock()
   704  	defer b.l.RUnlock()
   705  
   706  	// Copy all the stats
   707  	stats.TotalEscaped = b.stats.TotalEscaped
   708  	stats.TotalBlocked = b.stats.TotalBlocked
   709  	stats.TotalQuotaLimit = b.stats.TotalQuotaLimit
   710  	return stats
   711  }
   712  
   713  // EmitStats is used to export metrics about the blocked eval tracker while enabled
   714  func (b *BlockedEvals) EmitStats(period time.Duration, stopCh <-chan struct{}) {
   715  	for {
   716  		select {
   717  		case <-time.After(period):
   718  			stats := b.Stats()
   719  			metrics.SetGauge([]string{"nomad", "blocked_evals", "total_quota_limit"}, float32(stats.TotalQuotaLimit))
   720  			metrics.SetGauge([]string{"nomad", "blocked_evals", "total_blocked"}, float32(stats.TotalBlocked))
   721  			metrics.SetGauge([]string{"nomad", "blocked_evals", "total_escaped"}, float32(stats.TotalEscaped))
   722  		case <-stopCh:
   723  			return
   724  		}
   725  	}
   726  }
   727  
   728  // prune is a long lived function that prunes unnecessary objects on a timer.
   729  func (b *BlockedEvals) prune(stopCh <-chan struct{}) {
   730  	ticker := time.NewTicker(pruneInterval)
   731  	defer ticker.Stop()
   732  
   733  	for {
   734  		select {
   735  		case <-stopCh:
   736  			return
   737  		case <-ticker.C:
   738  			b.pruneUnblockIndexes()
   739  		}
   740  	}
   741  }
   742  
   743  // pruneUnblockIndexes is used to prune any tracked entry that is excessively
   744  // old. This protects againsts unbounded growth of the map.
   745  func (b *BlockedEvals) pruneUnblockIndexes() {
   746  	b.l.Lock()
   747  	defer b.l.Unlock()
   748  
   749  	if b.timetable == nil {
   750  		return
   751  	}
   752  
   753  	cutoff := time.Now().UTC().Add(-1 * pruneThreshold)
   754  	oldThreshold := b.timetable.NearestIndex(cutoff)
   755  
   756  	for key, index := range b.unblockIndexes {
   757  		if index < oldThreshold {
   758  			delete(b.unblockIndexes, key)
   759  		}
   760  	}
   761  }