github.com/hernad/nomad@v1.6.112/nomad/blocked_evals.go (about)

     1  // Copyright (c) HashiCorp, Inc.
     2  // SPDX-License-Identifier: MPL-2.0
     3  
     4  package nomad
     5  
     6  import (
     7  	"sync"
     8  	"time"
     9  
    10  	"github.com/armon/go-metrics"
    11  	"github.com/hashicorp/go-hclog"
    12  	"github.com/hernad/nomad/helper"
    13  	"github.com/hernad/nomad/nomad/structs"
    14  )
    15  
const (
	// unblockBuffer is the buffer size used when allocating capacityChangeCh,
	// the channel the Unblock* methods send capacity updates on. The buffer
	// should be large to ensure that the FSM doesn't block when calling Unblock
	// as this would apply back-pressure on Raft.
	unblockBuffer = 8096

	// pruneInterval is the interval at which we prune objects from the
	// BlockedEvals tracker. It is the period of the ticker driving prune.
	pruneInterval = 5 * time.Minute

	// pruneThreshold is the threshold after which objects will be pruned. On
	// every prune tick the cutoff is computed as the tick time minus this
	// duration.
	pruneThreshold = 15 * time.Minute
)
    29  
// BlockedEvals is used to track evaluations that shouldn't be queued until a
// certain class of nodes becomes available. An evaluation is put into the
// blocked state when it is run through the scheduler and produced failed
// allocations. It is unblocked when the capacity of a node that could run the
// failed allocation becomes available.
type BlockedEvals struct {
	// logger is the logger to use by the blocked eval tracker.
	logger hclog.Logger

	// evalBroker is the broker that unblocked evaluations are enqueued into.
	evalBroker *EvalBroker

	// enabled gates the tracker: the mutating methods return early while it
	// is false.
	enabled bool

	// stats holds the counters and resource summaries reported by Stats.
	stats *BlockedStats

	// l is held by the methods of this type while they read or modify the
	// fields of the tracker.
	l sync.RWMutex

	// captured is the set of evaluations that are captured by computed node
	// classes.
	captured map[string]wrappedEval

	// escaped is the set of evaluations that have escaped computed node
	// classes.
	escaped map[string]wrappedEval

	// system is the set of system evaluations that failed to start on nodes because of
	// resource constraints.
	system *systemEvals

	// capacityChangeCh is used to buffer unblocking of evaluations. The
	// Unblock* methods send on it and watchCapacity receives from it.
	capacityChangeCh chan *capacityUpdate

	// jobs is the map of blocked job and is used to ensure that only one
	// blocked eval exists for each job. The value is the blocked evaluation ID.
	jobs map[structs.NamespacedID]string

	// unblockIndexes maps computed node classes or quota name to the index in
	// which they were unblocked. This is used to check if an evaluation could
	// have been unblocked between the time they were in the scheduler and the
	// time they are being blocked.
	unblockIndexes map[string]uint64

	// duplicates is the set of evaluations for jobs that had pre-existing
	// blocked evaluations. These should be marked as cancelled since only one
	// blocked eval is needed per job.
	duplicates []*structs.Evaluation

	// duplicateCh is used to signal that a duplicate eval was added to the
	// duplicate set. It can be used to unblock waiting callers looking for
	// duplicates.
	duplicateCh chan struct{}

	// timetable is used to correlate indexes with their insertion time. This
	// allows us to prune based on time.
	timetable *TimeTable

	// stopCh is used to stop any created goroutines. It is closed when the
	// tracker is disabled and replaced with a fresh channel by Flush.
	stopCh chan struct{}
}
    86  
// capacityUpdate stores unblock data. It is the message sent over
// capacityChangeCh by the Unblock* methods and consumed by watchCapacity.
type capacityUpdate struct {
	// computedClass is the computed node class whose capacity changed; it is
	// left empty by UnblockQuota.
	computedClass string

	// quotaChange is the name of the quota that changed; it is left empty by
	// Unblock.
	quotaChange string

	// index is the index passed by the caller that reported the change.
	index uint64
}
    93  
// wrappedEval captures both the evaluation and the optional token
type wrappedEval struct {
	// eval is the blocked evaluation being tracked.
	eval *structs.Evaluation

	// token is the evaluation's token; it is the empty string when the
	// evaluation was added through Block rather than Reblock.
	token string
}
    99  
   100  // NewBlockedEvals creates a new blocked eval tracker that will enqueue
   101  // unblocked evals into the passed broker.
   102  func NewBlockedEvals(evalBroker *EvalBroker, logger hclog.Logger) *BlockedEvals {
   103  	return &BlockedEvals{
   104  		logger:           logger.Named("blocked_evals"),
   105  		evalBroker:       evalBroker,
   106  		captured:         make(map[string]wrappedEval),
   107  		escaped:          make(map[string]wrappedEval),
   108  		system:           newSystemEvals(),
   109  		jobs:             make(map[structs.NamespacedID]string),
   110  		unblockIndexes:   make(map[string]uint64),
   111  		capacityChangeCh: make(chan *capacityUpdate, unblockBuffer),
   112  		duplicateCh:      make(chan struct{}, 1),
   113  		stopCh:           make(chan struct{}),
   114  		stats:            NewBlockedStats(),
   115  	}
   116  }
   117  
   118  // Enabled is used to check if the broker is enabled.
   119  func (b *BlockedEvals) Enabled() bool {
   120  	b.l.RLock()
   121  	defer b.l.RUnlock()
   122  	return b.enabled
   123  }
   124  
   125  // SetEnabled is used to control if the blocked eval tracker is enabled. The
   126  // tracker should only be enabled on the active leader.
   127  func (b *BlockedEvals) SetEnabled(enabled bool) {
   128  	b.l.Lock()
   129  	if b.enabled == enabled {
   130  		// No-op
   131  		b.l.Unlock()
   132  		return
   133  	} else if enabled {
   134  		go b.watchCapacity(b.stopCh, b.capacityChangeCh)
   135  		go b.prune(b.stopCh)
   136  	} else {
   137  		close(b.stopCh)
   138  	}
   139  	b.enabled = enabled
   140  	b.l.Unlock()
   141  	if !enabled {
   142  		b.Flush()
   143  	}
   144  }
   145  
   146  func (b *BlockedEvals) SetTimetable(timetable *TimeTable) {
   147  	b.l.Lock()
   148  	b.timetable = timetable
   149  	b.l.Unlock()
   150  }
   151  
// Block tracks the passed evaluation and enqueues it into the eval broker when
// a suitable node calls unblock. The evaluation is tracked with an empty
// token; use Reblock when the evaluation is outstanding and has a token.
func (b *BlockedEvals) Block(eval *structs.Evaluation) {
	b.processBlock(eval, "")
}
   157  
// Reblock tracks the passed evaluation and enqueues it into the eval broker when
// a suitable node calls unblock. Reblock should be used over Block when the
// blocking is occurring by an outstanding evaluation. The token is the
// evaluation's token and is stored alongside the evaluation so it can be
// handed back to the eval broker when the evaluation is enqueued.
func (b *BlockedEvals) Reblock(eval *structs.Evaluation, token string) {
	b.processBlock(eval, token)
}
   165  
   166  // processBlock is the implementation of blocking an evaluation. It supports
   167  // taking an optional evaluation token to use when reblocking an evaluation that
   168  // may be outstanding.
   169  func (b *BlockedEvals) processBlock(eval *structs.Evaluation, token string) {
   170  	b.l.Lock()
   171  	defer b.l.Unlock()
   172  
   173  	// Do nothing if not enabled
   174  	if !b.enabled {
   175  		return
   176  	}
   177  
   178  	// Handle the new evaluation being for a job we are already tracking.
   179  	if b.processBlockJobDuplicate(eval) {
   180  		// If process block job duplicate returns true, the new evaluation has
   181  		// been marked as a duplicate and we have nothing to do, so return
   182  		// early.
   183  		return
   184  	}
   185  
   186  	// Check if the eval missed an unblock while it was in the scheduler at an
   187  	// older index. The scheduler could have been invoked with a snapshot of
   188  	// state that was prior to additional capacity being added or allocations
   189  	// becoming terminal.
   190  	if b.missedUnblock(eval) {
   191  		// Just re-enqueue the eval immediately. We pass the token so that the
   192  		// eval_broker can properly handle the case in which the evaluation is
   193  		// still outstanding.
   194  		b.evalBroker.EnqueueAll(map[*structs.Evaluation]string{eval: token})
   195  		return
   196  	}
   197  
   198  	// Mark the job as tracked.
   199  	b.jobs[structs.NewNamespacedID(eval.JobID, eval.Namespace)] = eval.ID
   200  	b.stats.Block(eval)
   201  
   202  	// Track that the evaluation is being added due to reaching the quota limit
   203  	if eval.QuotaLimitReached != "" {
   204  		b.stats.TotalQuotaLimit++
   205  	}
   206  
   207  	// Wrap the evaluation, capturing its token.
   208  	wrapped := wrappedEval{
   209  		eval:  eval,
   210  		token: token,
   211  	}
   212  
   213  	// If the eval has escaped, meaning computed node classes could not capture
   214  	// the constraints of the job, we store the eval separately as we have to
   215  	// unblock it whenever node capacity changes. This is because we don't know
   216  	// what node class is feasible for the jobs constraints.
   217  	if eval.EscapedComputedClass {
   218  		b.escaped[eval.ID] = wrapped
   219  		b.stats.TotalEscaped++
   220  		return
   221  	}
   222  
   223  	// System evals are indexed by node and re-processed on utilization changes in
   224  	// existing nodes
   225  	if eval.Type == structs.JobTypeSystem {
   226  		b.system.Add(eval, token)
   227  	}
   228  
   229  	// Add the eval to the set of blocked evals whose jobs constraints are
   230  	// captured by computed node class.
   231  	b.captured[eval.ID] = wrapped
   232  }
   233  
   234  // processBlockJobDuplicate handles the case where the new eval is for a job
   235  // that we are already tracking. If the eval is a duplicate, we add the older
   236  // evaluation by Raft index to the list of duplicates such that it can be
   237  // cancelled. We only ever want one blocked evaluation per job, otherwise we
   238  // would create unnecessary work for the scheduler as multiple evals for the
   239  // same job would be run, all producing the same outcome. It is critical to
   240  // prefer the newer evaluation, since it will contain the most up to date set of
   241  // class eligibility. The return value is set to true, if the passed evaluation
   242  // is cancelled. This should be called with the lock held.
   243  func (b *BlockedEvals) processBlockJobDuplicate(eval *structs.Evaluation) (newCancelled bool) {
   244  	existingID, hasExisting := b.jobs[structs.NewNamespacedID(eval.JobID, eval.Namespace)]
   245  	if !hasExisting {
   246  		return
   247  	}
   248  
   249  	var dup *structs.Evaluation
   250  	existingW, ok := b.captured[existingID]
   251  	if ok {
   252  		if latestEvalIndex(existingW.eval) <= latestEvalIndex(eval) {
   253  			delete(b.captured, existingID)
   254  			dup = existingW.eval
   255  			b.stats.Unblock(dup)
   256  		} else {
   257  			dup = eval
   258  			newCancelled = true
   259  		}
   260  	} else {
   261  		existingW, ok = b.escaped[existingID]
   262  		if !ok {
   263  			// This is a programming error
   264  			b.logger.Error("existing blocked evaluation is neither tracked as captured or escaped", "existing_id", existingID)
   265  			delete(b.jobs, structs.NewNamespacedID(eval.JobID, eval.Namespace))
   266  			return
   267  		}
   268  
   269  		if latestEvalIndex(existingW.eval) <= latestEvalIndex(eval) {
   270  			delete(b.escaped, existingID)
   271  			b.stats.TotalEscaped--
   272  			dup = existingW.eval
   273  		} else {
   274  			dup = eval
   275  			newCancelled = true
   276  		}
   277  	}
   278  
   279  	b.duplicates = append(b.duplicates, dup)
   280  
   281  	// Unblock any waiter.
   282  	select {
   283  	case b.duplicateCh <- struct{}{}:
   284  	default:
   285  	}
   286  
   287  	return
   288  }
   289  
   290  // latestEvalIndex returns the max of the evaluations create and snapshot index
   291  func latestEvalIndex(eval *structs.Evaluation) uint64 {
   292  	if eval == nil {
   293  		return 0
   294  	}
   295  
   296  	return helper.Max(eval.CreateIndex, eval.SnapshotIndex)
   297  }
   298  
   299  // missedUnblock returns whether an evaluation missed an unblock while it was in
   300  // the scheduler. Since the scheduler can operate at an index in the past, the
   301  // evaluation may have been processed missing data that would allow it to
   302  // complete. This method returns if that is the case and should be called with
   303  // the lock held.
   304  func (b *BlockedEvals) missedUnblock(eval *structs.Evaluation) bool {
   305  	var max uint64 = 0
   306  	for id, index := range b.unblockIndexes {
   307  		// Calculate the max unblock index
   308  		if max < index {
   309  			max = index
   310  		}
   311  
   312  		// The evaluation is blocked because it has hit a quota limit not class
   313  		// eligibility
   314  		if eval.QuotaLimitReached != "" {
   315  			if eval.QuotaLimitReached != id {
   316  				// Not a match
   317  				continue
   318  			} else if eval.SnapshotIndex < index {
   319  				// The evaluation was processed before the quota specification was
   320  				// updated, so unblock the evaluation.
   321  				return true
   322  			}
   323  
   324  			// The evaluation was processed having seen all changes to the quota
   325  			return false
   326  		}
   327  
   328  		elig, ok := eval.ClassEligibility[id]
   329  		if !ok && eval.SnapshotIndex < index {
   330  			// The evaluation was processed and did not encounter this class
   331  			// because it was added after it was processed. Thus for correctness
   332  			// we need to unblock it.
   333  			return true
   334  		}
   335  
   336  		// The evaluation could use the computed node class and the eval was
   337  		// processed before the last unblock.
   338  		if elig && eval.SnapshotIndex < index {
   339  			return true
   340  		}
   341  	}
   342  
   343  	// If the evaluation has escaped, and the map contains an index older than
   344  	// the evaluations, it should be unblocked.
   345  	if eval.EscapedComputedClass && eval.SnapshotIndex < max {
   346  		return true
   347  	}
   348  
   349  	// The evaluation is ahead of all recent unblocks.
   350  	return false
   351  }
   352  
   353  // Untrack causes any blocked evaluation for the passed job to be no longer
   354  // tracked. Untrack is called when there is a successful evaluation for the job
   355  // and a blocked evaluation is no longer needed.
   356  func (b *BlockedEvals) Untrack(jobID, namespace string) {
   357  	b.l.Lock()
   358  	defer b.l.Unlock()
   359  
   360  	// Do nothing if not enabled
   361  	if !b.enabled {
   362  		return
   363  	}
   364  
   365  	nsID := structs.NewNamespacedID(jobID, namespace)
   366  
   367  	if evals, ok := b.system.JobEvals(nsID); ok {
   368  		for _, e := range evals {
   369  			b.system.Remove(e)
   370  			b.stats.Unblock(e)
   371  		}
   372  		return
   373  	}
   374  
   375  	// Get the evaluation ID to cancel
   376  	evalID, ok := b.jobs[nsID]
   377  	if !ok {
   378  		// No blocked evaluation so exit
   379  		return
   380  	}
   381  
   382  	// Attempt to delete the evaluation
   383  	if w, ok := b.captured[evalID]; ok {
   384  		delete(b.jobs, nsID)
   385  		delete(b.captured, evalID)
   386  		b.stats.Unblock(w.eval)
   387  		if w.eval.QuotaLimitReached != "" {
   388  			b.stats.TotalQuotaLimit--
   389  		}
   390  	}
   391  
   392  	if w, ok := b.escaped[evalID]; ok {
   393  		delete(b.jobs, nsID)
   394  		delete(b.escaped, evalID)
   395  		b.stats.TotalEscaped--
   396  		b.stats.Unblock(w.eval)
   397  		if w.eval.QuotaLimitReached != "" {
   398  			b.stats.TotalQuotaLimit--
   399  		}
   400  	}
   401  }
   402  
   403  // Unblock causes any evaluation that could potentially make progress on a
   404  // capacity change on the passed computed node class to be enqueued into the
   405  // eval broker.
   406  func (b *BlockedEvals) Unblock(computedClass string, index uint64) {
   407  	b.l.Lock()
   408  
   409  	// Do nothing if not enabled
   410  	if !b.enabled {
   411  		b.l.Unlock()
   412  		return
   413  	}
   414  
   415  	// Store the index in which the unblock happened. We use this on subsequent
   416  	// block calls in case the evaluation was in the scheduler when a trigger
   417  	// occurred.
   418  	b.unblockIndexes[computedClass] = index
   419  
   420  	// Capture chan in lock as Flush overwrites it
   421  	ch := b.capacityChangeCh
   422  	done := b.stopCh
   423  	b.l.Unlock()
   424  
   425  	select {
   426  	case <-done:
   427  	case ch <- &capacityUpdate{
   428  		computedClass: computedClass,
   429  		index:         index,
   430  	}:
   431  	}
   432  }
   433  
   434  // UnblockQuota causes any evaluation that could potentially make progress on a
   435  // capacity change on the passed quota to be enqueued into the eval broker.
   436  func (b *BlockedEvals) UnblockQuota(quota string, index uint64) {
   437  	// Nothing to do
   438  	if quota == "" {
   439  		return
   440  	}
   441  
   442  	b.l.Lock()
   443  
   444  	// Do nothing if not enabled
   445  	if !b.enabled {
   446  		b.l.Unlock()
   447  		return
   448  	}
   449  
   450  	// Store the index in which the unblock happened. We use this on subsequent
   451  	// block calls in case the evaluation was in the scheduler when a trigger
   452  	// occurred.
   453  	b.unblockIndexes[quota] = index
   454  	ch := b.capacityChangeCh
   455  	done := b.stopCh
   456  	b.l.Unlock()
   457  
   458  	select {
   459  	case <-done:
   460  	case ch <- &capacityUpdate{
   461  		quotaChange: quota,
   462  		index:       index,
   463  	}:
   464  	}
   465  }
   466  
   467  // UnblockClassAndQuota causes any evaluation that could potentially make
   468  // progress on a capacity change on the passed computed node class or quota to
   469  // be enqueued into the eval broker.
   470  func (b *BlockedEvals) UnblockClassAndQuota(class, quota string, index uint64) {
   471  	b.l.Lock()
   472  
   473  	// Do nothing if not enabled
   474  	if !b.enabled {
   475  		b.l.Unlock()
   476  		return
   477  	}
   478  
   479  	// Store the index in which the unblock happened. We use this on subsequent
   480  	// block calls in case the evaluation was in the scheduler when a trigger
   481  	// occurred.
   482  	if quota != "" {
   483  		b.unblockIndexes[quota] = index
   484  	}
   485  	b.unblockIndexes[class] = index
   486  
   487  	// Capture chan inside the lock to prevent a race with it getting reset
   488  	// in Flush.
   489  	ch := b.capacityChangeCh
   490  	done := b.stopCh
   491  	b.l.Unlock()
   492  
   493  	select {
   494  	case <-done:
   495  	case ch <- &capacityUpdate{
   496  		computedClass: class,
   497  		quotaChange:   quota,
   498  		index:         index,
   499  	}:
   500  	}
   501  }
   502  
   503  // UnblockNode finds any blocked evalution that's node specific (system jobs) and enqueues
   504  // it on the eval broker
   505  func (b *BlockedEvals) UnblockNode(nodeID string, index uint64) {
   506  	b.l.Lock()
   507  	defer b.l.Unlock()
   508  
   509  	evals, ok := b.system.NodeEvals(nodeID)
   510  
   511  	// Do nothing if not enabled
   512  	if !b.enabled || !ok || len(evals) == 0 {
   513  		return
   514  	}
   515  
   516  	for e := range evals {
   517  		b.system.Remove(e)
   518  		b.stats.Unblock(e)
   519  	}
   520  
   521  	b.evalBroker.EnqueueAll(evals)
   522  }
   523  
   524  // watchCapacity is a long lived function that watches for capacity changes in
   525  // nodes and unblocks the correct set of evals.
   526  func (b *BlockedEvals) watchCapacity(stopCh <-chan struct{}, changeCh <-chan *capacityUpdate) {
   527  	for {
   528  		select {
   529  		case <-stopCh:
   530  			return
   531  		case update := <-changeCh:
   532  			b.unblock(update.computedClass, update.quotaChange, update.index)
   533  		}
   534  	}
   535  }
   536  
   537  func (b *BlockedEvals) unblock(computedClass, quota string, index uint64) {
   538  	b.l.Lock()
   539  	defer b.l.Unlock()
   540  
   541  	// Protect against the case of a flush.
   542  	if !b.enabled {
   543  		return
   544  	}
   545  
   546  	// Every eval that has escaped computed node class has to be unblocked
   547  	// because any node could potentially be feasible.
   548  	numEscaped := len(b.escaped)
   549  	numQuotaLimit := 0
   550  	unblocked := make(map[*structs.Evaluation]string, helper.Max(numEscaped, 4))
   551  
   552  	if numEscaped != 0 && computedClass != "" {
   553  		for id, wrapped := range b.escaped {
   554  			unblocked[wrapped.eval] = wrapped.token
   555  			delete(b.escaped, id)
   556  			delete(b.jobs, structs.NewNamespacedID(wrapped.eval.JobID, wrapped.eval.Namespace))
   557  
   558  			if wrapped.eval.QuotaLimitReached != "" {
   559  				numQuotaLimit++
   560  			}
   561  		}
   562  	}
   563  
   564  	// We unblock any eval that is explicitly eligible for the computed class
   565  	// and also any eval that is not eligible or uneligible. This signifies that
   566  	// when the evaluation was originally run through the scheduler, that it
   567  	// never saw a node with the given computed class and thus needs to be
   568  	// unblocked for correctness.
   569  	for id, wrapped := range b.captured {
   570  		if quota != "" && wrapped.eval.QuotaLimitReached != quota {
   571  			// We are unblocking based on quota and this eval doesn't match
   572  			continue
   573  		} else if elig, ok := wrapped.eval.ClassEligibility[computedClass]; ok && !elig {
   574  			// Can skip because the eval has explicitly marked the node class
   575  			// as ineligible.
   576  			continue
   577  		}
   578  
   579  		// Unblock the evaluation because it is either for the matching quota,
   580  		// is eligible based on the computed node class, or never seen the
   581  		// computed node class.
   582  		unblocked[wrapped.eval] = wrapped.token
   583  		delete(b.jobs, structs.NewNamespacedID(wrapped.eval.JobID, wrapped.eval.Namespace))
   584  		delete(b.captured, id)
   585  		if wrapped.eval.QuotaLimitReached != "" {
   586  			numQuotaLimit++
   587  		}
   588  	}
   589  
   590  	if len(unblocked) != 0 {
   591  		// Update the counters
   592  		b.stats.TotalEscaped = 0
   593  		b.stats.TotalQuotaLimit -= numQuotaLimit
   594  		for eval := range unblocked {
   595  			b.stats.Unblock(eval)
   596  		}
   597  
   598  		// Enqueue all the unblocked evals into the broker.
   599  		b.evalBroker.EnqueueAll(unblocked)
   600  	}
   601  }
   602  
   603  // UnblockFailed unblocks all blocked evaluation that were due to scheduler
   604  // failure.
   605  func (b *BlockedEvals) UnblockFailed() {
   606  	b.l.Lock()
   607  	defer b.l.Unlock()
   608  
   609  	// Do nothing if not enabled
   610  	if !b.enabled {
   611  		return
   612  	}
   613  
   614  	quotaLimit := 0
   615  	unblocked := make(map[*structs.Evaluation]string, 4)
   616  	for id, wrapped := range b.captured {
   617  		if wrapped.eval.TriggeredBy == structs.EvalTriggerMaxPlans {
   618  			unblocked[wrapped.eval] = wrapped.token
   619  			delete(b.captured, id)
   620  			delete(b.jobs, structs.NewNamespacedID(wrapped.eval.JobID, wrapped.eval.Namespace))
   621  			if wrapped.eval.QuotaLimitReached != "" {
   622  				quotaLimit++
   623  			}
   624  		}
   625  	}
   626  
   627  	for id, wrapped := range b.escaped {
   628  		if wrapped.eval.TriggeredBy == structs.EvalTriggerMaxPlans {
   629  			unblocked[wrapped.eval] = wrapped.token
   630  			delete(b.escaped, id)
   631  			delete(b.jobs, structs.NewNamespacedID(wrapped.eval.JobID, wrapped.eval.Namespace))
   632  			b.stats.TotalEscaped -= 1
   633  			if wrapped.eval.QuotaLimitReached != "" {
   634  				quotaLimit++
   635  			}
   636  		}
   637  	}
   638  
   639  	if len(unblocked) > 0 {
   640  		b.stats.TotalQuotaLimit -= quotaLimit
   641  		for eval := range unblocked {
   642  			b.stats.Unblock(eval)
   643  		}
   644  
   645  		b.evalBroker.EnqueueAll(unblocked)
   646  	}
   647  }
   648  
   649  // GetDuplicates returns all the duplicate evaluations and blocks until the
   650  // passed timeout.
   651  func (b *BlockedEvals) GetDuplicates(timeout time.Duration) []*structs.Evaluation {
   652  	var timeoutTimer *time.Timer
   653  	var timeoutCh <-chan time.Time
   654  SCAN:
   655  	b.l.Lock()
   656  	if len(b.duplicates) != 0 {
   657  		dups := b.duplicates
   658  		b.duplicates = nil
   659  		b.l.Unlock()
   660  		return dups
   661  	}
   662  
   663  	// Capture chans inside the lock to prevent a race with them getting
   664  	// reset in Flush
   665  	dupCh := b.duplicateCh
   666  	stopCh := b.stopCh
   667  	b.l.Unlock()
   668  
   669  	// Create the timer
   670  	if timeoutTimer == nil && timeout != 0 {
   671  		timeoutTimer = time.NewTimer(timeout)
   672  		timeoutCh = timeoutTimer.C
   673  		defer timeoutTimer.Stop()
   674  	}
   675  
   676  	select {
   677  	case <-stopCh:
   678  		return nil
   679  	case <-timeoutCh:
   680  		return nil
   681  	case <-dupCh:
   682  		goto SCAN
   683  	}
   684  }
   685  
   686  // Flush is used to clear the state of blocked evaluations.
   687  func (b *BlockedEvals) Flush() {
   688  	b.l.Lock()
   689  	defer b.l.Unlock()
   690  
   691  	// Reset the blocked eval tracker.
   692  	b.stats.TotalEscaped = 0
   693  	b.stats.TotalBlocked = 0
   694  	b.stats.TotalQuotaLimit = 0
   695  	b.stats.BlockedResources = NewBlockedResourcesStats()
   696  	b.captured = make(map[string]wrappedEval)
   697  	b.escaped = make(map[string]wrappedEval)
   698  	b.jobs = make(map[structs.NamespacedID]string)
   699  	b.unblockIndexes = make(map[string]uint64)
   700  	b.timetable = nil
   701  	b.duplicates = nil
   702  	b.capacityChangeCh = make(chan *capacityUpdate, unblockBuffer)
   703  	b.stopCh = make(chan struct{})
   704  	b.duplicateCh = make(chan struct{}, 1)
   705  	b.system = newSystemEvals()
   706  }
   707  
   708  // Stats is used to query the state of the blocked eval tracker.
   709  func (b *BlockedEvals) Stats() *BlockedStats {
   710  	// Allocate a new stats struct
   711  	stats := NewBlockedStats()
   712  
   713  	b.l.RLock()
   714  	defer b.l.RUnlock()
   715  
   716  	// Copy all the stats
   717  	stats.TotalEscaped = b.stats.TotalEscaped
   718  	stats.TotalBlocked = b.stats.TotalBlocked
   719  	stats.TotalQuotaLimit = b.stats.TotalQuotaLimit
   720  	stats.BlockedResources = b.stats.BlockedResources.Copy()
   721  
   722  	return stats
   723  }
   724  
   725  // EmitStats is used to export metrics about the blocked eval tracker while enabled
   726  func (b *BlockedEvals) EmitStats(period time.Duration, stopCh <-chan struct{}) {
   727  	timer, stop := helper.NewSafeTimer(period)
   728  	defer stop()
   729  
   730  	for {
   731  		timer.Reset(period)
   732  
   733  		select {
   734  		case <-timer.C:
   735  			stats := b.Stats()
   736  			metrics.SetGauge([]string{"nomad", "blocked_evals", "total_quota_limit"}, float32(stats.TotalQuotaLimit))
   737  			metrics.SetGauge([]string{"nomad", "blocked_evals", "total_blocked"}, float32(stats.TotalBlocked))
   738  			metrics.SetGauge([]string{"nomad", "blocked_evals", "total_escaped"}, float32(stats.TotalEscaped))
   739  
   740  			for k, v := range stats.BlockedResources.ByJob {
   741  				labels := []metrics.Label{
   742  					{Name: "namespace", Value: k.Namespace},
   743  					{Name: "job", Value: k.ID},
   744  				}
   745  				metrics.SetGaugeWithLabels([]string{"nomad", "blocked_evals", "job", "cpu"}, float32(v.CPU), labels)
   746  				metrics.SetGaugeWithLabels([]string{"nomad", "blocked_evals", "job", "memory"}, float32(v.MemoryMB), labels)
   747  			}
   748  
   749  			for k, v := range stats.BlockedResources.ByClassInDC {
   750  				labels := []metrics.Label{
   751  					{Name: "datacenter", Value: k.dc},
   752  					{Name: "node_class", Value: k.class},
   753  				}
   754  				metrics.SetGaugeWithLabels([]string{"nomad", "blocked_evals", "cpu"}, float32(v.CPU), labels)
   755  				metrics.SetGaugeWithLabels([]string{"nomad", "blocked_evals", "memory"}, float32(v.MemoryMB), labels)
   756  			}
   757  		case <-stopCh:
   758  			return
   759  		}
   760  	}
   761  }
   762  
   763  // prune is a long lived function that prunes unnecessary objects on a timer.
   764  func (b *BlockedEvals) prune(stopCh <-chan struct{}) {
   765  	ticker := time.NewTicker(pruneInterval)
   766  	defer ticker.Stop()
   767  
   768  	for {
   769  		select {
   770  		case <-stopCh:
   771  			return
   772  		case t := <-ticker.C:
   773  			cutoff := t.UTC().Add(-1 * pruneThreshold)
   774  			b.pruneUnblockIndexes(cutoff)
   775  			b.pruneStats(cutoff)
   776  		}
   777  	}
   778  }
   779  
   780  // pruneUnblockIndexes is used to prune any tracked entry that is excessively
   781  // old. This protects againsts unbounded growth of the map.
   782  func (b *BlockedEvals) pruneUnblockIndexes(cutoff time.Time) {
   783  	b.l.Lock()
   784  	defer b.l.Unlock()
   785  
   786  	if b.timetable == nil {
   787  		return
   788  	}
   789  
   790  	oldThreshold := b.timetable.NearestIndex(cutoff)
   791  	for key, index := range b.unblockIndexes {
   792  		if index < oldThreshold {
   793  			delete(b.unblockIndexes, key)
   794  		}
   795  	}
   796  }
   797  
// pruneStats is used to prune any zero value stats that are excessively old.
// It takes the tracker lock and delegates to the stats object's prune method
// with the given cutoff time.
func (b *BlockedEvals) pruneStats(cutoff time.Time) {
	b.l.Lock()
	defer b.l.Unlock()

	b.stats.prune(cutoff)
}