github.com/bigcommerce/nomad@v0.9.3-bc/nomad/blocked_evals.go (about)

     1  package nomad
     2  
     3  import (
     4  	"sync"
     5  	"time"
     6  
     7  	metrics "github.com/armon/go-metrics"
     8  	"github.com/hashicorp/consul/lib"
     9  	log "github.com/hashicorp/go-hclog"
    10  	"github.com/hashicorp/nomad/helper"
    11  	"github.com/hashicorp/nomad/nomad/structs"
    12  )
    13  
const (
	// unblockBuffer is the buffer size of the capacity change channel that
	// the Unblock family of methods send on. The buffer should be large to
	// ensure that the FSM doesn't block when calling Unblock as this would
	// apply back-pressure on Raft.
	unblockBuffer = 8096

	// pruneInterval is the interval at which the prune goroutine wakes up to
	// prune objects from the BlockedEvals tracker.
	pruneInterval = 5 * time.Minute

	// pruneThreshold is the age after which objects will be pruned. Unblock
	// indexes older than the index nearest to (now - pruneThreshold) are
	// dropped.
	pruneThreshold = 15 * time.Minute
)
    27  
// BlockedEvals is used to track evaluations that shouldn't be queued until a
// certain class of nodes becomes available. An evaluation is put into the
// blocked state when it is run through the scheduler and produced failed
// allocations. It is unblocked when the capacity of a node that could run the
// failed allocation becomes available.
type BlockedEvals struct {
	// logger is the logger to use by the blocked eval tracker.
	logger log.Logger

	// evalBroker is the broker that unblocked evaluations are enqueued into.
	evalBroker *EvalBroker
	// enabled reports whether the tracker is currently accepting work; it is
	// toggled through SetEnabled.
	enabled    bool
	// stats holds the running counters that Stats copies out.
	stats      *BlockedStats
	// l guards the fields of this struct.
	l          sync.RWMutex

	// captured is the set of evaluations that are captured by computed node
	// classes.
	captured map[string]wrappedEval

	// escaped is the set of evaluations that have escaped computed node
	// classes.
	escaped map[string]wrappedEval

	// capacityChangeCh is used to buffer capacity updates that may unblock
	// evaluations. It is replaced by Flush, so it must be read with the lock
	// held.
	capacityChangeCh chan *capacityUpdate

	// jobs is the map of blocked job and is used to ensure that only one
	// blocked eval exists for each job. The value is the blocked evaluation ID.
	jobs map[structs.NamespacedID]string

	// unblockIndexes maps computed node classes or quota name to the index in
	// which they were unblocked. This is used to check if an evaluation could
	// have been unblocked between the time they were in the scheduler and the
	// time they are being blocked.
	unblockIndexes map[string]uint64

	// duplicates is the set of evaluations for jobs that had pre-existing
	// blocked evaluations. These should be marked as cancelled since only one
	// blocked eval is needed per job.
	duplicates []*structs.Evaluation

	// duplicateCh is used to signal that a duplicate eval was added to the
	// duplicate set. It can be used to unblock waiting callers looking for
	// duplicates.
	duplicateCh chan struct{}

	// timetable is used to correlate indexes with their insertion time. This
	// allows us to prune based on time.
	timetable *TimeTable

	// stopCh is used to stop any created goroutines. It is closed when the
	// tracker is disabled and replaced by Flush.
	stopCh chan struct{}
}
    80  
// capacityUpdate stores unblock data. It is the message sent from the Unblock
// family of methods to the watchCapacity goroutine.
type capacityUpdate struct {
	// computedClass is the computed node class whose capacity changed; empty
	// for quota-only updates.
	computedClass string
	// quotaChange is the name of the quota that changed; empty for class-only
	// updates.
	quotaChange   string
	// index is the index at which the change happened.
	index         uint64
}
    87  
// wrappedEval captures both the evaluation and the optional token. The token
// is the empty string when the evaluation was added through Block and the
// caller-supplied evaluation token when it was added through Reblock.
type wrappedEval struct {
	eval  *structs.Evaluation
	token string
}
    93  
// BlockedStats holds all the stats about the blocked eval tracker. A copy is
// returned by BlockedEvals.Stats.
type BlockedStats struct {
	// TotalEscaped is the total number of blocked evaluations that have escaped
	// computed node classes.
	TotalEscaped int

	// TotalBlocked is the total number of blocked evaluations.
	TotalBlocked int

	// TotalQuotaLimit is the total number of blocked evaluations that are due
	// to the quota limit being reached.
	TotalQuotaLimit int
}
   107  
   108  // NewBlockedEvals creates a new blocked eval tracker that will enqueue
   109  // unblocked evals into the passed broker.
   110  func NewBlockedEvals(evalBroker *EvalBroker, logger log.Logger) *BlockedEvals {
   111  	return &BlockedEvals{
   112  		logger:           logger.Named("blocked_evals"),
   113  		evalBroker:       evalBroker,
   114  		captured:         make(map[string]wrappedEval),
   115  		escaped:          make(map[string]wrappedEval),
   116  		jobs:             make(map[structs.NamespacedID]string),
   117  		unblockIndexes:   make(map[string]uint64),
   118  		capacityChangeCh: make(chan *capacityUpdate, unblockBuffer),
   119  		duplicateCh:      make(chan struct{}, 1),
   120  		stopCh:           make(chan struct{}),
   121  		stats:            new(BlockedStats),
   122  	}
   123  }
   124  
   125  // Enabled is used to check if the broker is enabled.
   126  func (b *BlockedEvals) Enabled() bool {
   127  	b.l.RLock()
   128  	defer b.l.RUnlock()
   129  	return b.enabled
   130  }
   131  
   132  // SetEnabled is used to control if the blocked eval tracker is enabled. The
   133  // tracker should only be enabled on the active leader.
   134  func (b *BlockedEvals) SetEnabled(enabled bool) {
   135  	b.l.Lock()
   136  	if b.enabled == enabled {
   137  		// No-op
   138  		b.l.Unlock()
   139  		return
   140  	} else if enabled {
   141  		go b.watchCapacity(b.stopCh, b.capacityChangeCh)
   142  		go b.prune(b.stopCh)
   143  	} else {
   144  		close(b.stopCh)
   145  	}
   146  	b.enabled = enabled
   147  	b.l.Unlock()
   148  	if !enabled {
   149  		b.Flush()
   150  	}
   151  }
   152  
   153  func (b *BlockedEvals) SetTimetable(timetable *TimeTable) {
   154  	b.l.Lock()
   155  	b.timetable = timetable
   156  	b.l.Unlock()
   157  }
   158  
// Block tracks the passed evaluation and enqueues it into the eval broker when
// a suitable node calls unblock. The evaluation is tracked without a token;
// use Reblock when the evaluation is still outstanding.
func (b *BlockedEvals) Block(eval *structs.Evaluation) {
	b.processBlock(eval, "")
}
   164  
// Reblock tracks the passed evaluation and enqueues it into the eval broker when
// a suitable node calls unblock. Reblock should be used over Block when the
// blocking is occurring by an outstanding evaluation. The token is the
// evaluation's token and is handed back to the eval broker when the
// evaluation is enqueued.
func (b *BlockedEvals) Reblock(eval *structs.Evaluation, token string) {
	b.processBlock(eval, token)
}
   172  
   173  // processBlock is the implementation of blocking an evaluation. It supports
   174  // taking an optional evaluation token to use when reblocking an evaluation that
   175  // may be outstanding.
   176  func (b *BlockedEvals) processBlock(eval *structs.Evaluation, token string) {
   177  	b.l.Lock()
   178  	defer b.l.Unlock()
   179  
   180  	// Do nothing if not enabled
   181  	if !b.enabled {
   182  		return
   183  	}
   184  
   185  	// Handle the new evaluation being for a job we are already tracking.
   186  	if b.processBlockJobDuplicate(eval) {
   187  		// If process block job duplicate returns true, the new evaluation has
   188  		// been marked as a duplicate and we have nothing to do, so return
   189  		// early.
   190  		return
   191  	}
   192  
   193  	// Check if the eval missed an unblock while it was in the scheduler at an
   194  	// older index. The scheduler could have been invoked with a snapshot of
   195  	// state that was prior to additional capacity being added or allocations
   196  	// becoming terminal.
   197  	if b.missedUnblock(eval) {
   198  		// Just re-enqueue the eval immediately. We pass the token so that the
   199  		// eval_broker can properly handle the case in which the evaluation is
   200  		// still outstanding.
   201  		b.evalBroker.EnqueueAll(map[*structs.Evaluation]string{eval: token})
   202  		return
   203  	}
   204  
   205  	// Mark the job as tracked.
   206  	b.jobs[structs.NewNamespacedID(eval.JobID, eval.Namespace)] = eval.ID
   207  	b.stats.TotalBlocked++
   208  
   209  	// Track that the evaluation is being added due to reaching the quota limit
   210  	if eval.QuotaLimitReached != "" {
   211  		b.stats.TotalQuotaLimit++
   212  	}
   213  
   214  	// Wrap the evaluation, capturing its token.
   215  	wrapped := wrappedEval{
   216  		eval:  eval,
   217  		token: token,
   218  	}
   219  
   220  	// If the eval has escaped, meaning computed node classes could not capture
   221  	// the constraints of the job, we store the eval separately as we have to
   222  	// unblock it whenever node capacity changes. This is because we don't know
   223  	// what node class is feasible for the jobs constraints.
   224  	if eval.EscapedComputedClass {
   225  		b.escaped[eval.ID] = wrapped
   226  		b.stats.TotalEscaped++
   227  		return
   228  	}
   229  
   230  	// Add the eval to the set of blocked evals whose jobs constraints are
   231  	// captured by computed node class.
   232  	b.captured[eval.ID] = wrapped
   233  }
   234  
   235  // processBlockJobDuplicate handles the case where the new eval is for a job
   236  // that we are already tracking. If the eval is a duplicate, we add the older
   237  // evaluation by Raft index to the list of duplicates such that it can be
   238  // cancelled. We only ever want one blocked evaluation per job, otherwise we
   239  // would create unnecessary work for the scheduler as multiple evals for the
   240  // same job would be run, all producing the same outcome. It is critical to
   241  // prefer the newer evaluation, since it will contain the most up to date set of
   242  // class eligibility. The return value is set to true, if the passed evaluation
   243  // is cancelled. This should be called with the lock held.
   244  func (b *BlockedEvals) processBlockJobDuplicate(eval *structs.Evaluation) (newCancelled bool) {
   245  	existingID, hasExisting := b.jobs[structs.NewNamespacedID(eval.JobID, eval.Namespace)]
   246  	if !hasExisting {
   247  		return
   248  	}
   249  
   250  	var dup *structs.Evaluation
   251  	existingW, ok := b.captured[existingID]
   252  	if ok {
   253  		if latestEvalIndex(existingW.eval) <= latestEvalIndex(eval) {
   254  			delete(b.captured, existingID)
   255  			b.stats.TotalBlocked--
   256  			dup = existingW.eval
   257  		} else {
   258  			dup = eval
   259  			newCancelled = true
   260  		}
   261  	} else {
   262  		existingW, ok = b.escaped[existingID]
   263  		if !ok {
   264  			// This is a programming error
   265  			b.logger.Error("existing blocked evaluation is neither tracked as captured or escaped", "existing_id", existingID)
   266  			delete(b.jobs, structs.NewNamespacedID(eval.JobID, eval.Namespace))
   267  			return
   268  		}
   269  
   270  		if latestEvalIndex(existingW.eval) <= latestEvalIndex(eval) {
   271  			delete(b.escaped, existingID)
   272  			b.stats.TotalEscaped--
   273  			dup = existingW.eval
   274  		} else {
   275  			dup = eval
   276  			newCancelled = true
   277  		}
   278  	}
   279  
   280  	b.duplicates = append(b.duplicates, dup)
   281  
   282  	// Unblock any waiter.
   283  	select {
   284  	case b.duplicateCh <- struct{}{}:
   285  	default:
   286  	}
   287  
   288  	return
   289  }
   290  
   291  // latestEvalIndex returns the max of the evaluations create and snapshot index
   292  func latestEvalIndex(eval *structs.Evaluation) uint64 {
   293  	if eval == nil {
   294  		return 0
   295  	}
   296  
   297  	return helper.Uint64Max(eval.CreateIndex, eval.SnapshotIndex)
   298  }
   299  
   300  // missedUnblock returns whether an evaluation missed an unblock while it was in
   301  // the scheduler. Since the scheduler can operate at an index in the past, the
   302  // evaluation may have been processed missing data that would allow it to
   303  // complete. This method returns if that is the case and should be called with
   304  // the lock held.
   305  func (b *BlockedEvals) missedUnblock(eval *structs.Evaluation) bool {
   306  	var max uint64 = 0
   307  	for id, index := range b.unblockIndexes {
   308  		// Calculate the max unblock index
   309  		if max < index {
   310  			max = index
   311  		}
   312  
   313  		// The evaluation is blocked because it has hit a quota limit not class
   314  		// eligibility
   315  		if eval.QuotaLimitReached != "" {
   316  			if eval.QuotaLimitReached != id {
   317  				// Not a match
   318  				continue
   319  			} else if eval.SnapshotIndex < index {
   320  				// The evaluation was processed before the quota specification was
   321  				// updated, so unblock the evaluation.
   322  				return true
   323  			}
   324  
   325  			// The evaluation was processed having seen all changes to the quota
   326  			return false
   327  		}
   328  
   329  		elig, ok := eval.ClassEligibility[id]
   330  		if !ok && eval.SnapshotIndex < index {
   331  			// The evaluation was processed and did not encounter this class
   332  			// because it was added after it was processed. Thus for correctness
   333  			// we need to unblock it.
   334  			return true
   335  		}
   336  
   337  		// The evaluation could use the computed node class and the eval was
   338  		// processed before the last unblock.
   339  		if elig && eval.SnapshotIndex < index {
   340  			return true
   341  		}
   342  	}
   343  
   344  	// If the evaluation has escaped, and the map contains an index older than
   345  	// the evaluations, it should be unblocked.
   346  	if eval.EscapedComputedClass && eval.SnapshotIndex < max {
   347  		return true
   348  	}
   349  
   350  	// The evaluation is ahead of all recent unblocks.
   351  	return false
   352  }
   353  
   354  // Untrack causes any blocked evaluation for the passed job to be no longer
   355  // tracked. Untrack is called when there is a successful evaluation for the job
   356  // and a blocked evaluation is no longer needed.
   357  func (b *BlockedEvals) Untrack(jobID, namespace string) {
   358  	b.l.Lock()
   359  	defer b.l.Unlock()
   360  
   361  	// Do nothing if not enabled
   362  	if !b.enabled {
   363  		return
   364  	}
   365  
   366  	nsID := structs.NewNamespacedID(jobID, namespace)
   367  
   368  	// Get the evaluation ID to cancel
   369  	evalID, ok := b.jobs[nsID]
   370  	if !ok {
   371  		// No blocked evaluation so exit
   372  		return
   373  	}
   374  
   375  	// Attempt to delete the evaluation
   376  	if w, ok := b.captured[evalID]; ok {
   377  		delete(b.jobs, nsID)
   378  		delete(b.captured, evalID)
   379  		b.stats.TotalBlocked--
   380  		if w.eval.QuotaLimitReached != "" {
   381  			b.stats.TotalQuotaLimit--
   382  		}
   383  	}
   384  
   385  	if w, ok := b.escaped[evalID]; ok {
   386  		delete(b.jobs, nsID)
   387  		delete(b.escaped, evalID)
   388  		b.stats.TotalEscaped--
   389  		b.stats.TotalBlocked--
   390  		if w.eval.QuotaLimitReached != "" {
   391  			b.stats.TotalQuotaLimit--
   392  		}
   393  	}
   394  }
   395  
   396  // Unblock causes any evaluation that could potentially make progress on a
   397  // capacity change on the passed computed node class to be enqueued into the
   398  // eval broker.
   399  func (b *BlockedEvals) Unblock(computedClass string, index uint64) {
   400  	b.l.Lock()
   401  
   402  	// Do nothing if not enabled
   403  	if !b.enabled {
   404  		b.l.Unlock()
   405  		return
   406  	}
   407  
   408  	// Store the index in which the unblock happened. We use this on subsequent
   409  	// block calls in case the evaluation was in the scheduler when a trigger
   410  	// occurred.
   411  	b.unblockIndexes[computedClass] = index
   412  	b.l.Unlock()
   413  
   414  	b.capacityChangeCh <- &capacityUpdate{
   415  		computedClass: computedClass,
   416  		index:         index,
   417  	}
   418  }
   419  
   420  // UnblockQuota causes any evaluation that could potentially make progress on a
   421  // capacity change on the passed quota to be enqueued into the eval broker.
   422  func (b *BlockedEvals) UnblockQuota(quota string, index uint64) {
   423  	// Nothing to do
   424  	if quota == "" {
   425  		return
   426  	}
   427  
   428  	b.l.Lock()
   429  
   430  	// Do nothing if not enabled
   431  	if !b.enabled {
   432  		b.l.Unlock()
   433  		return
   434  	}
   435  
   436  	// Store the index in which the unblock happened. We use this on subsequent
   437  	// block calls in case the evaluation was in the scheduler when a trigger
   438  	// occurred.
   439  	b.unblockIndexes[quota] = index
   440  	b.l.Unlock()
   441  
   442  	b.capacityChangeCh <- &capacityUpdate{
   443  		quotaChange: quota,
   444  		index:       index,
   445  	}
   446  }
   447  
   448  // UnblockClassAndQuota causes any evaluation that could potentially make
   449  // progress on a capacity change on the passed computed node class or quota to
   450  // be enqueued into the eval broker.
   451  func (b *BlockedEvals) UnblockClassAndQuota(class, quota string, index uint64) {
   452  	b.l.Lock()
   453  
   454  	// Do nothing if not enabled
   455  	if !b.enabled {
   456  		b.l.Unlock()
   457  		return
   458  	}
   459  
   460  	// Store the index in which the unblock happened. We use this on subsequent
   461  	// block calls in case the evaluation was in the scheduler when a trigger
   462  	// occurred.
   463  	if quota != "" {
   464  		b.unblockIndexes[quota] = index
   465  	}
   466  	b.unblockIndexes[class] = index
   467  
   468  	// Capture chan inside the lock to prevent a race with it getting reset
   469  	// in Flush.
   470  	ch := b.capacityChangeCh
   471  	b.l.Unlock()
   472  
   473  	ch <- &capacityUpdate{
   474  		computedClass: class,
   475  		quotaChange:   quota,
   476  		index:         index,
   477  	}
   478  }
   479  
   480  // watchCapacity is a long lived function that watches for capacity changes in
   481  // nodes and unblocks the correct set of evals.
   482  func (b *BlockedEvals) watchCapacity(stopCh <-chan struct{}, changeCh <-chan *capacityUpdate) {
   483  	for {
   484  		select {
   485  		case <-stopCh:
   486  			return
   487  		case update := <-changeCh:
   488  			b.unblock(update.computedClass, update.quotaChange, update.index)
   489  		}
   490  	}
   491  }
   492  
   493  func (b *BlockedEvals) unblock(computedClass, quota string, index uint64) {
   494  	b.l.Lock()
   495  	defer b.l.Unlock()
   496  
   497  	// Protect against the case of a flush.
   498  	if !b.enabled {
   499  		return
   500  	}
   501  
   502  	// Every eval that has escaped computed node class has to be unblocked
   503  	// because any node could potentially be feasible.
   504  	numEscaped := len(b.escaped)
   505  	numQuotaLimit := 0
   506  	unblocked := make(map[*structs.Evaluation]string, lib.MaxInt(numEscaped, 4))
   507  
   508  	if numEscaped != 0 && computedClass != "" {
   509  		for id, wrapped := range b.escaped {
   510  			unblocked[wrapped.eval] = wrapped.token
   511  			delete(b.escaped, id)
   512  			delete(b.jobs, structs.NewNamespacedID(wrapped.eval.JobID, wrapped.eval.Namespace))
   513  
   514  			if wrapped.eval.QuotaLimitReached != "" {
   515  				numQuotaLimit++
   516  			}
   517  		}
   518  	}
   519  
   520  	// We unblock any eval that is explicitly eligible for the computed class
   521  	// and also any eval that is not eligible or uneligible. This signifies that
   522  	// when the evaluation was originally run through the scheduler, that it
   523  	// never saw a node with the given computed class and thus needs to be
   524  	// unblocked for correctness.
   525  	for id, wrapped := range b.captured {
   526  		if quota != "" && wrapped.eval.QuotaLimitReached != quota {
   527  			// We are unblocking based on quota and this eval doesn't match
   528  			continue
   529  		} else if elig, ok := wrapped.eval.ClassEligibility[computedClass]; ok && !elig {
   530  			// Can skip because the eval has explicitly marked the node class
   531  			// as ineligible.
   532  			continue
   533  		}
   534  
   535  		// Unblock the evaluation because it is either for the matching quota,
   536  		// is eligible based on the computed node class, or never seen the
   537  		// computed node class.
   538  		unblocked[wrapped.eval] = wrapped.token
   539  		delete(b.jobs, structs.NewNamespacedID(wrapped.eval.JobID, wrapped.eval.Namespace))
   540  		delete(b.captured, id)
   541  		if wrapped.eval.QuotaLimitReached != "" {
   542  			numQuotaLimit++
   543  		}
   544  	}
   545  
   546  	if l := len(unblocked); l != 0 {
   547  		// Update the counters
   548  		b.stats.TotalEscaped = 0
   549  		b.stats.TotalBlocked -= l
   550  		b.stats.TotalQuotaLimit -= numQuotaLimit
   551  
   552  		// Enqueue all the unblocked evals into the broker.
   553  		b.evalBroker.EnqueueAll(unblocked)
   554  	}
   555  }
   556  
   557  // UnblockFailed unblocks all blocked evaluation that were due to scheduler
   558  // failure.
   559  func (b *BlockedEvals) UnblockFailed() {
   560  	b.l.Lock()
   561  	defer b.l.Unlock()
   562  
   563  	// Do nothing if not enabled
   564  	if !b.enabled {
   565  		return
   566  	}
   567  
   568  	quotaLimit := 0
   569  	unblocked := make(map[*structs.Evaluation]string, 4)
   570  	for id, wrapped := range b.captured {
   571  		if wrapped.eval.TriggeredBy == structs.EvalTriggerMaxPlans {
   572  			unblocked[wrapped.eval] = wrapped.token
   573  			delete(b.captured, id)
   574  			delete(b.jobs, structs.NewNamespacedID(wrapped.eval.JobID, wrapped.eval.Namespace))
   575  			if wrapped.eval.QuotaLimitReached != "" {
   576  				quotaLimit++
   577  			}
   578  		}
   579  	}
   580  
   581  	for id, wrapped := range b.escaped {
   582  		if wrapped.eval.TriggeredBy == structs.EvalTriggerMaxPlans {
   583  			unblocked[wrapped.eval] = wrapped.token
   584  			delete(b.escaped, id)
   585  			delete(b.jobs, structs.NewNamespacedID(wrapped.eval.JobID, wrapped.eval.Namespace))
   586  			b.stats.TotalEscaped -= 1
   587  			if wrapped.eval.QuotaLimitReached != "" {
   588  				quotaLimit++
   589  			}
   590  		}
   591  	}
   592  
   593  	if l := len(unblocked); l > 0 {
   594  		b.stats.TotalBlocked -= l
   595  		b.stats.TotalQuotaLimit -= quotaLimit
   596  		b.evalBroker.EnqueueAll(unblocked)
   597  	}
   598  }
   599  
   600  // GetDuplicates returns all the duplicate evaluations and blocks until the
   601  // passed timeout.
   602  func (b *BlockedEvals) GetDuplicates(timeout time.Duration) []*structs.Evaluation {
   603  	var timeoutTimer *time.Timer
   604  	var timeoutCh <-chan time.Time
   605  SCAN:
   606  	b.l.Lock()
   607  	if len(b.duplicates) != 0 {
   608  		dups := b.duplicates
   609  		b.duplicates = nil
   610  		b.l.Unlock()
   611  		return dups
   612  	}
   613  
   614  	// Capture chans inside the lock to prevent a race with them getting
   615  	// reset in Flush
   616  	dupCh := b.duplicateCh
   617  	stopCh := b.stopCh
   618  	b.l.Unlock()
   619  
   620  	// Create the timer
   621  	if timeoutTimer == nil && timeout != 0 {
   622  		timeoutTimer = time.NewTimer(timeout)
   623  		timeoutCh = timeoutTimer.C
   624  		defer timeoutTimer.Stop()
   625  	}
   626  
   627  	select {
   628  	case <-stopCh:
   629  		return nil
   630  	case <-timeoutCh:
   631  		return nil
   632  	case <-dupCh:
   633  		goto SCAN
   634  	}
   635  }
   636  
   637  // Flush is used to clear the state of blocked evaluations.
   638  func (b *BlockedEvals) Flush() {
   639  	b.l.Lock()
   640  	defer b.l.Unlock()
   641  
   642  	// Reset the blocked eval tracker.
   643  	b.stats.TotalEscaped = 0
   644  	b.stats.TotalBlocked = 0
   645  	b.stats.TotalQuotaLimit = 0
   646  	b.captured = make(map[string]wrappedEval)
   647  	b.escaped = make(map[string]wrappedEval)
   648  	b.jobs = make(map[structs.NamespacedID]string)
   649  	b.unblockIndexes = make(map[string]uint64)
   650  	b.timetable = nil
   651  	b.duplicates = nil
   652  	b.capacityChangeCh = make(chan *capacityUpdate, unblockBuffer)
   653  	b.stopCh = make(chan struct{})
   654  	b.duplicateCh = make(chan struct{}, 1)
   655  }
   656  
   657  // Stats is used to query the state of the blocked eval tracker.
   658  func (b *BlockedEvals) Stats() *BlockedStats {
   659  	// Allocate a new stats struct
   660  	stats := new(BlockedStats)
   661  
   662  	b.l.RLock()
   663  	defer b.l.RUnlock()
   664  
   665  	// Copy all the stats
   666  	stats.TotalEscaped = b.stats.TotalEscaped
   667  	stats.TotalBlocked = b.stats.TotalBlocked
   668  	stats.TotalQuotaLimit = b.stats.TotalQuotaLimit
   669  	return stats
   670  }
   671  
   672  // EmitStats is used to export metrics about the blocked eval tracker while enabled
   673  func (b *BlockedEvals) EmitStats(period time.Duration, stopCh <-chan struct{}) {
   674  	for {
   675  		select {
   676  		case <-time.After(period):
   677  			stats := b.Stats()
   678  			metrics.SetGauge([]string{"nomad", "blocked_evals", "total_quota_limit"}, float32(stats.TotalQuotaLimit))
   679  			metrics.SetGauge([]string{"nomad", "blocked_evals", "total_blocked"}, float32(stats.TotalBlocked))
   680  			metrics.SetGauge([]string{"nomad", "blocked_evals", "total_escaped"}, float32(stats.TotalEscaped))
   681  		case <-stopCh:
   682  			return
   683  		}
   684  	}
   685  }
   686  
   687  // prune is a long lived function that prunes unnecessary objects on a timer.
   688  func (b *BlockedEvals) prune(stopCh <-chan struct{}) {
   689  	ticker := time.NewTicker(pruneInterval)
   690  	defer ticker.Stop()
   691  
   692  	for {
   693  		select {
   694  		case <-stopCh:
   695  			return
   696  		case <-ticker.C:
   697  			b.pruneUnblockIndexes()
   698  		}
   699  	}
   700  }
   701  
   702  // pruneUnblockIndexes is used to prune any tracked entry that is excessively
   703  // old. This protects againsts unbounded growth of the map.
   704  func (b *BlockedEvals) pruneUnblockIndexes() {
   705  	b.l.Lock()
   706  	defer b.l.Unlock()
   707  
   708  	if b.timetable == nil {
   709  		return
   710  	}
   711  
   712  	cutoff := time.Now().UTC().Add(-1 * pruneThreshold)
   713  	oldThreshold := b.timetable.NearestIndex(cutoff)
   714  
   715  	for key, index := range b.unblockIndexes {
   716  		if index < oldThreshold {
   717  			delete(b.unblockIndexes, key)
   718  		}
   719  	}
   720  }