github.com/manicqin/nomad@v0.9.5/nomad/eval_broker.go

     1  package nomad
     2  
     3  import (
     4  	"container/heap"
     5  	"errors"
     6  	"fmt"
     7  	"math/rand"
     8  	"sync"
     9  	"time"
    10  
    11  	"context"
    12  
    13  	metrics "github.com/armon/go-metrics"
    14  	"github.com/hashicorp/nomad/helper/uuid"
    15  	"github.com/hashicorp/nomad/lib/delayheap"
    16  	"github.com/hashicorp/nomad/nomad/structs"
    17  )
    18  
    19  const (
    20  	// failedQueue is the queue we add Evaluations to once
    21  	// they've reached the deliveryLimit. This allows the leader to
    22  	// set the status to failed.
    23  	failedQueue = "_failed"
    24  )
    25  
    26  var (
    27  	// ErrNotOutstanding is returned if an evaluation is not outstanding
    28  	ErrNotOutstanding = errors.New("evaluation is not outstanding")
    29  
    30  	// ErrTokenMismatch is returned if the outstanding eval has a different token
    31  	ErrTokenMismatch = errors.New("evaluation token does not match")
    32  
    33  	// ErrNackTimeoutReached is returned if an expired evaluation is reset
    34  	ErrNackTimeoutReached = errors.New("evaluation nack timeout reached")
    35  )
    36  
    37  // EvalBroker is used to manage brokering of evaluations. When an evaluation is
    38  // created, due to a change in a job specification or a node, we put it into the
    39  // broker. The broker sorts evaluations by priority and scheduler type. This
    40  // allows us to dequeue the highest priority work first, while also allowing sub-schedulers
    41  // to only dequeue work they know how to handle. The broker is designed to be entirely
    42  // in-memory and is managed by the leader node.
    43  //
    44  // The broker must provide at-least-once delivery semantics. It relies on explicit
    45  // Ack/Nack messages to handle this. If a delivery is not Ack'd in a sufficient time
    46  // span, it will be assumed Nack'd.
    47  type EvalBroker struct {
    48  	nackTimeout   time.Duration
    49  	deliveryLimit int
    50  
    51  	enabled bool
    52  	stats   *BrokerStats
    53  
    54  	// evals tracks queued evaluations by ID to de-duplicate enqueue.
    55  	// The counter is the number of times we've attempted delivery,
    56  	// and is used to eventually fail an evaluation.
    57  	evals map[string]int
    58  
    59  	// jobEvals tracks queued evaluations by a job's ID and namespace to serialize them
    60  	jobEvals map[structs.NamespacedID]string
    61  
    62  	// blocked tracks the blocked evaluations by JobID in a priority queue
    63  	blocked map[structs.NamespacedID]PendingEvaluations
    64  
    65  	// ready tracks the ready jobs by scheduler in a priority queue
    66  	ready map[string]PendingEvaluations
    67  
    68  	// unack is a map of evalID to an un-acknowledged evaluation
    69  	unack map[string]*unackEval
    70  
    71  	// waiting is used to notify on a per-scheduler basis of ready work
    72  	waiting map[string]chan struct{}
    73  
    74  	// requeue tracks, by token, evaluations that need to be re-enqueued once
    75  	// the current evaluation finishes. If the token is Nacked or rejected the
    76  	// evaluation is dropped, but if Acked successfully, the evaluation is
    77  	// queued.
    78  	requeue map[string]*structs.Evaluation
    79  
    80  	// timeWait has evaluations that are waiting for time to elapse
    81  	timeWait map[string]*time.Timer
    82  
    83  	// delayedEvalCancelFunc is used to stop the long-running goroutine
    84  	// that processes delayed evaluations
    85  	delayedEvalCancelFunc context.CancelFunc
    86  
    87  	// delayHeap is a heap used to track incoming evaluations that are
    88  	// not eligible to enqueue until their WaitUntil time
    89  	delayHeap *delayheap.DelayHeap
    90  
    91  	// delayedEvalsUpdateCh is used to trigger notifications for updates
    92  	// to the delayHeap
    93  	delayedEvalsUpdateCh chan struct{}
    94  
    95  	// initialNackDelay is the delay applied before re-enqueuing a
    96  	// Nacked evaluation for the first time.
    97  	initialNackDelay time.Duration
    98  
    99  	// subsequentNackDelay is the delay applied before re-enqueuing
   100  	// an evaluation that has been Nacked more than once. This delay is
   101  	// compounding after the first Nack.
   102  	subsequentNackDelay time.Duration
   103  
   104  	l sync.RWMutex
   105  }
   106  
   107  // unackEval tracks an unacknowledged evaluation along with the Nack timer
   108  type unackEval struct {
   109  	Eval      *structs.Evaluation
   110  	Token     string
   111  	NackTimer *time.Timer
   112  }
   113  
   114  // PendingEvaluations is a list of waiting evaluations.
   115  // We implement the container/heap interface so that this is a
   116  // priority queue
   117  type PendingEvaluations []*structs.Evaluation
   118  
   119  // NewEvalBroker creates a new evaluation broker. This is parameterized
   120  // with the timeout used for messages that are not acknowledged before we
   121  // assume a Nack and attempt to redeliver, as well as the deliveryLimit,
   122  // which prevents a failing eval from being endlessly delivered. The
   123  // initialNackDelay is the delay before making a Nacked evaluation available
   124  // again after the first Nack, and subsequentNackDelay is the compounding delay
   125  // applied after each subsequent Nack.
   126  func NewEvalBroker(timeout, initialNackDelay, subsequentNackDelay time.Duration, deliveryLimit int) (*EvalBroker, error) {
   127  	if timeout < 0 {
   128  		return nil, fmt.Errorf("timeout cannot be negative")
   129  	}
   130  	b := &EvalBroker{
   131  		nackTimeout:          timeout,
   132  		deliveryLimit:        deliveryLimit,
   133  		enabled:              false,
   134  		stats:                new(BrokerStats),
   135  		evals:                make(map[string]int),
   136  		jobEvals:             make(map[structs.NamespacedID]string),
   137  		blocked:              make(map[structs.NamespacedID]PendingEvaluations),
   138  		ready:                make(map[string]PendingEvaluations),
   139  		unack:                make(map[string]*unackEval),
   140  		waiting:              make(map[string]chan struct{}),
   141  		requeue:              make(map[string]*structs.Evaluation),
   142  		timeWait:             make(map[string]*time.Timer),
   143  		initialNackDelay:     initialNackDelay,
   144  		subsequentNackDelay:  subsequentNackDelay,
   145  		delayHeap:            delayheap.NewDelayHeap(),
   146  		delayedEvalsUpdateCh: make(chan struct{}, 1),
   147  	}
   148  	b.stats.ByScheduler = make(map[string]*SchedulerStats)
   149  
   150  	return b, nil
   151  }
   152  
   153  // Enabled is used to check if the broker is enabled.
   154  func (b *EvalBroker) Enabled() bool {
   155  	b.l.RLock()
   156  	defer b.l.RUnlock()
   157  	return b.enabled
   158  }
   159  
   160  // SetEnabled is used to control if the broker is enabled. The broker
   161  // should only be enabled on the active leader.
   162  func (b *EvalBroker) SetEnabled(enabled bool) {
   163  	b.l.Lock()
   164  	defer b.l.Unlock()
   165  
   166  	prevEnabled := b.enabled
   167  	b.enabled = enabled
   168  	if !prevEnabled && enabled {
   169  		// start the goroutine for delayed evals
   170  		ctx, cancel := context.WithCancel(context.Background())
   171  		b.delayedEvalCancelFunc = cancel
   172  		go b.runDelayedEvalsWatcher(ctx, b.delayedEvalsUpdateCh)
   173  	}
   174  
   175  	if !enabled {
   176  		b.flush()
   177  	}
   178  }
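
// Illustrative sketch (not part of the original source): constructing a broker
// and enabling it the way an active leader would. The timeout, delay, and
// delivery-limit values below are arbitrary assumptions, not Nomad defaults.
func exampleNewAndEnableBroker() (*EvalBroker, error) {
	// 30s before an unacknowledged delivery is treated as a Nack, a 1s delay
	// before the first redelivery, a 20s compounding delay afterwards, and at
	// most 3 deliveries before the eval is routed to the failed queue.
	b, err := NewEvalBroker(30*time.Second, time.Second, 20*time.Second, 3)
	if err != nil {
		return nil, err
	}

	// The broker starts disabled; only the active leader should enable it,
	// which also starts the delayed-evaluation watcher goroutine.
	b.SetEnabled(true)
	return b, nil
}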
   179  
   180  // Enqueue is used to enqueue a new evaluation
   181  func (b *EvalBroker) Enqueue(eval *structs.Evaluation) {
   182  	b.l.Lock()
   183  	defer b.l.Unlock()
   184  	b.processEnqueue(eval, "")
   185  }
   186  
   187  // EnqueueAll is used to enqueue many evaluations. The map allows evaluations
   188  // that are being re-enqueued to include their token.
   189  //
   190  // When requeuing an evaluation that may already be enqueued, the evaluation
   191  // is handled in one of the following ways:
   192  // * Evaluation not outstanding: Process as a normal Enqueue
   193  // * Evaluation outstanding: Do not allow the evaluation to be dequeued until:
   194  //    * Ack received:  Unblock the evaluation, allowing it to be dequeued
   195  //    * Nack received: Drop the evaluation, as it was created as a result of a
   196  //      scheduler run that was Nack'd
   197  func (b *EvalBroker) EnqueueAll(evals map[*structs.Evaluation]string) {
   198  	// The lock needs to be held until all evaluations are enqueued. This is so
   199  	// that when Dequeue operations are unblocked they will pick the highest
   200  	// priority evaluations.
   201  	b.l.Lock()
   202  	defer b.l.Unlock()
   203  	for eval, token := range evals {
   204  		b.processEnqueue(eval, token)
   205  	}
   206  }
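
// Illustrative sketch (not part of the original source): re-enqueuing the
// evaluations created by a scheduler run, keyed by the dequeue token of the
// evaluation that produced them. Each entry stays blocked until that
// outstanding evaluation is Acked and is dropped if it is Nacked. The created
// and token parameters are assumptions standing in for real scheduler output.
func exampleRequeueCreatedEvals(b *EvalBroker, created []*structs.Evaluation, token string) {
	evals := make(map[*structs.Evaluation]string, len(created))
	for _, e := range created {
		evals[e] = token
	}
	b.EnqueueAll(evals)
}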
   207  
   208  // processEnqueue deduplicates evals and either enqueues them immediately or
   209  // enforces the eval's wait time. If a token is passed and the evaluation ID is
   210  // outstanding, the evaluation is blocked until an Ack/Nack is received.
   211  // processEnqueue must be called with the lock held.
   212  func (b *EvalBroker) processEnqueue(eval *structs.Evaluation, token string) {
   213  	// If the broker is not enabled, don't allow more queueing.
   214  	if !b.enabled {
   215  		return
   216  	}
   217  
   218  	// Check if already enqueued
   219  	if _, ok := b.evals[eval.ID]; ok {
   220  		if token == "" {
   221  			return
   222  		}
   223  
   224  		// If the token has been passed, the evaluation is being reblocked by
   225  		// the scheduler and should be processed once the outstanding evaluation
   226  		// is Acked or Nacked.
   227  		if unack, ok := b.unack[eval.ID]; ok && unack.Token == token {
   228  			b.requeue[token] = eval
   229  		}
   230  		return
   231  	} else if b.enabled {
   232  		b.evals[eval.ID] = 0
   233  	}
   234  
   235  	// Check if we need to enforce a wait
   236  	if eval.Wait > 0 {
   237  		b.processWaitingEnqueue(eval)
   238  		return
   239  	}
   240  
   241  	if !eval.WaitUntil.IsZero() {
   242  		b.delayHeap.Push(&evalWrapper{eval}, eval.WaitUntil)
   243  		b.stats.TotalWaiting += 1
   244  		// Signal an update.
   245  		select {
   246  		case b.delayedEvalsUpdateCh <- struct{}{}:
   247  		default:
   248  		}
   249  		return
   250  	}
   251  
   252  	b.enqueueLocked(eval, eval.Type)
   253  }
   254  
   255  // processWaitingEnqueue waits the given duration on the evaluation before
   256  // enqueuing.
   257  func (b *EvalBroker) processWaitingEnqueue(eval *structs.Evaluation) {
   258  	timer := time.AfterFunc(eval.Wait, func() {
   259  		b.enqueueWaiting(eval)
   260  	})
   261  	b.timeWait[eval.ID] = timer
   262  	b.stats.TotalWaiting += 1
   263  }
   264  
   265  // enqueueWaiting is used to enqueue a waiting evaluation
   266  func (b *EvalBroker) enqueueWaiting(eval *structs.Evaluation) {
   267  	b.l.Lock()
   268  	defer b.l.Unlock()
   269  
   270  	delete(b.timeWait, eval.ID)
   271  	b.stats.TotalWaiting -= 1
   272  
   273  	b.enqueueLocked(eval, eval.Type)
   274  }
   275  
   276  // enqueueLocked is used to enqueue with the lock held
   277  func (b *EvalBroker) enqueueLocked(eval *structs.Evaluation, queue string) {
   278  	// Do nothing if not enabled
   279  	if !b.enabled {
   280  		return
   281  	}
   282  
   283  	// Check if there is an evaluation for this JobID pending
   284  	namespacedID := structs.NamespacedID{
   285  		ID:        eval.JobID,
   286  		Namespace: eval.Namespace,
   287  	}
   288  	pendingEval := b.jobEvals[namespacedID]
   289  	if pendingEval == "" {
   290  		b.jobEvals[namespacedID] = eval.ID
   291  	} else if pendingEval != eval.ID {
   292  		blocked := b.blocked[namespacedID]
   293  		heap.Push(&blocked, eval)
   294  		b.blocked[namespacedID] = blocked
   295  		b.stats.TotalBlocked += 1
   296  		return
   297  	}
   298  
   299  	// Find the pending by scheduler class
   300  	pending, ok := b.ready[queue]
   301  	if !ok {
   302  		pending = make([]*structs.Evaluation, 0, 16)
   303  		if _, ok := b.waiting[queue]; !ok {
   304  			b.waiting[queue] = make(chan struct{}, 1)
   305  		}
   306  	}
   307  
   308  	// Push onto the heap
   309  	heap.Push(&pending, eval)
   310  	b.ready[queue] = pending
   311  
   312  	// Update the stats
   313  	b.stats.TotalReady += 1
   314  	bySched, ok := b.stats.ByScheduler[queue]
   315  	if !ok {
   316  		bySched = &SchedulerStats{}
   317  		b.stats.ByScheduler[queue] = bySched
   318  	}
   319  	bySched.Ready += 1
   320  
   321  	// Unblock any blocked dequeues
   322  	select {
   323  	case b.waiting[queue] <- struct{}{}:
   324  	default:
   325  	}
   326  }
   327  
   328  // Dequeue is used to perform a blocking dequeue
   329  func (b *EvalBroker) Dequeue(schedulers []string, timeout time.Duration) (*structs.Evaluation, string, error) {
   330  	var timeoutTimer *time.Timer
   331  	var timeoutCh <-chan time.Time
   332  SCAN:
   333  	// Scan for work
   334  	eval, token, err := b.scanForSchedulers(schedulers)
   335  	if err != nil {
   336  		if timeoutTimer != nil {
   337  			timeoutTimer.Stop()
   338  		}
   339  		return nil, "", err
   340  	}
   341  
   342  	// Check if we have something
   343  	if eval != nil {
   344  		if timeoutTimer != nil {
   345  			timeoutTimer.Stop()
   346  		}
   347  		return eval, token, nil
   348  	}
   349  
   350  	// Setup the timeout channel the first time around
   351  	if timeoutTimer == nil && timeout != 0 {
   352  		timeoutTimer = time.NewTimer(timeout)
   353  		timeoutCh = timeoutTimer.C
   354  	}
   355  
   356  	// Block until we get work
   357  	scan := b.waitForSchedulers(schedulers, timeoutCh)
   358  	if scan {
   359  		goto SCAN
   360  	}
   361  	return nil, "", nil
   362  }
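
// Illustrative sketch (not part of the original source): a minimal worker loop
// built on Dequeue, Ack, and Nack. The scheduler set, timeout, and run callback
// are assumptions; structs.JobTypeService and structs.JobTypeBatch are the
// queue names an evaluation's Type carries for those job types.
func exampleWorkerLoop(b *EvalBroker, run func(*structs.Evaluation) error, stopCh <-chan struct{}) {
	for {
		select {
		case <-stopCh:
			return
		default:
		}

		// Dequeue blocks up to the timeout and returns (nil, "", nil) when no
		// work arrived in time.
		eval, token, err := b.Dequeue([]string{structs.JobTypeService, structs.JobTypeBatch}, time.Second)
		if err != nil || eval == nil {
			continue
		}

		// Process the evaluation: Nack on failure so it is redelivered with a
		// delay (or routed to the failed queue after deliveryLimit attempts),
		// Ack on success.
		if runErr := run(eval); runErr != nil {
			_ = b.Nack(eval.ID, token)
			continue
		}
		_ = b.Ack(eval.ID, token)
	}
}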
   363  
   364  // scanForSchedulers scans for work on any of the schedulers. The highest priority work
   365  // is dequeued first. This may return nothing if there is no work waiting.
   366  func (b *EvalBroker) scanForSchedulers(schedulers []string) (*structs.Evaluation, string, error) {
   367  	b.l.Lock()
   368  	defer b.l.Unlock()
   369  
   370  	// Do nothing if not enabled
   371  	if !b.enabled {
   372  		return nil, "", fmt.Errorf("eval broker disabled")
   373  	}
   374  
   375  	// Scan for eligible work
   376  	var eligibleSched []string
   377  	var eligiblePriority int
   378  	for _, sched := range schedulers {
   379  		// Get the pending queue
   380  		pending, ok := b.ready[sched]
   381  		if !ok {
   382  			continue
   383  		}
   384  
   385  		// Peek at the next item
   386  		ready := pending.Peek()
   387  		if ready == nil {
   388  			continue
   389  		}
   390  
   391  		// Add to eligible if equal or greater priority
   392  		if len(eligibleSched) == 0 || ready.Priority > eligiblePriority {
   393  			eligibleSched = []string{sched}
   394  			eligiblePriority = ready.Priority
   395  
   396  		} else if eligiblePriority > ready.Priority {
   397  			continue
   398  
   399  		} else if eligiblePriority == ready.Priority {
   400  			eligibleSched = append(eligibleSched, sched)
   401  		}
   402  	}
   403  
   404  	// Determine behavior based on eligible work
   405  	switch n := len(eligibleSched); n {
   406  	case 0:
   407  		// No work to do!
   408  		return nil, "", nil
   409  
   410  	case 1:
   411  		// Only a single task, dequeue
   412  		return b.dequeueForSched(eligibleSched[0])
   413  
   414  	default:
   415  		// Multiple tasks. We pick a random task so that we fairly
   416  		// distribute work.
   417  		offset := rand.Intn(n)
   418  		return b.dequeueForSched(eligibleSched[offset])
   419  	}
   420  }
   421  
   422  // dequeueForSched is used to dequeue the next work item for a given scheduler.
   423  // This assumes locks are held and that this scheduler has work
   424  func (b *EvalBroker) dequeueForSched(sched string) (*structs.Evaluation, string, error) {
   425  	// Get the pending queue
   426  	pending := b.ready[sched]
   427  	raw := heap.Pop(&pending)
   428  	b.ready[sched] = pending
   429  	eval := raw.(*structs.Evaluation)
   430  
   431  	// Generate a UUID for the token
   432  	token := uuid.Generate()
   433  
   434  	// Setup Nack timer
   435  	nackTimer := time.AfterFunc(b.nackTimeout, func() {
   436  		b.Nack(eval.ID, token)
   437  	})
   438  
   439  	// Add to the unack queue
   440  	b.unack[eval.ID] = &unackEval{
   441  		Eval:      eval,
   442  		Token:     token,
   443  		NackTimer: nackTimer,
   444  	}
   445  
   446  	// Increment the dequeue count
   447  	b.evals[eval.ID] += 1
   448  
   449  	// Update the stats
   450  	b.stats.TotalReady -= 1
   451  	b.stats.TotalUnacked += 1
   452  	bySched := b.stats.ByScheduler[sched]
   453  	bySched.Ready -= 1
   454  	bySched.Unacked += 1
   455  
   456  	return eval, token, nil
   457  }
   458  
   459  // waitForSchedulers is used to wait for work on any of the schedulers or until a timeout.
   460  // Returns true if there is potentially work waiting and the caller should scan again.
   461  func (b *EvalBroker) waitForSchedulers(schedulers []string, timeoutCh <-chan time.Time) bool {
   462  	doneCh := make(chan struct{})
   463  	readyCh := make(chan struct{}, 1)
   464  	defer close(doneCh)
   465  
   466  	// Start all the watchers
   467  	b.l.Lock()
   468  	for _, sched := range schedulers {
   469  		waitCh, ok := b.waiting[sched]
   470  		if !ok {
   471  			waitCh = make(chan struct{}, 1)
   472  			b.waiting[sched] = waitCh
   473  		}
   474  
   475  		// Start a goroutine that either waits for the waitCh on this scheduler
   476  		// to unblock or for this waitForSchedulers call to return
   477  		go func() {
   478  			select {
   479  			case <-waitCh:
   480  				select {
   481  				case readyCh <- struct{}{}:
   482  				default:
   483  				}
   484  			case <-doneCh:
   485  			}
   486  		}()
   487  	}
   488  	b.l.Unlock()
   489  
   490  	// Block until we have ready work and should scan, or until we timeout
   491  	// and should not make an attempt to scan for work
   492  	select {
   493  	case <-readyCh:
   494  		return true
   495  	case <-timeoutCh:
   496  		return false
   497  	}
   498  }
   499  
   500  // Outstanding checks if an EvalID has been delivered but not acknowledged
   501  // and returns the associated token for the evaluation.
   502  func (b *EvalBroker) Outstanding(evalID string) (string, bool) {
   503  	b.l.RLock()
   504  	defer b.l.RUnlock()
   505  	unack, ok := b.unack[evalID]
   506  	if !ok {
   507  		return "", false
   508  	}
   509  	return unack.Token, true
   510  }
   511  
   512  // OutstandingReset resets the Nack timer for the EvalID if the
   513  // token matches and the eval is outstanding
   514  func (b *EvalBroker) OutstandingReset(evalID, token string) error {
   515  	b.l.RLock()
   516  	defer b.l.RUnlock()
   517  	unack, ok := b.unack[evalID]
   518  	if !ok {
   519  		return ErrNotOutstanding
   520  	}
   521  	if unack.Token != token {
   522  		return ErrTokenMismatch
   523  	}
   524  	if !unack.NackTimer.Reset(b.nackTimeout) {
   525  		return ErrNackTimeoutReached
   526  	}
   527  	return nil
   528  }
   529  
   530  // Ack is used to positively acknowledge handling an evaluation
   531  func (b *EvalBroker) Ack(evalID, token string) error {
   532  	b.l.Lock()
   533  	defer b.l.Unlock()
   534  
   535  	// Always delete the requeued evaluation. Either the Ack is successful and
   536  	// we requeue it or it isn't and we want to remove it.
   537  	defer delete(b.requeue, token)
   538  
   539  	// Lookup the unack'd eval
   540  	unack, ok := b.unack[evalID]
   541  	if !ok {
   542  		return fmt.Errorf("Evaluation ID not found")
   543  	}
   544  	if unack.Token != token {
   545  		return fmt.Errorf("Token does not match for Evaluation ID")
   546  	}
   547  	jobID := unack.Eval.JobID
   548  
   549  	// Ensure we were able to stop the timer
   550  	if !unack.NackTimer.Stop() {
   551  		return fmt.Errorf("Evaluation ID Ack'd after Nack timer expiration")
   552  	}
   553  
   554  	// Update the stats
   555  	b.stats.TotalUnacked -= 1
   556  	queue := unack.Eval.Type
   557  	if b.evals[evalID] > b.deliveryLimit {
   558  		queue = failedQueue
   559  	}
   560  	bySched := b.stats.ByScheduler[queue]
   561  	bySched.Unacked -= 1
   562  
   563  	// Cleanup
   564  	delete(b.unack, evalID)
   565  	delete(b.evals, evalID)
   566  
   567  	namespacedID := structs.NamespacedID{
   568  		ID:        jobID,
   569  		Namespace: unack.Eval.Namespace,
   570  	}
   571  	delete(b.jobEvals, namespacedID)
   572  
   573  	// Check if there are any blocked evaluations
   574  	if blocked := b.blocked[namespacedID]; len(blocked) != 0 {
   575  		raw := heap.Pop(&blocked)
   576  		if len(blocked) > 0 {
   577  			b.blocked[namespacedID] = blocked
   578  		} else {
   579  			delete(b.blocked, namespacedID)
   580  		}
   581  		eval := raw.(*structs.Evaluation)
   582  		b.stats.TotalBlocked -= 1
   583  		b.enqueueLocked(eval, eval.Type)
   584  	}
   585  
   586  	// Re-enqueue the evaluation.
   587  	if eval, ok := b.requeue[token]; ok {
   588  		b.processEnqueue(eval, "")
   589  	}
   590  
   591  	return nil
   592  }
   593  
   594  // Nack is used to negatively acknowledge handling an evaluation
   595  func (b *EvalBroker) Nack(evalID, token string) error {
   596  	b.l.Lock()
   597  	defer b.l.Unlock()
   598  
   599  	// Always delete the requeued evaluation since the Nack means the requeue is
   600  	// invalid.
   601  	delete(b.requeue, token)
   602  
   603  	// Lookup the unack'd eval
   604  	unack, ok := b.unack[evalID]
   605  	if !ok {
   606  		return fmt.Errorf("Evaluation ID not found")
   607  	}
   608  	if unack.Token != token {
   609  		return fmt.Errorf("Token does not match for Evaluation ID")
   610  	}
   611  
   612  	// Stop the timer, doesn't matter if we've missed it
   613  	unack.NackTimer.Stop()
   614  
   615  	// Cleanup
   616  	delete(b.unack, evalID)
   617  
   618  	// Update the stats
   619  	b.stats.TotalUnacked -= 1
   620  	bySched := b.stats.ByScheduler[unack.Eval.Type]
   621  	bySched.Unacked -= 1
   622  
   623  	// Check if we've hit the delivery limit, and re-enqueue
   624  	// in the failedQueue
   625  	if dequeues := b.evals[evalID]; dequeues >= b.deliveryLimit {
   626  		b.enqueueLocked(unack.Eval, failedQueue)
   627  	} else {
   628  		e := unack.Eval
   629  		e.Wait = b.nackReenqueueDelay(e, dequeues)
   630  
   631  		// See if there should be a delay before re-enqueuing
   632  		if e.Wait > 0 {
   633  			b.processWaitingEnqueue(e)
   634  		} else {
   635  			b.enqueueLocked(e, e.Type)
   636  		}
   637  	}
   638  
   639  	return nil
   640  }
   641  
   642  // nackReenqueueDelay is used to determine the delay that should be applied to
   643  // the evaluation given the number of previous attempts
   644  func (b *EvalBroker) nackReenqueueDelay(eval *structs.Evaluation, prevDequeues int) time.Duration {
   645  	switch {
   646  	case prevDequeues <= 0:
   647  		return 0
   648  	case prevDequeues == 1:
   649  		return b.initialNackDelay
   650  	default:
   651  		// For each subsequent nack compound a delay
   652  		return time.Duration(prevDequeues-1) * b.subsequentNackDelay
   653  	}
   654  }
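
// Worked example of the schedule above (values are illustrative, not defaults):
// with initialNackDelay = 1s and subsequentNackDelay = 20s, an evaluation that
// has been dequeued once is re-enqueued after 1s, after two dequeues it waits
// 20s, after three it waits 40s, and so on ((prevDequeues-1) * subsequentNackDelay).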
   655  
   656  // PauseNackTimeout is used to pause the Nack timeout for an eval that is making
   657  // progress but is in a potentially unbounded operation such as the plan queue.
   658  func (b *EvalBroker) PauseNackTimeout(evalID, token string) error {
   659  	b.l.RLock()
   660  	defer b.l.RUnlock()
   661  	unack, ok := b.unack[evalID]
   662  	if !ok {
   663  		return ErrNotOutstanding
   664  	}
   665  	if unack.Token != token {
   666  		return ErrTokenMismatch
   667  	}
   668  	if !unack.NackTimer.Stop() {
   669  		return ErrNackTimeoutReached
   670  	}
   671  	return nil
   672  }
   673  
   674  // ResumeNackTimeout is used to resume the Nack timeout for an eval that was
   675  // paused. It should be resumed after leaving an unbounded operation.
   676  func (b *EvalBroker) ResumeNackTimeout(evalID, token string) error {
   677  	b.l.Lock()
   678  	defer b.l.Unlock()
   679  	unack, ok := b.unack[evalID]
   680  	if !ok {
   681  		return ErrNotOutstanding
   682  	}
   683  	if unack.Token != token {
   684  		return ErrTokenMismatch
   685  	}
   686  	unack.NackTimer.Reset(b.nackTimeout)
   687  	return nil
   688  }
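
// Illustrative sketch (not part of the original source): pausing the Nack
// timer around a potentially unbounded step such as waiting on the plan queue.
// evalID and token are assumed to come from an earlier Dequeue, and submitPlan
// is a hypothetical stand-in for the unbounded operation.
func examplePauseDuringPlanning(b *EvalBroker, evalID, token string, submitPlan func() error) error {
	if err := b.PauseNackTimeout(evalID, token); err != nil {
		return err
	}
	// Restart the Nack timer once the unbounded work is done.
	defer func() { _ = b.ResumeNackTimeout(evalID, token) }()
	return submitPlan()
}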
   689  
   690  // flush is used to clear the state of the broker. It must be called with the
   691  // lock held.
   692  func (b *EvalBroker) flush() {
   693  	// Unblock any waiters
   694  	for _, waitCh := range b.waiting {
   695  		close(waitCh)
   696  	}
   697  	b.waiting = make(map[string]chan struct{})
   698  
   699  	// Cancel any Nack timers
   700  	for _, unack := range b.unack {
   701  		unack.NackTimer.Stop()
   702  	}
   703  
   704  	// Cancel any time wait evals
   705  	for _, wait := range b.timeWait {
   706  		wait.Stop()
   707  	}
   708  
   709  	// Cancel the delayed evaluations goroutine
   710  	if b.delayedEvalCancelFunc != nil {
   711  		b.delayedEvalCancelFunc()
   712  	}
   713  
   714  	// Clear out the update channel for delayed evaluations
   715  	b.delayedEvalsUpdateCh = make(chan struct{}, 1)
   716  
   717  	// Reset the broker
   718  	b.stats.TotalReady = 0
   719  	b.stats.TotalUnacked = 0
   720  	b.stats.TotalBlocked = 0
   721  	b.stats.TotalWaiting = 0
   722  	b.stats.ByScheduler = make(map[string]*SchedulerStats)
   723  	b.evals = make(map[string]int)
   724  	b.jobEvals = make(map[structs.NamespacedID]string)
   725  	b.blocked = make(map[structs.NamespacedID]PendingEvaluations)
   726  	b.ready = make(map[string]PendingEvaluations)
   727  	b.unack = make(map[string]*unackEval)
   728  	b.timeWait = make(map[string]*time.Timer)
   729  	b.delayHeap = delayheap.NewDelayHeap()
   730  }
   731  
   732  // evalWrapper satisfies the HeapNode interface
   733  type evalWrapper struct {
   734  	eval *structs.Evaluation
   735  }
   736  
   737  func (d *evalWrapper) Data() interface{} {
   738  	return d.eval
   739  }
   740  
   741  func (d *evalWrapper) ID() string {
   742  	return d.eval.ID
   743  }
   744  
   745  func (d *evalWrapper) Namespace() string {
   746  	return d.eval.Namespace
   747  }
   748  
   749  // runDelayedEvalsWatcher is a long-lived function that waits until a time deadline is met for
   750  // pending evaluations before enqueuing them
   751  func (b *EvalBroker) runDelayedEvalsWatcher(ctx context.Context, updateCh <-chan struct{}) {
   752  	var timerChannel <-chan time.Time
   753  	var delayTimer *time.Timer
   754  	for {
   755  		eval, waitUntil := b.nextDelayedEval()
   756  		if waitUntil.IsZero() {
   757  			timerChannel = nil
   758  		} else {
   759  			launchDur := waitUntil.Sub(time.Now().UTC())
   760  			if delayTimer == nil {
   761  				delayTimer = time.NewTimer(launchDur)
   762  			} else {
   763  				delayTimer.Reset(launchDur)
   764  			}
   765  			timerChannel = delayTimer.C
   766  		}
   767  
   768  		select {
   769  		case <-ctx.Done():
   770  			return
   771  		case <-timerChannel:
   772  			// remove from the heap since we can enqueue it now
   773  			b.l.Lock()
   774  			b.delayHeap.Remove(&evalWrapper{eval})
   775  			b.stats.TotalWaiting -= 1
   776  			b.enqueueLocked(eval, eval.Type)
   777  			b.l.Unlock()
   778  		case <-updateCh:
   779  			continue
   780  		}
   781  	}
   782  }
   783  
   784  // nextDelayedEval returns the next delayed eval to launch and when it should be enqueued.
   785  // This peeks at the heap to return the top. If the heap is empty, this returns nil and zero time.
   786  func (b *EvalBroker) nextDelayedEval() (*structs.Evaluation, time.Time) {
   787  	b.l.RLock()
   788  	defer b.l.RUnlock()
   789  
   790  	// If there is nothing, wait for an update.
   791  	if b.delayHeap.Length() == 0 {
   792  		return nil, time.Time{}
   793  	}
   794  	nextEval := b.delayHeap.Peek()
   795  	if nextEval == nil {
   796  		return nil, time.Time{}
   797  	}
   798  	eval := nextEval.Node.Data().(*structs.Evaluation)
   799  	return eval, nextEval.WaitUntil
   800  }
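
// Illustrative sketch (not part of the original source): enqueuing an
// evaluation whose WaitUntil lies in the future. The broker parks it on the
// delay heap, counts it in TotalWaiting, and the watcher goroutine enqueues it
// once the deadline passes. All field values below are assumptions.
func exampleDelayedEnqueue(b *EvalBroker) {
	eval := &structs.Evaluation{
		ID:        uuid.Generate(),
		Namespace: "default",
		JobID:     "example-job",
		Type:      structs.JobTypeService,
		Priority:  50,
		WaitUntil: time.Now().UTC().Add(30 * time.Second),
	}
	b.Enqueue(eval)
}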
   801  
   802  // Stats is used to query the state of the broker
   803  func (b *EvalBroker) Stats() *BrokerStats {
   804  	// Allocate a new stats struct
   805  	stats := new(BrokerStats)
   806  	stats.ByScheduler = make(map[string]*SchedulerStats)
   807  
   808  	b.l.RLock()
   809  	defer b.l.RUnlock()
   810  
   811  	// Copy all the stats
   812  	stats.TotalReady = b.stats.TotalReady
   813  	stats.TotalUnacked = b.stats.TotalUnacked
   814  	stats.TotalBlocked = b.stats.TotalBlocked
   815  	stats.TotalWaiting = b.stats.TotalWaiting
   816  	for sched, subStat := range b.stats.ByScheduler {
   817  		subStatCopy := new(SchedulerStats)
   818  		*subStatCopy = *subStat
   819  		stats.ByScheduler[sched] = subStatCopy
   820  	}
   821  	return stats
   822  }
   823  
   824  // EmitStats is used to export metrics about the broker while enabled
   825  func (b *EvalBroker) EmitStats(period time.Duration, stopCh <-chan struct{}) {
   826  	for {
   827  		select {
   828  		case <-time.After(period):
   829  			stats := b.Stats()
   830  			metrics.SetGauge([]string{"nomad", "broker", "total_ready"}, float32(stats.TotalReady))
   831  			metrics.SetGauge([]string{"nomad", "broker", "total_unacked"}, float32(stats.TotalUnacked))
   832  			metrics.SetGauge([]string{"nomad", "broker", "total_blocked"}, float32(stats.TotalBlocked))
   833  			metrics.SetGauge([]string{"nomad", "broker", "total_waiting"}, float32(stats.TotalWaiting))
   834  			for sched, schedStats := range stats.ByScheduler {
   835  				metrics.SetGauge([]string{"nomad", "broker", sched, "ready"}, float32(schedStats.Ready))
   836  				metrics.SetGauge([]string{"nomad", "broker", sched, "unacked"}, float32(schedStats.Unacked))
   837  			}
   838  
   839  		case <-stopCh:
   840  			return
   841  		}
   842  	}
   843  }
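
// Illustrative sketch (not part of the original source): exporting broker
// metrics every 10 seconds until stopCh is closed; the period is an assumption.
func exampleEmitStats(b *EvalBroker, stopCh <-chan struct{}) {
	go b.EmitStats(10*time.Second, stopCh)
}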
   844  
   845  // BrokerStats holds all the stats about the broker
   846  type BrokerStats struct {
   847  	TotalReady   int
   848  	TotalUnacked int
   849  	TotalBlocked int
   850  	TotalWaiting int
   851  	ByScheduler  map[string]*SchedulerStats
   852  }
   853  
   854  // SchedulerStats holds the stats per scheduler
   855  type SchedulerStats struct {
   856  	Ready   int
   857  	Unacked int
   858  }
   859  
   860  // Len is for the sorting interface
   861  func (p PendingEvaluations) Len() int {
   862  	return len(p)
   863  }
   864  
   865  // Less is for the sorting interface. We flip the check
   866  // so that the "min" in the min-heap is the element with the
   867  // highest priority
   868  func (p PendingEvaluations) Less(i, j int) bool {
   869  	if p[i].JobID != p[j].JobID && p[i].Priority != p[j].Priority {
   870  		return !(p[i].Priority < p[j].Priority)
   871  	}
   872  	return p[i].CreateIndex < p[j].CreateIndex
   873  }
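
// Illustrative sketch (not part of the original source): PendingEvaluations
// used directly with container/heap. With distinct job IDs and priorities,
// Less orders the heap so the highest-priority evaluation pops first;
// otherwise ordering falls back to CreateIndex (oldest first). Field values
// below are assumptions.
func examplePendingOrder() *structs.Evaluation {
	pending := PendingEvaluations{}
	heap.Push(&pending, &structs.Evaluation{ID: "low", JobID: "job-a", Priority: 20, CreateIndex: 2})
	heap.Push(&pending, &structs.Evaluation{ID: "high", JobID: "job-b", Priority: 80, CreateIndex: 1})

	// Pops the Priority 80 evaluation first.
	return heap.Pop(&pending).(*structs.Evaluation)
}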
   874  
   875  // Swap is for the sorting interface
   876  func (p PendingEvaluations) Swap(i, j int) {
   877  	p[i], p[j] = p[j], p[i]
   878  }
   879  
   880  // Push is used to add a new evaluation to the slice
   881  func (p *PendingEvaluations) Push(e interface{}) {
   882  	*p = append(*p, e.(*structs.Evaluation))
   883  }
   884  
   885  // Pop is used to remove an evaluation from the slice
   886  func (p *PendingEvaluations) Pop() interface{} {
   887  	n := len(*p)
   888  	e := (*p)[n-1]
   889  	(*p)[n-1] = nil
   890  	*p = (*p)[:n-1]
   891  	return e
   892  }
   893  
   894  // Peek is used to peek at the next element that would be popped
   895  func (p PendingEvaluations) Peek() *structs.Evaluation {
   896  	n := len(p)
   897  	if n == 0 {
   898  		return nil
   899  	}
   900  	return p[n-1]
   901  }