github.com/blixtra/nomad@v0.7.2-0.20171221000451-da9a1d7bb050/nomad/eval_broker.go (about)

     1  package nomad
     2  
     3  import (
     4  	"container/heap"
     5  	"errors"
     6  	"fmt"
     7  	"math/rand"
     8  	"sync"
     9  	"time"
    10  
    11  	"github.com/armon/go-metrics"
    12  	"github.com/hashicorp/nomad/helper/uuid"
    13  	"github.com/hashicorp/nomad/nomad/structs"
    14  )
    15  
const (
	// failedQueue is the name of the queue that Evaluations are added to once
	// they have reached the deliveryLimit. Keeping them in a dedicated queue
	// allows the leader to pick them up and set their status to failed.
	failedQueue = "_failed"
)
    22  
var (
	// ErrNotOutstanding is returned if an evaluation is not outstanding,
	// i.e. it has no entry in the broker's set of unacknowledged evaluations.
	ErrNotOutstanding = errors.New("evaluation is not outstanding")

	// ErrTokenMismatch is returned if the outstanding evaluation has a
	// different token than the one supplied by the caller.
	ErrTokenMismatch = errors.New("evaluation token does not match")

	// ErrNackTimeoutReached is returned when the Nack timer of an evaluation
	// could not be reset or paused because it was no longer active.
	ErrNackTimeoutReached = errors.New("evaluation nack timeout reached")
)
    33  
// EvalBroker is used to manage brokering of evaluations. When an evaluation is
// created, due to a change in a job specification or a node, we put it into the
// broker. The broker sorts evaluations by priority and scheduler type. This
// allows us to dequeue the highest priority work first, while also allowing sub-schedulers
// to only dequeue work they know how to handle. The broker is designed to be entirely
// in-memory and is managed by the leader node.
//
// The broker must provide at-least-once delivery semantics. It relies on explicit
// Ack/Nack messages to handle this. If a delivery is not Ack'd in a sufficient time
// span, it will be assumed Nack'd.
type EvalBroker struct {
	// nackTimeout is how long a dequeued evaluation may stay unacknowledged
	// before its Nack timer fires. deliveryLimit is the number of deliveries
	// after which a Nacked evaluation is moved to the failed queue.
	nackTimeout   time.Duration
	deliveryLimit int

	// enabled gates the broker: while false, enqueueLocked drops evaluations
	// and scanning for work returns an error. stats holds the live counters
	// that Stats copies out.
	enabled bool
	stats   *BrokerStats

	// evals tracks queued evaluations by ID to de-duplicate enqueue.
	// The counter is the number of times we've attempted delivery,
	// and is used to eventually fail an evaluation.
	evals map[string]int

	// jobEvals tracks queued evaluations by a job's ID and namespace to serialize them
	jobEvals map[structs.NamespacedID]string

	// blocked tracks the blocked evaluations by JobID in a priority queue
	blocked map[string]PendingEvaluations

	// ready tracks the ready jobs by scheduler in a priority queue
	ready map[string]PendingEvaluations

	// unack is a map of evalID to an un-acknowledged evaluation
	unack map[string]*unackEval

	// waiting is used to notify on a per-scheduler basis of ready work
	waiting map[string]chan struct{}

	// requeue tracks evaluations that need to be re-enqueued once the current
	// evaluation finishes by token. If the token is Nacked or rejected the
	// evaluation is dropped but if Acked successfully, the evaluation is
	// queued.
	requeue map[string]*structs.Evaluation

	// timeWait has evaluations that are waiting for time to elapse,
	// keyed by evaluation ID.
	timeWait map[string]*time.Timer

	// initialNackDelay is the delay applied before reenqueuing a
	// Nacked evaluation for the first time.
	initialNackDelay time.Duration

	// subsequentNackDelay is the delay applied before reenqueuing
	// an evaluation that has been Nacked more than once. The delay is
	// multiplied by the number of deliveries after the first one.
	subsequentNackDelay time.Duration

	// l guards the broker state above; methods take it before touching the
	// maps, the stats or the enabled flag.
	l sync.RWMutex
}
    91  
// unackEval tracks an unacknowledged evaluation along with the Nack timer
type unackEval struct {
	// Eval is the evaluation that was handed out by a dequeue.
	Eval *structs.Evaluation
	// Token is the token generated for this delivery; Ack, Nack and the
	// timer operations require the caller to present the same token.
	Token string
	// NackTimer calls Nack for this delivery when it fires.
	NackTimer *time.Timer
}
    98  
// PendingEvaluations is a list of waiting evaluations.
// It implements the container/heap interface so that it can be used as a
// priority queue; it must be modified through heap.Push and heap.Pop to keep
// the heap ordering intact.
type PendingEvaluations []*structs.Evaluation
   103  
   104  // NewEvalBroker creates a new evaluation broker. This is parameterized
   105  // with the timeout used for messages that are not acknowledged before we
   106  // assume a Nack and attempt to redeliver as well as the deliveryLimit
   107  // which prevents a failing eval from being endlessly delivered. The
   108  // initialNackDelay is the delay before making a Nacked evalution available
   109  // again for the first Nack and subsequentNackDelay is the compounding delay
   110  // after the first Nack.
   111  func NewEvalBroker(timeout, initialNackDelay, subsequentNackDelay time.Duration, deliveryLimit int) (*EvalBroker, error) {
   112  	if timeout < 0 {
   113  		return nil, fmt.Errorf("timeout cannot be negative")
   114  	}
   115  	b := &EvalBroker{
   116  		nackTimeout:         timeout,
   117  		deliveryLimit:       deliveryLimit,
   118  		enabled:             false,
   119  		stats:               new(BrokerStats),
   120  		evals:               make(map[string]int),
   121  		jobEvals:            make(map[structs.NamespacedID]string),
   122  		blocked:             make(map[string]PendingEvaluations),
   123  		ready:               make(map[string]PendingEvaluations),
   124  		unack:               make(map[string]*unackEval),
   125  		waiting:             make(map[string]chan struct{}),
   126  		requeue:             make(map[string]*structs.Evaluation),
   127  		timeWait:            make(map[string]*time.Timer),
   128  		initialNackDelay:    initialNackDelay,
   129  		subsequentNackDelay: subsequentNackDelay,
   130  	}
   131  	b.stats.ByScheduler = make(map[string]*SchedulerStats)
   132  	return b, nil
   133  }
   134  
   135  // Enabled is used to check if the broker is enabled.
   136  func (b *EvalBroker) Enabled() bool {
   137  	b.l.RLock()
   138  	defer b.l.RUnlock()
   139  	return b.enabled
   140  }
   141  
   142  // SetEnabled is used to control if the broker is enabled. The broker
   143  // should only be enabled on the active leader.
   144  func (b *EvalBroker) SetEnabled(enabled bool) {
   145  	b.l.Lock()
   146  	b.enabled = enabled
   147  	b.l.Unlock()
   148  	if !enabled {
   149  		b.Flush()
   150  	}
   151  }
   152  
   153  // Enqueue is used to enqueue a new evaluation
   154  func (b *EvalBroker) Enqueue(eval *structs.Evaluation) {
   155  	b.l.Lock()
   156  	defer b.l.Unlock()
   157  	b.processEnqueue(eval, "")
   158  }
   159  
   160  // EnqueueAll is used to enqueue many evaluations. The map allows evaluations
   161  // that are being re-enqueued to include their token.
   162  //
   163  // When requeueing an evaluation that potentially may be already
   164  // enqueued. The evaluation is handled in one of the following ways:
   165  // * Evaluation not outstanding: Process as a normal Enqueue
   166  // * Evaluation outstanding: Do not allow the evaluation to be dequeued til:
   167  //    * Ack received:  Unblock the evaluation allowing it to be dequeued
   168  //    * Nack received: Drop the evaluation as it was created as a result of a
   169  //    scheduler run that was Nack'd
   170  func (b *EvalBroker) EnqueueAll(evals map[*structs.Evaluation]string) {
   171  	// The lock needs to be held until all evaluations are enqueued. This is so
   172  	// that when Dequeue operations are unblocked they will pick the highest
   173  	// priority evaluations.
   174  	b.l.Lock()
   175  	defer b.l.Unlock()
   176  	for eval, token := range evals {
   177  		b.processEnqueue(eval, token)
   178  	}
   179  }
   180  
   181  // processEnqueue deduplicates evals and either enqueue immediately or enforce
   182  // the evals wait time. If the token is passed, and the evaluation ID is
   183  // outstanding, the evaluation is blocked til an Ack/Nack is received.
   184  // processEnqueue must be called with the lock held.
   185  func (b *EvalBroker) processEnqueue(eval *structs.Evaluation, token string) {
   186  	// Check if already enqueued
   187  	if _, ok := b.evals[eval.ID]; ok {
   188  		if token == "" {
   189  			return
   190  		}
   191  
   192  		// If the token has been passed, the evaluation is being reblocked by
   193  		// the scheduler and should be processed once the outstanding evaluation
   194  		// is Acked or Nacked.
   195  		if unack, ok := b.unack[eval.ID]; ok && unack.Token == token {
   196  			b.requeue[token] = eval
   197  		}
   198  		return
   199  	} else if b.enabled {
   200  		b.evals[eval.ID] = 0
   201  	}
   202  
   203  	// Check if we need to enforce a wait
   204  	if eval.Wait > 0 {
   205  		b.processWaitingEnqueue(eval)
   206  		return
   207  	}
   208  
   209  	b.enqueueLocked(eval, eval.Type)
   210  }
   211  
   212  // processWaitingEnqueue waits the given duration on the evaluation before
   213  // enqueueing.
   214  func (b *EvalBroker) processWaitingEnqueue(eval *structs.Evaluation) {
   215  	timer := time.AfterFunc(eval.Wait, func() {
   216  		b.enqueueWaiting(eval)
   217  	})
   218  	b.timeWait[eval.ID] = timer
   219  	b.stats.TotalWaiting += 1
   220  }
   221  
   222  // enqueueWaiting is used to enqueue a waiting evaluation
   223  func (b *EvalBroker) enqueueWaiting(eval *structs.Evaluation) {
   224  	b.l.Lock()
   225  	defer b.l.Unlock()
   226  	delete(b.timeWait, eval.ID)
   227  	b.stats.TotalWaiting -= 1
   228  	b.enqueueLocked(eval, eval.Type)
   229  }
   230  
   231  // enqueueLocked is used to enqueue with the lock held
   232  func (b *EvalBroker) enqueueLocked(eval *structs.Evaluation, queue string) {
   233  	// Do nothing if not enabled
   234  	if !b.enabled {
   235  		return
   236  	}
   237  
   238  	// Check if there is an evaluation for this JobID pending
   239  	tuple := structs.NamespacedID{
   240  		ID:        eval.JobID,
   241  		Namespace: eval.Namespace,
   242  	}
   243  	pendingEval := b.jobEvals[tuple]
   244  	if pendingEval == "" {
   245  		b.jobEvals[tuple] = eval.ID
   246  	} else if pendingEval != eval.ID {
   247  		blocked := b.blocked[eval.JobID]
   248  		heap.Push(&blocked, eval)
   249  		b.blocked[eval.JobID] = blocked
   250  		b.stats.TotalBlocked += 1
   251  		return
   252  	}
   253  
   254  	// Find the pending by scheduler class
   255  	pending, ok := b.ready[queue]
   256  	if !ok {
   257  		pending = make([]*structs.Evaluation, 0, 16)
   258  		if _, ok := b.waiting[queue]; !ok {
   259  			b.waiting[queue] = make(chan struct{}, 1)
   260  		}
   261  	}
   262  
   263  	// Push onto the heap
   264  	heap.Push(&pending, eval)
   265  	b.ready[queue] = pending
   266  
   267  	// Update the stats
   268  	b.stats.TotalReady += 1
   269  	bySched, ok := b.stats.ByScheduler[queue]
   270  	if !ok {
   271  		bySched = &SchedulerStats{}
   272  		b.stats.ByScheduler[queue] = bySched
   273  	}
   274  	bySched.Ready += 1
   275  
   276  	// Unblock any blocked dequeues
   277  	select {
   278  	case b.waiting[queue] <- struct{}{}:
   279  	default:
   280  	}
   281  }
   282  
   283  // Dequeue is used to perform a blocking dequeue
   284  func (b *EvalBroker) Dequeue(schedulers []string, timeout time.Duration) (*structs.Evaluation, string, error) {
   285  	var timeoutTimer *time.Timer
   286  	var timeoutCh <-chan time.Time
   287  SCAN:
   288  	// Scan for work
   289  	eval, token, err := b.scanForSchedulers(schedulers)
   290  	if err != nil {
   291  		if timeoutTimer != nil {
   292  			timeoutTimer.Stop()
   293  		}
   294  		return nil, "", err
   295  	}
   296  
   297  	// Check if we have something
   298  	if eval != nil {
   299  		if timeoutTimer != nil {
   300  			timeoutTimer.Stop()
   301  		}
   302  		return eval, token, nil
   303  	}
   304  
   305  	// Setup the timeout channel the first time around
   306  	if timeoutTimer == nil && timeout != 0 {
   307  		timeoutTimer = time.NewTimer(timeout)
   308  		timeoutCh = timeoutTimer.C
   309  	}
   310  
   311  	// Block until we get work
   312  	scan := b.waitForSchedulers(schedulers, timeoutCh)
   313  	if scan {
   314  		goto SCAN
   315  	}
   316  	return nil, "", nil
   317  }
   318  
   319  // scanForSchedulers scans for work on any of the schedulers. The highest priority work
   320  // is dequeued first. This may return nothing if there is no work waiting.
   321  func (b *EvalBroker) scanForSchedulers(schedulers []string) (*structs.Evaluation, string, error) {
   322  	b.l.Lock()
   323  	defer b.l.Unlock()
   324  
   325  	// Do nothing if not enabled
   326  	if !b.enabled {
   327  		return nil, "", fmt.Errorf("eval broker disabled")
   328  	}
   329  
   330  	// Scan for eligible work
   331  	var eligibleSched []string
   332  	var eligiblePriority int
   333  	for _, sched := range schedulers {
   334  		// Get the pending queue
   335  		pending, ok := b.ready[sched]
   336  		if !ok {
   337  			continue
   338  		}
   339  
   340  		// Peek at the next item
   341  		ready := pending.Peek()
   342  		if ready == nil {
   343  			continue
   344  		}
   345  
   346  		// Add to eligible if equal or greater priority
   347  		if len(eligibleSched) == 0 || ready.Priority > eligiblePriority {
   348  			eligibleSched = []string{sched}
   349  			eligiblePriority = ready.Priority
   350  
   351  		} else if eligiblePriority > ready.Priority {
   352  			continue
   353  
   354  		} else if eligiblePriority == ready.Priority {
   355  			eligibleSched = append(eligibleSched, sched)
   356  		}
   357  	}
   358  
   359  	// Determine behavior based on eligible work
   360  	switch n := len(eligibleSched); n {
   361  	case 0:
   362  		// No work to do!
   363  		return nil, "", nil
   364  
   365  	case 1:
   366  		// Only a single task, dequeue
   367  		return b.dequeueForSched(eligibleSched[0])
   368  
   369  	default:
   370  		// Multiple tasks. We pick a random task so that we fairly
   371  		// distribute work.
   372  		offset := rand.Intn(n)
   373  		return b.dequeueForSched(eligibleSched[offset])
   374  	}
   375  }
   376  
   377  // dequeueForSched is used to dequeue the next work item for a given scheduler.
   378  // This assumes locks are held and that this scheduler has work
   379  func (b *EvalBroker) dequeueForSched(sched string) (*structs.Evaluation, string, error) {
   380  	// Get the pending queue
   381  	pending := b.ready[sched]
   382  	raw := heap.Pop(&pending)
   383  	b.ready[sched] = pending
   384  	eval := raw.(*structs.Evaluation)
   385  
   386  	// Generate a UUID for the token
   387  	token := uuid.Generate()
   388  
   389  	// Setup Nack timer
   390  	nackTimer := time.AfterFunc(b.nackTimeout, func() {
   391  		b.Nack(eval.ID, token)
   392  	})
   393  
   394  	// Add to the unack queue
   395  	b.unack[eval.ID] = &unackEval{
   396  		Eval:      eval,
   397  		Token:     token,
   398  		NackTimer: nackTimer,
   399  	}
   400  
   401  	// Increment the dequeue count
   402  	b.evals[eval.ID] += 1
   403  
   404  	// Update the stats
   405  	b.stats.TotalReady -= 1
   406  	b.stats.TotalUnacked += 1
   407  	bySched := b.stats.ByScheduler[sched]
   408  	bySched.Ready -= 1
   409  	bySched.Unacked += 1
   410  
   411  	return eval, token, nil
   412  }
   413  
   414  // waitForSchedulers is used to wait for work on any of the scheduler or until a timeout.
   415  // Returns if there is work waiting potentially.
   416  func (b *EvalBroker) waitForSchedulers(schedulers []string, timeoutCh <-chan time.Time) bool {
   417  	doneCh := make(chan struct{})
   418  	readyCh := make(chan struct{}, 1)
   419  	defer close(doneCh)
   420  
   421  	// Start all the watchers
   422  	b.l.Lock()
   423  	for _, sched := range schedulers {
   424  		waitCh, ok := b.waiting[sched]
   425  		if !ok {
   426  			waitCh = make(chan struct{}, 1)
   427  			b.waiting[sched] = waitCh
   428  		}
   429  
   430  		// Start a goroutine that either waits for the waitCh on this scheduler
   431  		// to unblock or for this waitForSchedulers call to return
   432  		go func() {
   433  			select {
   434  			case <-waitCh:
   435  				select {
   436  				case readyCh <- struct{}{}:
   437  				default:
   438  				}
   439  			case <-doneCh:
   440  			}
   441  		}()
   442  	}
   443  	b.l.Unlock()
   444  
   445  	// Block until we have ready work and should scan, or until we timeout
   446  	// and should not make an attempt to scan for work
   447  	select {
   448  	case <-readyCh:
   449  		return true
   450  	case <-timeoutCh:
   451  		return false
   452  	}
   453  }
   454  
   455  // Outstanding checks if an EvalID has been delivered but not acknowledged
   456  // and returns the associated token for the evaluation.
   457  func (b *EvalBroker) Outstanding(evalID string) (string, bool) {
   458  	b.l.RLock()
   459  	defer b.l.RUnlock()
   460  	unack, ok := b.unack[evalID]
   461  	if !ok {
   462  		return "", false
   463  	}
   464  	return unack.Token, true
   465  }
   466  
   467  // OutstandingReset resets the Nack timer for the EvalID if the
   468  // token matches and the eval is outstanding
   469  func (b *EvalBroker) OutstandingReset(evalID, token string) error {
   470  	b.l.RLock()
   471  	defer b.l.RUnlock()
   472  	unack, ok := b.unack[evalID]
   473  	if !ok {
   474  		return ErrNotOutstanding
   475  	}
   476  	if unack.Token != token {
   477  		return ErrTokenMismatch
   478  	}
   479  	if !unack.NackTimer.Reset(b.nackTimeout) {
   480  		return ErrNackTimeoutReached
   481  	}
   482  	return nil
   483  }
   484  
   485  // Ack is used to positively acknowledge handling an evaluation
   486  func (b *EvalBroker) Ack(evalID, token string) error {
   487  	b.l.Lock()
   488  	defer b.l.Unlock()
   489  
   490  	// Always delete the requeued evaluation. Either the Ack is successful and
   491  	// we requeue it or it isn't and we want to remove it.
   492  	defer delete(b.requeue, token)
   493  
   494  	// Lookup the unack'd eval
   495  	unack, ok := b.unack[evalID]
   496  	if !ok {
   497  		return fmt.Errorf("Evaluation ID not found")
   498  	}
   499  	if unack.Token != token {
   500  		return fmt.Errorf("Token does not match for Evaluation ID")
   501  	}
   502  	jobID := unack.Eval.JobID
   503  
   504  	// Ensure we were able to stop the timer
   505  	if !unack.NackTimer.Stop() {
   506  		return fmt.Errorf("Evaluation ID Ack'd after Nack timer expiration")
   507  	}
   508  
   509  	// Update the stats
   510  	b.stats.TotalUnacked -= 1
   511  	queue := unack.Eval.Type
   512  	if b.evals[evalID] > b.deliveryLimit {
   513  		queue = failedQueue
   514  	}
   515  	bySched := b.stats.ByScheduler[queue]
   516  	bySched.Unacked -= 1
   517  
   518  	// Cleanup
   519  	delete(b.unack, evalID)
   520  	delete(b.evals, evalID)
   521  
   522  	tuple := structs.NamespacedID{
   523  		ID:        jobID,
   524  		Namespace: unack.Eval.Namespace,
   525  	}
   526  	delete(b.jobEvals, tuple)
   527  
   528  	// Check if there are any blocked evaluations
   529  	if blocked := b.blocked[jobID]; len(blocked) != 0 {
   530  		raw := heap.Pop(&blocked)
   531  		if len(blocked) > 0 {
   532  			b.blocked[jobID] = blocked
   533  		} else {
   534  			delete(b.blocked, jobID)
   535  		}
   536  		eval := raw.(*structs.Evaluation)
   537  		b.stats.TotalBlocked -= 1
   538  		b.enqueueLocked(eval, eval.Type)
   539  	}
   540  
   541  	// Re-enqueue the evaluation.
   542  	if eval, ok := b.requeue[token]; ok {
   543  		b.processEnqueue(eval, "")
   544  	}
   545  
   546  	return nil
   547  }
   548  
   549  // Nack is used to negatively acknowledge handling an evaluation
   550  func (b *EvalBroker) Nack(evalID, token string) error {
   551  	b.l.Lock()
   552  	defer b.l.Unlock()
   553  
   554  	// Always delete the requeued evaluation since the Nack means the requeue is
   555  	// invalid.
   556  	delete(b.requeue, token)
   557  
   558  	// Lookup the unack'd eval
   559  	unack, ok := b.unack[evalID]
   560  	if !ok {
   561  		return fmt.Errorf("Evaluation ID not found")
   562  	}
   563  	if unack.Token != token {
   564  		return fmt.Errorf("Token does not match for Evaluation ID")
   565  	}
   566  
   567  	// Stop the timer, doesn't matter if we've missed it
   568  	unack.NackTimer.Stop()
   569  
   570  	// Cleanup
   571  	delete(b.unack, evalID)
   572  
   573  	// Update the stats
   574  	b.stats.TotalUnacked -= 1
   575  	bySched := b.stats.ByScheduler[unack.Eval.Type]
   576  	bySched.Unacked -= 1
   577  
   578  	// Check if we've hit the delivery limit, and re-enqueue
   579  	// in the failedQueue
   580  	if dequeues := b.evals[evalID]; dequeues >= b.deliveryLimit {
   581  		b.enqueueLocked(unack.Eval, failedQueue)
   582  	} else {
   583  		e := unack.Eval
   584  		e.Wait = b.nackReenqueueDelay(e, dequeues)
   585  
   586  		// See if there should be a delay before re-enqueuing
   587  		if e.Wait > 0 {
   588  			b.processWaitingEnqueue(e)
   589  		} else {
   590  			b.enqueueLocked(e, e.Type)
   591  		}
   592  	}
   593  
   594  	return nil
   595  }
   596  
   597  // nackReenqueueDelay is used to determine the delay that should be applied on
   598  // the evaluation given the number of previous attempts
   599  func (b *EvalBroker) nackReenqueueDelay(eval *structs.Evaluation, prevDequeues int) time.Duration {
   600  	switch {
   601  	case prevDequeues <= 0:
   602  		return 0
   603  	case prevDequeues == 1:
   604  		return b.initialNackDelay
   605  	default:
   606  		// For each subsequent nack compound a delay
   607  		return time.Duration(prevDequeues-1) * b.subsequentNackDelay
   608  	}
   609  }
   610  
   611  // PauseNackTimeout is used to pause the Nack timeout for an eval that is making
   612  // progress but is in a potentially unbounded operation such as the plan queue.
   613  func (b *EvalBroker) PauseNackTimeout(evalID, token string) error {
   614  	b.l.RLock()
   615  	defer b.l.RUnlock()
   616  	unack, ok := b.unack[evalID]
   617  	if !ok {
   618  		return ErrNotOutstanding
   619  	}
   620  	if unack.Token != token {
   621  		return ErrTokenMismatch
   622  	}
   623  	if !unack.NackTimer.Stop() {
   624  		return ErrNackTimeoutReached
   625  	}
   626  	return nil
   627  }
   628  
   629  // ResumeNackTimeout is used to resume the Nack timeout for an eval that was
   630  // paused. It should be resumed after leaving an unbounded operation.
   631  func (b *EvalBroker) ResumeNackTimeout(evalID, token string) error {
   632  	b.l.Lock()
   633  	defer b.l.Unlock()
   634  	unack, ok := b.unack[evalID]
   635  	if !ok {
   636  		return ErrNotOutstanding
   637  	}
   638  	if unack.Token != token {
   639  		return ErrTokenMismatch
   640  	}
   641  	unack.NackTimer.Reset(b.nackTimeout)
   642  	return nil
   643  }
   644  
   645  // Flush is used to clear the state of the broker
   646  func (b *EvalBroker) Flush() {
   647  	b.l.Lock()
   648  	defer b.l.Unlock()
   649  
   650  	// Unblock any waiters
   651  	for _, waitCh := range b.waiting {
   652  		close(waitCh)
   653  	}
   654  	b.waiting = make(map[string]chan struct{})
   655  
   656  	// Cancel any Nack timers
   657  	for _, unack := range b.unack {
   658  		unack.NackTimer.Stop()
   659  	}
   660  
   661  	// Cancel any time wait evals
   662  	for _, wait := range b.timeWait {
   663  		wait.Stop()
   664  	}
   665  
   666  	// Reset the broker
   667  	b.stats.TotalReady = 0
   668  	b.stats.TotalUnacked = 0
   669  	b.stats.TotalBlocked = 0
   670  	b.stats.TotalWaiting = 0
   671  	b.stats.ByScheduler = make(map[string]*SchedulerStats)
   672  	b.evals = make(map[string]int)
   673  	b.jobEvals = make(map[structs.NamespacedID]string)
   674  	b.blocked = make(map[string]PendingEvaluations)
   675  	b.ready = make(map[string]PendingEvaluations)
   676  	b.unack = make(map[string]*unackEval)
   677  	b.timeWait = make(map[string]*time.Timer)
   678  }
   679  
   680  // Stats is used to query the state of the broker
   681  func (b *EvalBroker) Stats() *BrokerStats {
   682  	// Allocate a new stats struct
   683  	stats := new(BrokerStats)
   684  	stats.ByScheduler = make(map[string]*SchedulerStats)
   685  
   686  	b.l.RLock()
   687  	defer b.l.RUnlock()
   688  
   689  	// Copy all the stats
   690  	stats.TotalReady = b.stats.TotalReady
   691  	stats.TotalUnacked = b.stats.TotalUnacked
   692  	stats.TotalBlocked = b.stats.TotalBlocked
   693  	stats.TotalWaiting = b.stats.TotalWaiting
   694  	for sched, subStat := range b.stats.ByScheduler {
   695  		subStatCopy := new(SchedulerStats)
   696  		*subStatCopy = *subStat
   697  		stats.ByScheduler[sched] = subStatCopy
   698  	}
   699  	return stats
   700  }
   701  
   702  // EmitStats is used to export metrics about the broker while enabled
   703  func (b *EvalBroker) EmitStats(period time.Duration, stopCh chan struct{}) {
   704  	for {
   705  		select {
   706  		case <-time.After(period):
   707  			stats := b.Stats()
   708  			metrics.SetGauge([]string{"nomad", "broker", "total_ready"}, float32(stats.TotalReady))
   709  			metrics.SetGauge([]string{"nomad", "broker", "total_unacked"}, float32(stats.TotalUnacked))
   710  			metrics.SetGauge([]string{"nomad", "broker", "total_blocked"}, float32(stats.TotalBlocked))
   711  			metrics.SetGauge([]string{"nomad", "broker", "total_waiting"}, float32(stats.TotalWaiting))
   712  			for sched, schedStats := range stats.ByScheduler {
   713  				metrics.SetGauge([]string{"nomad", "broker", sched, "ready"}, float32(schedStats.Ready))
   714  				metrics.SetGauge([]string{"nomad", "broker", sched, "unacked"}, float32(schedStats.Unacked))
   715  			}
   716  
   717  		case <-stopCh:
   718  			return
   719  		}
   720  	}
   721  }
   722  
// BrokerStats holds all the stats about the broker: the number of
// evaluations that are ready, delivered but unacknowledged, blocked behind
// another evaluation of the same job, and waiting for a delay to elapse,
// plus a breakdown per scheduler queue.
type BrokerStats struct {
	TotalReady   int
	TotalUnacked int
	TotalBlocked int
	TotalWaiting int
	ByScheduler  map[string]*SchedulerStats
}
   731  
// SchedulerStats holds the stats for a single scheduler queue: how many of
// its evaluations are ready and how many are delivered but unacknowledged.
type SchedulerStats struct {
	Ready   int
	Unacked int
}
   737  
// Len is for the sorting interface. It returns the number of pending
// evaluations.
func (p PendingEvaluations) Len() int {
	return len(p)
}
   742  
   743  // Less is for the sorting interface. We flip the check
   744  // so that the "min" in the min-heap is the element with the
   745  // highest priority
   746  func (p PendingEvaluations) Less(i, j int) bool {
   747  	if p[i].JobID != p[j].JobID && p[i].Priority != p[j].Priority {
   748  		return !(p[i].Priority < p[j].Priority)
   749  	}
   750  	return p[i].CreateIndex < p[j].CreateIndex
   751  }
   752  
// Swap is for the sorting interface. It exchanges the evaluations at
// positions i and j.
func (p PendingEvaluations) Swap(i, j int) {
	p[i], p[j] = p[j], p[i]
}
   757  
   758  // Push is used to add a new evalution to the slice
   759  func (p *PendingEvaluations) Push(e interface{}) {
   760  	*p = append(*p, e.(*structs.Evaluation))
   761  }
   762  
   763  // Pop is used to remove an evaluation from the slice
   764  func (p *PendingEvaluations) Pop() interface{} {
   765  	n := len(*p)
   766  	e := (*p)[n-1]
   767  	(*p)[n-1] = nil
   768  	*p = (*p)[:n-1]
   769  	return e
   770  }
   771  
   772  // Peek is used to peek at the next element that would be popped
   773  func (p PendingEvaluations) Peek() *structs.Evaluation {
   774  	n := len(p)
   775  	if n == 0 {
   776  		return nil
   777  	}
   778  	return p[n-1]
   779  }