github.com/jrxfive/nomad@v0.6.1-0.20170802162750-1fef470e89bf/nomad/eval_broker.go

     1  package nomad
     2  
     3  import (
     4  	"container/heap"
     5  	"errors"
     6  	"fmt"
     7  	"math/rand"
     8  	"sync"
     9  	"time"
    10  
    11  	"github.com/armon/go-metrics"
    12  	"github.com/hashicorp/nomad/nomad/structs"
    13  )
    14  
    15  const (
    16  	// failedQueue is the queue we add Evaluations to once
    17  	// they've reached the deliveryLimit. This allows the leader to
    18  	// set the status to failed.
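        	//
        	// Evaluations in this queue can be drained like any other queue by
        	// passing failedQueue as the scheduler name, e.g. (illustrative
        	// sketch; the broker variable and timeout are assumptions):
        	//
        	//	evalBroker.Dequeue([]string{failedQueue}, time.Second)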
    19  	failedQueue = "_failed"
    20  )
    21  
    22  var (
    23  	// ErrNotOutstanding is returned if an evaluation is not outstanding
    24  	ErrNotOutstanding = errors.New("evaluation is not outstanding")
    25  
    26  	// ErrTokenMismatch is returned if the outstanding eval has a different token
    27  	ErrTokenMismatch = errors.New("evaluation token does not match")
    28  
    29  	// ErrNackTimeoutReached is returned if an expired evaluation is reset
    30  	ErrNackTimeoutReached = errors.New("evaluation nack timeout reached")
    31  )
    32  
    33  // EvalBroker is used to manage brokering of evaluations. When an evaluation is
    34  // created, due to a change in a job specification or a node, we put it into the
    35  // broker. The broker sorts evaluations by priority and scheduler type. This
    36  // allows us to dequeue the highest priority work first, while also allowing sub-schedulers
    37  // to only dequeue work they know how to handle. The broker is designed to be entirely
    38  // in-memory and is managed by the leader node.
    39  //
    40  // The broker must provide at-least-once delivery semantics. It relies on explicit
    41  // Ack/Nack messages to handle this. If a delivery is not Ack'd in a sufficient time
    42  // span, it will be assumed Nack'd.
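        //
        // A minimal lifecycle sketch (illustrative only; the durations, the
        // "service" scheduler name, and the eval variable are assumptions, not
        // values required by this package):
        //
        //	b, _ := NewEvalBroker(5*time.Second, time.Second, time.Second, 3)
        //	b.SetEnabled(true)
        //	b.Enqueue(eval)
        //	e, token, _ := b.Dequeue([]string{"service"}, time.Second)
        //	// ... run the scheduler against e, then acknowledge ...
        //	_ = b.Ack(e.ID, token)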
    43  type EvalBroker struct {
    44  	nackTimeout   time.Duration
    45  	deliveryLimit int
    46  
    47  	enabled bool
    48  	stats   *BrokerStats
    49  
    50  	// evals tracks queued evaluations by ID to de-duplicate enqueue.
    51  	// The counter is the number of times we've attempted delivery,
    52  	// and is used to eventually fail an evaluation.
    53  	evals map[string]int
    54  
    55  	// jobEvals tracks queued evaluations by JobID to serialize them
    56  	jobEvals map[string]string
    57  
    58  	// blocked tracks the blocked evaluations by JobID in a priority queue
    59  	blocked map[string]PendingEvaluations
    60  
    61  	// ready tracks the ready jobs by scheduler in a priority queue
    62  	ready map[string]PendingEvaluations
    63  
    64  	// unack is a map of evalID to an un-acknowledged evaluation
    65  	unack map[string]*unackEval
    66  
    67  	// waiting is used to notify on a per-scheduler basis of ready work
    68  	waiting map[string]chan struct{}
    69  
    70  	// requeue tracks, by token, evaluations that need to be re-enqueued once
    71  	// the current evaluation finishes. If the token is Nacked or rejected the
    72  	// evaluation is dropped, but if Acked successfully, the evaluation is
    73  	// queued.
    74  	requeue map[string]*structs.Evaluation
    75  
    76  	// timeWait has evaluations that are waiting for time to elapse
    77  	timeWait map[string]*time.Timer
    78  
    79  	// initialNackDelay is the delay applied before reenqueuing a
    80  	// Nacked evaluation for the first time.
    81  	initialNackDelay time.Duration
    82  
    83  	// subsequentNackDelay is the delay applied before reenqueuing
    84  	// an evaluation that has been Nacked more than once. This delay is
    85  	// compounding after the first Nack.
    86  	subsequentNackDelay time.Duration
    87  
    88  	l sync.RWMutex
    89  }
    90  
    91  // unackEval tracks an unacknowledged evaluation along with the Nack timer
    92  type unackEval struct {
    93  	Eval      *structs.Evaluation
    94  	Token     string
    95  	NackTimer *time.Timer
    96  }
    97  
    98  // PendingEvaluations is a list of waiting evaluations.
    99  // We implement the container/heap interface so that this is a
   100  // priority queue
   101  type PendingEvaluations []*structs.Evaluation
   102  
   103  // NewEvalBroker creates a new evaluation broker. This is parameterized
   104  // with the timeout used for messages that are not acknowledged before we
   105  // assume a Nack and attempt to redeliver, as well as the deliveryLimit
   106  // which prevents a failing eval from being endlessly delivered. The
   107  // initialNackDelay is the delay before making a Nacked evaluation available
   108  // again for the first Nack and subsequentNackDelay is the compounding delay
   109  // after the first Nack.
   110  func NewEvalBroker(timeout, initialNackDelay, subsequentNackDelay time.Duration, deliveryLimit int) (*EvalBroker, error) {
   111  	if timeout < 0 {
   112  		return nil, fmt.Errorf("timeout cannot be negative")
   113  	}
   114  	b := &EvalBroker{
   115  		nackTimeout:         timeout,
   116  		deliveryLimit:       deliveryLimit,
   117  		enabled:             false,
   118  		stats:               new(BrokerStats),
   119  		evals:               make(map[string]int),
   120  		jobEvals:            make(map[string]string),
   121  		blocked:             make(map[string]PendingEvaluations),
   122  		ready:               make(map[string]PendingEvaluations),
   123  		unack:               make(map[string]*unackEval),
   124  		waiting:             make(map[string]chan struct{}),
   125  		requeue:             make(map[string]*structs.Evaluation),
   126  		timeWait:            make(map[string]*time.Timer),
   127  		initialNackDelay:    initialNackDelay,
   128  		subsequentNackDelay: subsequentNackDelay,
   129  	}
   130  	b.stats.ByScheduler = make(map[string]*SchedulerStats)
   131  	return b, nil
   132  }
   133  
   134  // Enabled is used to check if the broker is enabled.
   135  func (b *EvalBroker) Enabled() bool {
   136  	b.l.RLock()
   137  	defer b.l.RUnlock()
   138  	return b.enabled
   139  }
   140  
   141  // SetEnabled is used to control if the broker is enabled. The broker
   142  // should only be enabled on the active leader.
   143  func (b *EvalBroker) SetEnabled(enabled bool) {
   144  	b.l.Lock()
   145  	b.enabled = enabled
   146  	b.l.Unlock()
   147  	if !enabled {
   148  		b.Flush()
   149  	}
   150  }
   151  
   152  // Enqueue is used to enqueue a new evaluation
   153  func (b *EvalBroker) Enqueue(eval *structs.Evaluation) {
   154  	b.l.Lock()
   155  	defer b.l.Unlock()
   156  	b.processEnqueue(eval, "")
   157  }
   158  
   159  // EnqueueAll is used to enqueue many evaluations. The map allows evaluations
   160  // that are being re-enqueued to include their token.
   161  //
   162  // When requeueing an evaluation that potentially may be already
   163  // enqueued. The evaluation is handled in one of the following ways:
   164  // * Evaluation not outstanding: Process as a normal Enqueue
   165  // * Evaluation outstanding: Do not allow the evaluation to be dequeued til:
   166  //    * Ack received:  Unblock the evaluation allowing it to be dequeued
   167  //    * Nack received: Drop the evaluation as it was created as a result of a
   168  //    scheduler run that was Nack'd
   169  func (b *EvalBroker) EnqueueAll(evals map[*structs.Evaluation]string) {
   170  	// The lock needs to be held until all evaluations are enqueued. This is so
   171  	// that when Dequeue operations are unblocked they will pick the highest
   172  	// priority evaluations.
   173  	b.l.Lock()
   174  	defer b.l.Unlock()
   175  	for eval, token := range evals {
   176  		b.processEnqueue(eval, token)
   177  	}
   178  }
   179  
   180  // processEnqueue deduplicates evals and either enqueues them immediately or
   181  // enforces the eval's wait time. If a token is passed and the evaluation ID is
   182  // outstanding, the evaluation is blocked until an Ack/Nack is received.
   183  // processEnqueue must be called with the lock held.
   184  func (b *EvalBroker) processEnqueue(eval *structs.Evaluation, token string) {
   185  	// Check if already enqueued
   186  	if _, ok := b.evals[eval.ID]; ok {
   187  		if token == "" {
   188  			return
   189  		}
   190  
   191  		// If the token has been passed, the evaluation is being reblocked by
   192  		// the scheduler and should be processed once the outstanding evaluation
   193  		// is Acked or Nacked.
   194  		if unack, ok := b.unack[eval.ID]; ok && unack.Token == token {
   195  			b.requeue[token] = eval
   196  		}
   197  		return
   198  	} else if b.enabled {
   199  		b.evals[eval.ID] = 0
   200  	}
   201  
   202  	// Check if we need to enforce a wait
   203  	if eval.Wait > 0 {
   204  		b.processWaitingEnqueue(eval)
   205  		return
   206  	}
   207  
   208  	b.enqueueLocked(eval, eval.Type)
   209  }
   210  
   211  // processWaitingEnqueue schedules the evaluation to be enqueued after its
   212  // Wait duration has elapsed.
   213  func (b *EvalBroker) processWaitingEnqueue(eval *structs.Evaluation) {
   214  	timer := time.AfterFunc(eval.Wait, func() {
   215  		b.enqueueWaiting(eval)
   216  	})
   217  	b.timeWait[eval.ID] = timer
   218  	b.stats.TotalWaiting += 1
   219  }
   220  
   221  // enqueueWaiting is used to enqueue a waiting evaluation
   222  func (b *EvalBroker) enqueueWaiting(eval *structs.Evaluation) {
   223  	b.l.Lock()
   224  	defer b.l.Unlock()
   225  	delete(b.timeWait, eval.ID)
   226  	b.stats.TotalWaiting -= 1
   227  	b.enqueueLocked(eval, eval.Type)
   228  }
   229  
   230  // enqueueLocked is used to enqueue with the lock held
   231  func (b *EvalBroker) enqueueLocked(eval *structs.Evaluation, queue string) {
   232  	// Do nothing if not enabled
   233  	if !b.enabled {
   234  		return
   235  	}
   236  
   237  	// Check if there is a pending evaluation for this JobID
   238  	pendingEval := b.jobEvals[eval.JobID]
   239  	if pendingEval == "" {
   240  		b.jobEvals[eval.JobID] = eval.ID
   241  	} else if pendingEval != eval.ID {
   242  		blocked := b.blocked[eval.JobID]
   243  		heap.Push(&blocked, eval)
   244  		b.blocked[eval.JobID] = blocked
   245  		b.stats.TotalBlocked += 1
   246  		return
   247  	}
   248  
   249  	// Find the pending queue for this scheduler class
   250  	pending, ok := b.ready[queue]
   251  	if !ok {
   252  		pending = make([]*structs.Evaluation, 0, 16)
   253  		if _, ok := b.waiting[queue]; !ok {
   254  			b.waiting[queue] = make(chan struct{}, 1)
   255  		}
   256  	}
   257  
   258  	// Push onto the heap
   259  	heap.Push(&pending, eval)
   260  	b.ready[queue] = pending
   261  
   262  	// Update the stats
   263  	b.stats.TotalReady += 1
   264  	bySched, ok := b.stats.ByScheduler[queue]
   265  	if !ok {
   266  		bySched = &SchedulerStats{}
   267  		b.stats.ByScheduler[queue] = bySched
   268  	}
   269  	bySched.Ready += 1
   270  
   271  	// Unblock any blocked dequeues
   272  	select {
   273  	case b.waiting[queue] <- struct{}{}:
   274  	default:
   275  	}
   276  }
   277  
   278  // Dequeue is used to perform a blocking dequeue
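        //
        // A nil evaluation is returned if the timeout expires before any work is
        // ready. A typical caller loop looks like this (sketch; the scheduler
        // names and timeout are assumptions):
        //
        //	eval, token, err := b.Dequeue([]string{"service", "batch"}, 5*time.Second)
        //	if err == nil && eval == nil {
        //		// timed out with no ready work; try again
        //	}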
   279  func (b *EvalBroker) Dequeue(schedulers []string, timeout time.Duration) (*structs.Evaluation, string, error) {
   280  	var timeoutTimer *time.Timer
   281  	var timeoutCh <-chan time.Time
   282  SCAN:
   283  	// Scan for work
   284  	eval, token, err := b.scanForSchedulers(schedulers)
   285  	if err != nil {
   286  		if timeoutTimer != nil {
   287  			timeoutTimer.Stop()
   288  		}
   289  		return nil, "", err
   290  	}
   291  
   292  	// Check if we have something
   293  	if eval != nil {
   294  		if timeoutTimer != nil {
   295  			timeoutTimer.Stop()
   296  		}
   297  		return eval, token, nil
   298  	}
   299  
   300  	// Setup the timeout channel the first time around
   301  	if timeoutTimer == nil && timeout != 0 {
   302  		timeoutTimer = time.NewTimer(timeout)
   303  		timeoutCh = timeoutTimer.C
   304  	}
   305  
   306  	// Block until we get work
   307  	scan := b.waitForSchedulers(schedulers, timeoutCh)
   308  	if scan {
   309  		goto SCAN
   310  	}
   311  	return nil, "", nil
   312  }
   313  
   314  // scanForSchedulers scans for work on any of the schedulers. The highest priority work
   315  // is dequeued first. This may return nothing if there is no work waiting.
   316  func (b *EvalBroker) scanForSchedulers(schedulers []string) (*structs.Evaluation, string, error) {
   317  	b.l.Lock()
   318  	defer b.l.Unlock()
   319  
   320  	// Do nothing if not enabled
   321  	if !b.enabled {
   322  		return nil, "", fmt.Errorf("eval broker disabled")
   323  	}
   324  
   325  	// Scan for eligible work
   326  	var eligibleSched []string
   327  	var eligiblePriority int
   328  	for _, sched := range schedulers {
   329  		// Get the pending queue
   330  		pending, ok := b.ready[sched]
   331  		if !ok {
   332  			continue
   333  		}
   334  
   335  		// Peek at the next item
   336  		ready := pending.Peek()
   337  		if ready == nil {
   338  			continue
   339  		}
   340  
   341  		// Add to eligible if equal or greater priority
   342  		if len(eligibleSched) == 0 || ready.Priority > eligiblePriority {
   343  			eligibleSched = []string{sched}
   344  			eligiblePriority = ready.Priority
   345  
   346  		} else if eligiblePriority > ready.Priority {
   347  			continue
   348  
   349  		} else if eligiblePriority == ready.Priority {
   350  			eligibleSched = append(eligibleSched, sched)
   351  		}
   352  	}
   353  
   354  	// Determine behavior based on eligible work
   355  	switch n := len(eligibleSched); n {
   356  	case 0:
   357  		// No work to do!
   358  		return nil, "", nil
   359  
   360  	case 1:
   361  		// Only a single eligible scheduler, dequeue from it
   362  		return b.dequeueForSched(eligibleSched[0])
   363  
   364  	default:
   365  		// Multiple eligible schedulers. We pick one at random so that
   366  		// work is distributed fairly.
   367  		offset := rand.Intn(n)
   368  		return b.dequeueForSched(eligibleSched[offset])
   369  	}
   370  }
   371  
   372  // dequeueForSched is used to dequeue the next work item for a given scheduler.
   373  // This assumes locks are held and that this scheduler has work
   374  func (b *EvalBroker) dequeueForSched(sched string) (*structs.Evaluation, string, error) {
   375  	// Get the pending queue
   376  	pending := b.ready[sched]
   377  	raw := heap.Pop(&pending)
   378  	b.ready[sched] = pending
   379  	eval := raw.(*structs.Evaluation)
   380  
   381  	// Generate a UUID for the token
   382  	token := structs.GenerateUUID()
   383  
   384  	// Setup Nack timer
   385  	nackTimer := time.AfterFunc(b.nackTimeout, func() {
   386  		b.Nack(eval.ID, token)
   387  	})
   388  
   389  	// Add to the unack queue
   390  	b.unack[eval.ID] = &unackEval{
   391  		Eval:      eval,
   392  		Token:     token,
   393  		NackTimer: nackTimer,
   394  	}
   395  
   396  	// Increment the dequeue count
   397  	b.evals[eval.ID] += 1
   398  
   399  	// Update the stats
   400  	b.stats.TotalReady -= 1
   401  	b.stats.TotalUnacked += 1
   402  	bySched := b.stats.ByScheduler[sched]
   403  	bySched.Ready -= 1
   404  	bySched.Unacked += 1
   405  
   406  	return eval, token, nil
   407  }
   408  
   409  // waitForSchedulers is used to wait for work on any of the schedulers or until a timeout.
   410  // It returns true if there is potentially work waiting.
   411  func (b *EvalBroker) waitForSchedulers(schedulers []string, timeoutCh <-chan time.Time) bool {
   412  	doneCh := make(chan struct{})
   413  	readyCh := make(chan struct{}, 1)
   414  	defer close(doneCh)
   415  
   416  	// Start all the watchers
   417  	b.l.Lock()
   418  	for _, sched := range schedulers {
   419  		waitCh, ok := b.waiting[sched]
   420  		if !ok {
   421  			waitCh = make(chan struct{}, 1)
   422  			b.waiting[sched] = waitCh
   423  		}
   424  
   425  		// Start a goroutine that either waits for the waitCh on this scheduler
   426  		// to unblock or for this waitForSchedulers call to return
   427  		go func() {
   428  			select {
   429  			case <-waitCh:
   430  				select {
   431  				case readyCh <- struct{}{}:
   432  				default:
   433  				}
   434  			case <-doneCh:
   435  			}
   436  		}()
   437  	}
   438  	b.l.Unlock()
   439  
   440  	// Block until we have ready work and should scan, or until we timeout
   441  	// and should not make an attempt to scan for work
   442  	select {
   443  	case <-readyCh:
   444  		return true
   445  	case <-timeoutCh:
   446  		return false
   447  	}
   448  }
   449  
   450  // Outstanding checks if an EvalID has been delivered but not acknowledged
   451  // and returns the associated token for the evaluation.
   452  func (b *EvalBroker) Outstanding(evalID string) (string, bool) {
   453  	b.l.RLock()
   454  	defer b.l.RUnlock()
   455  	unack, ok := b.unack[evalID]
   456  	if !ok {
   457  		return "", false
   458  	}
   459  	return unack.Token, true
   460  }
   461  
   462  // OutstandingReset resets the Nack timer for the EvalID if the
   463  // token matches and the eval is outstanding
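        //
        // A scheduler that is still working on an eval can call this periodically
        // to keep it from being Nacked (sketch; eval and token come from Dequeue):
        //
        //	if err := b.OutstandingReset(eval.ID, token); err == ErrNackTimeoutReached {
        //		// the Nack timer already fired; the eval will be redelivered
        //	}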
   464  func (b *EvalBroker) OutstandingReset(evalID, token string) error {
   465  	b.l.RLock()
   466  	defer b.l.RUnlock()
   467  	unack, ok := b.unack[evalID]
   468  	if !ok {
   469  		return ErrNotOutstanding
   470  	}
   471  	if unack.Token != token {
   472  		return ErrTokenMismatch
   473  	}
   474  	if !unack.NackTimer.Reset(b.nackTimeout) {
   475  		return ErrNackTimeoutReached
   476  	}
   477  	return nil
   478  }
   479  
   480  // Ack is used to positively acknowledge handling an evaluation
   481  func (b *EvalBroker) Ack(evalID, token string) error {
   482  	b.l.Lock()
   483  	defer b.l.Unlock()
   484  
   485  	// Always delete the requeued evaluation. Either the Ack is successful and
   486  	// we requeue it or it isn't and we want to remove it.
   487  	defer delete(b.requeue, token)
   488  
   489  	// Lookup the unack'd eval
   490  	unack, ok := b.unack[evalID]
   491  	if !ok {
   492  		return fmt.Errorf("Evaluation ID not found")
   493  	}
   494  	if unack.Token != token {
   495  		return fmt.Errorf("Token does not match for Evaluation ID")
   496  	}
   497  	jobID := unack.Eval.JobID
   498  
   499  	// Ensure we were able to stop the timer
   500  	if !unack.NackTimer.Stop() {
   501  		return fmt.Errorf("Evaluation ID Ack'd after Nack timer expiration")
   502  	}
   503  
   504  	// Update the stats
   505  	b.stats.TotalUnacked -= 1
   506  	queue := unack.Eval.Type
   507  	if b.evals[evalID] > b.deliveryLimit {
   508  		queue = failedQueue
   509  	}
   510  	bySched := b.stats.ByScheduler[queue]
   511  	bySched.Unacked -= 1
   512  
   513  	// Cleanup
   514  	delete(b.unack, evalID)
   515  	delete(b.evals, evalID)
   516  	delete(b.jobEvals, jobID)
   517  
   518  	// Check if there are any blocked evaluations
   519  	if blocked := b.blocked[jobID]; len(blocked) != 0 {
   520  		raw := heap.Pop(&blocked)
   521  		if len(blocked) > 0 {
   522  			b.blocked[jobID] = blocked
   523  		} else {
   524  			delete(b.blocked, jobID)
   525  		}
   526  		eval := raw.(*structs.Evaluation)
   527  		b.stats.TotalBlocked -= 1
   528  		b.enqueueLocked(eval, eval.Type)
   529  	}
   530  
   531  	// Re-enqueue the evaluation.
   532  	if eval, ok := b.requeue[token]; ok {
   533  		b.processEnqueue(eval, "")
   534  	}
   535  
   536  	return nil
   537  }
   538  
   539  // Nack is used to negatively acknowledge handling an evaluation
   540  func (b *EvalBroker) Nack(evalID, token string) error {
   541  	b.l.Lock()
   542  	defer b.l.Unlock()
   543  
   544  	// Always delete the requeued evaluation since the Nack means the requeue is
   545  	// invalid.
   546  	delete(b.requeue, token)
   547  
   548  	// Lookup the unack'd eval
   549  	unack, ok := b.unack[evalID]
   550  	if !ok {
   551  		return fmt.Errorf("Evaluation ID not found")
   552  	}
   553  	if unack.Token != token {
   554  		return fmt.Errorf("Token does not match for Evaluation ID")
   555  	}
   556  
   557  	// Stop the timer, doesn't matter if we've missed it
   558  	unack.NackTimer.Stop()
   559  
   560  	// Cleanup
   561  	delete(b.unack, evalID)
   562  
   563  	// Update the stats
   564  	b.stats.TotalUnacked -= 1
   565  	bySched := b.stats.ByScheduler[unack.Eval.Type]
   566  	bySched.Unacked -= 1
   567  
   568  	// Check if we've hit the delivery limit, and re-enqueue
   569  	// in the failedQueue
   570  	if dequeues := b.evals[evalID]; dequeues >= b.deliveryLimit {
   571  		b.enqueueLocked(unack.Eval, failedQueue)
   572  	} else {
   573  		e := unack.Eval
   574  		e.Wait = b.nackReenqueueDelay(e, dequeues)
   575  
   576  		// See if there should be a delay before re-enqueuing
   577  		if e.Wait > 0 {
   578  			b.processWaitingEnqueue(e)
   579  		} else {
   580  			b.enqueueLocked(e, e.Type)
   581  		}
   582  	}
   583  
   584  	return nil
   585  }
   586  
   587  // nackReenqueueDelay is used to determine the delay that should be applied to
   588  // the evaluation given the number of previous delivery attempts.
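        //
        // For example, with initialNackDelay = 5s and subsequentNackDelay = 10s
        // (values illustrative), the delay is 5s after the first Nack, 10s after
        // the second, and 20s after the third.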
   589  func (b *EvalBroker) nackReenqueueDelay(eval *structs.Evaluation, prevDequeues int) time.Duration {
   590  	switch {
   591  	case prevDequeues <= 0:
   592  		return 0
   593  	case prevDequeues == 1:
   594  		return b.initialNackDelay
   595  	default:
   596  		// For each subsequent nack compound a delay
   597  		return time.Duration(prevDequeues-1) * b.subsequentNackDelay
   598  	}
   599  }
   600  
   601  // PauseNackTimeout is used to pause the Nack timeout for an eval that is making
   602  // progress but is in a potentially unbounded operation such as the plan queue.
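        //
        // The intended pairing is (sketch; the unbounded work in between is an
        // assumption for illustration):
        //
        //	b.PauseNackTimeout(eval.ID, token)
        //	// ... wait on the plan queue ...
        //	b.ResumeNackTimeout(eval.ID, token)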
   603  func (b *EvalBroker) PauseNackTimeout(evalID, token string) error {
   604  	b.l.RLock()
   605  	defer b.l.RUnlock()
   606  	unack, ok := b.unack[evalID]
   607  	if !ok {
   608  		return ErrNotOutstanding
   609  	}
   610  	if unack.Token != token {
   611  		return ErrTokenMismatch
   612  	}
   613  	if !unack.NackTimer.Stop() {
   614  		return ErrNackTimeoutReached
   615  	}
   616  	return nil
   617  }
   618  
   619  // ResumeNackTimeout is used to resume the Nack timeout for an eval that was
   620  // paused. It should be resumed after leaving an unbounded operation.
   621  func (b *EvalBroker) ResumeNackTimeout(evalID, token string) error {
   622  	b.l.Lock()
   623  	defer b.l.Unlock()
   624  	unack, ok := b.unack[evalID]
   625  	if !ok {
   626  		return ErrNotOutstanding
   627  	}
   628  	if unack.Token != token {
   629  		return ErrTokenMismatch
   630  	}
   631  	unack.NackTimer.Reset(b.nackTimeout)
   632  	return nil
   633  }
   634  
   635  // Flush is used to clear the state of the broker
   636  func (b *EvalBroker) Flush() {
   637  	b.l.Lock()
   638  	defer b.l.Unlock()
   639  
   640  	// Unblock any waiters
   641  	for _, waitCh := range b.waiting {
   642  		close(waitCh)
   643  	}
   644  	b.waiting = make(map[string]chan struct{})
   645  
   646  	// Cancel any Nack timers
   647  	for _, unack := range b.unack {
   648  		unack.NackTimer.Stop()
   649  	}
   650  
   651  	// Cancel any time wait evals
   652  	for _, wait := range b.timeWait {
   653  		wait.Stop()
   654  	}
   655  
   656  	// Reset the broker
   657  	b.stats.TotalReady = 0
   658  	b.stats.TotalUnacked = 0
   659  	b.stats.TotalBlocked = 0
   660  	b.stats.TotalWaiting = 0
   661  	b.stats.ByScheduler = make(map[string]*SchedulerStats)
   662  	b.evals = make(map[string]int)
   663  	b.jobEvals = make(map[string]string)
   664  	b.blocked = make(map[string]PendingEvaluations)
   665  	b.ready = make(map[string]PendingEvaluations)
   666  	b.unack = make(map[string]*unackEval)
   667  	b.timeWait = make(map[string]*time.Timer)
   668  }
   669  
   670  // Stats is used to query the state of the broker
   671  func (b *EvalBroker) Stats() *BrokerStats {
   672  	// Allocate a new stats struct
   673  	stats := new(BrokerStats)
   674  	stats.ByScheduler = make(map[string]*SchedulerStats)
   675  
   676  	b.l.RLock()
   677  	defer b.l.RUnlock()
   678  
   679  	// Copy all the stats
   680  	stats.TotalReady = b.stats.TotalReady
   681  	stats.TotalUnacked = b.stats.TotalUnacked
   682  	stats.TotalBlocked = b.stats.TotalBlocked
   683  	stats.TotalWaiting = b.stats.TotalWaiting
   684  	for sched, subStat := range b.stats.ByScheduler {
   685  		subStatCopy := new(SchedulerStats)
   686  		*subStatCopy = *subStat
   687  		stats.ByScheduler[sched] = subStatCopy
   688  	}
   689  	return stats
   690  }
   691  
   692  // EmitStats is used to export metrics about the broker while enabled
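        //
        // It is typically run in its own goroutine (sketch; the period is an
        // assumption):
        //
        //	stopCh := make(chan struct{})
        //	go b.EmitStats(time.Second, stopCh)
        //	// ... later, to stop emitting ...
        //	close(stopCh)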
   693  func (b *EvalBroker) EmitStats(period time.Duration, stopCh chan struct{}) {
   694  	for {
   695  		select {
   696  		case <-time.After(period):
   697  			stats := b.Stats()
   698  			metrics.SetGauge([]string{"nomad", "broker", "total_ready"}, float32(stats.TotalReady))
   699  			metrics.SetGauge([]string{"nomad", "broker", "total_unacked"}, float32(stats.TotalUnacked))
   700  			metrics.SetGauge([]string{"nomad", "broker", "total_blocked"}, float32(stats.TotalBlocked))
   701  			metrics.SetGauge([]string{"nomad", "broker", "total_waiting"}, float32(stats.TotalWaiting))
   702  			for sched, schedStats := range stats.ByScheduler {
   703  				metrics.SetGauge([]string{"nomad", "broker", sched, "ready"}, float32(schedStats.Ready))
   704  				metrics.SetGauge([]string{"nomad", "broker", sched, "unacked"}, float32(schedStats.Unacked))
   705  			}
   706  
   707  		case <-stopCh:
   708  			return
   709  		}
   710  	}
   711  }
   712  
   713  // BrokerStats holds all the stats about the broker
   714  type BrokerStats struct {
   715  	TotalReady   int
   716  	TotalUnacked int
   717  	TotalBlocked int
   718  	TotalWaiting int
   719  	ByScheduler  map[string]*SchedulerStats
   720  }
   721  
   722  // SchedulerStats holds the stats per scheduler
   723  type SchedulerStats struct {
   724  	Ready   int
   725  	Unacked int
   726  }
   727  
   728  // Len is for the sorting interface
   729  func (p PendingEvaluations) Len() int {
   730  	return len(p)
   731  }
   732  
   733  // Less is for the sorting interface. We flip the check
   734  // so that the "min" in the min-heap is the element with the
   735  // highest priority
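        //
        // For example (illustrative), given evaluations for two different jobs
        // with priorities 70 and 50, the priority-70 evaluation sorts first;
        // evaluations for the same job are ordered by ascending CreateIndex.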
   736  func (p PendingEvaluations) Less(i, j int) bool {
   737  	if p[i].JobID != p[j].JobID && p[i].Priority != p[j].Priority {
   738  		return !(p[i].Priority < p[j].Priority)
   739  	}
   740  	return p[i].CreateIndex < p[j].CreateIndex
   741  }
   742  
   743  // Swap is for the sorting interface
   744  func (p PendingEvaluations) Swap(i, j int) {
   745  	p[i], p[j] = p[j], p[i]
   746  }
   747  
   748  // Push is used to add a new evaluation to the slice
   749  func (p *PendingEvaluations) Push(e interface{}) {
   750  	*p = append(*p, e.(*structs.Evaluation))
   751  }
   752  
   753  // Pop is used to remove an evaluation from the slice
   754  func (p *PendingEvaluations) Pop() interface{} {
   755  	n := len(*p)
   756  	e := (*p)[n-1]
   757  	(*p)[n-1] = nil
   758  	*p = (*p)[:n-1]
   759  	return e
   760  }
   761  
   762  // Peek is used to peek at the next element that would be popped
   763  func (p PendingEvaluations) Peek() *structs.Evaluation {
   764  	n := len(p)
   765  	if n == 0 {
   766  		return nil
   767  	}
   768  	return p[n-1]
   769  }