github.com/zoomfoo/nomad@v0.8.5-0.20180907175415-f28fd3a1a056/nomad/eval_broker.go

package nomad

import (
	"container/heap"
	"errors"
	"fmt"
	"math/rand"
	"sync"
	"time"

	"context"

	"github.com/armon/go-metrics"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/lib/delayheap"
	"github.com/hashicorp/nomad/nomad/structs"
)

const (
	// failedQueue is the queue we add Evaluations to once
	// they've reached the deliveryLimit. This allows the leader to
	// set the status to failed.
	failedQueue = "_failed"
)

var (
	// ErrNotOutstanding is returned if an evaluation is not outstanding
	ErrNotOutstanding = errors.New("evaluation is not outstanding")

	// ErrTokenMismatch is returned if the outstanding eval has a different token
	ErrTokenMismatch = errors.New("evaluation token does not match")

	// ErrNackTimeoutReached is returned if an expired evaluation is reset
	ErrNackTimeoutReached = errors.New("evaluation nack timeout reached")
)

// EvalBroker is used to manage brokering of evaluations. When an evaluation is
// created, due to a change in a job specification or a node, we put it into the
// broker. The broker sorts evaluations by priority and scheduler type. This
// allows us to dequeue the highest priority work first, while also allowing sub-schedulers
// to only dequeue work they know how to handle. The broker is designed to be entirely
// in-memory and is managed by the leader node.
//
// The broker must provide at-least-once delivery semantics. It relies on explicit
// Ack/Nack messages to handle this. If a delivery is not Ack'd in a sufficient time
// span, it will be assumed Nack'd.
type EvalBroker struct {
	nackTimeout   time.Duration
	deliveryLimit int

	enabled bool
	stats   *BrokerStats

	// evals tracks queued evaluations by ID to de-duplicate enqueue.
	// The counter is the number of times we've attempted delivery,
	// and is used to eventually fail an evaluation.
	evals map[string]int

	// jobEvals tracks queued evaluations by a job's ID and namespace to serialize them
	jobEvals map[structs.NamespacedID]string

	// blocked tracks the blocked evaluations by JobID in a priority queue
	blocked map[structs.NamespacedID]PendingEvaluations

	// ready tracks the ready jobs by scheduler in a priority queue
	ready map[string]PendingEvaluations

	// unack is a map of evalID to an un-acknowledged evaluation
	unack map[string]*unackEval

	// waiting is used to notify on a per-scheduler basis of ready work
	waiting map[string]chan struct{}

	// requeue tracks evaluations that need to be re-enqueued once the current
	// evaluation finishes, keyed by token. If the token is Nacked or rejected
	// the evaluation is dropped, but if Acked successfully, the evaluation is
	// queued.
	requeue map[string]*structs.Evaluation

	// timeWait has evaluations that are waiting for time to elapse
	timeWait map[string]*time.Timer

	// delayedEvalCancelFunc is used to stop the long running go routine
	// that processes delayed evaluations
	delayedEvalCancelFunc context.CancelFunc

	// delayHeap is a heap used to track incoming evaluations that are
	// not eligible to enqueue until their WaitTime
	delayHeap *delayheap.DelayHeap

	// delayedEvalsUpdateCh is used to trigger notifications for updates
	// to the delayHeap
	delayedEvalsUpdateCh chan struct{}

	// initialNackDelay is the delay applied before re-enqueuing a
	// Nacked evaluation for the first time.
	initialNackDelay time.Duration

	// subsequentNackDelay is the delay applied before re-enqueuing
	// an evaluation that has been Nacked more than once. This delay is
	// compounding after the first Nack.
	subsequentNackDelay time.Duration

	l sync.RWMutex
}
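
// A minimal lifecycle sketch for the type above, assuming the caller is the
// active leader and eval is a *structs.Evaluation with ID, Type, JobID,
// Namespace, and Priority set; the durations are illustrative values only:
//
//	b, _ := NewEvalBroker(time.Minute, 5*time.Second, time.Minute, 3)
//	b.SetEnabled(true) // only the active leader enables the broker
//	b.Enqueue(eval)    // becomes dequeuable by schedulers of eval.Type
//	...
//	b.SetEnabled(false) // on losing leadership; flushes all broker state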

// unackEval tracks an unacknowledged evaluation along with the Nack timer
type unackEval struct {
	Eval      *structs.Evaluation
	Token     string
	NackTimer *time.Timer
}

// PendingEvaluations is a list of waiting evaluations.
// We implement the container/heap interface so that this is a
// priority queue
type PendingEvaluations []*structs.Evaluation

// NewEvalBroker creates a new evaluation broker. This is parameterized
// with the timeout used for messages that are not acknowledged before we
// assume a Nack and attempt to redeliver, as well as the deliveryLimit
// which prevents a failing eval from being endlessly delivered. The
// initialNackDelay is the delay before making a Nacked evaluation available
// again for the first Nack and subsequentNackDelay is the compounding delay
// after the first Nack.
func NewEvalBroker(timeout, initialNackDelay, subsequentNackDelay time.Duration, deliveryLimit int) (*EvalBroker, error) {
	if timeout < 0 {
		return nil, fmt.Errorf("timeout cannot be negative")
	}
	b := &EvalBroker{
		nackTimeout:          timeout,
		deliveryLimit:        deliveryLimit,
		enabled:              false,
		stats:                new(BrokerStats),
		evals:                make(map[string]int),
		jobEvals:             make(map[structs.NamespacedID]string),
		blocked:              make(map[structs.NamespacedID]PendingEvaluations),
		ready:                make(map[string]PendingEvaluations),
		unack:                make(map[string]*unackEval),
		waiting:              make(map[string]chan struct{}),
		requeue:              make(map[string]*structs.Evaluation),
		timeWait:             make(map[string]*time.Timer),
		initialNackDelay:     initialNackDelay,
		subsequentNackDelay:  subsequentNackDelay,
		delayHeap:            delayheap.NewDelayHeap(),
		delayedEvalsUpdateCh: make(chan struct{}, 1),
	}
	b.stats.ByScheduler = make(map[string]*SchedulerStats)

	return b, nil
}

// Enabled is used to check if the broker is enabled.
func (b *EvalBroker) Enabled() bool {
	b.l.RLock()
	defer b.l.RUnlock()
	return b.enabled
}

// SetEnabled is used to control if the broker is enabled. The broker
// should only be enabled on the active leader.
func (b *EvalBroker) SetEnabled(enabled bool) {
	b.l.Lock()
	prevEnabled := b.enabled
	b.enabled = enabled
	if !prevEnabled && enabled {
		// start the go routine for delayed evals
		ctx, cancel := context.WithCancel(context.Background())
		b.delayedEvalCancelFunc = cancel
		go b.runDelayedEvalsWatcher(ctx)
	}
	b.l.Unlock()
	if !enabled {
		b.flush()
	}
}

// Enqueue is used to enqueue a new evaluation
func (b *EvalBroker) Enqueue(eval *structs.Evaluation) {
	b.l.Lock()
	defer b.l.Unlock()
	b.processEnqueue(eval, "")
}

// EnqueueAll is used to enqueue many evaluations. The map allows evaluations
// that are being re-enqueued to include their token.
//
// When requeuing an evaluation that may already be enqueued, it is handled
// in one of the following ways:
// * Evaluation not outstanding: Process as a normal Enqueue
// * Evaluation outstanding: Do not allow the evaluation to be dequeued until:
//   * Ack received: Unblock the evaluation allowing it to be dequeued
//   * Nack received: Drop the evaluation as it was created as a result of a
//     scheduler run that was Nack'd
func (b *EvalBroker) EnqueueAll(evals map[*structs.Evaluation]string) {
	// The lock needs to be held until all evaluations are enqueued. This is so
	// that when Dequeue operations are unblocked they will pick the highest
	// priority evaluations.
	b.l.Lock()
	defer b.l.Unlock()
	for eval, token := range evals {
		b.processEnqueue(eval, token)
	}
}

// processEnqueue deduplicates evals and either enqueues them immediately or
// enforces the eval's wait time. If the token is passed, and the evaluation ID
// is outstanding, the evaluation is blocked until an Ack/Nack is received.
// processEnqueue must be called with the lock held.
func (b *EvalBroker) processEnqueue(eval *structs.Evaluation, token string) {
	// Check if already enqueued
	if _, ok := b.evals[eval.ID]; ok {
		if token == "" {
			return
		}

		// If the token has been passed, the evaluation is being reblocked by
		// the scheduler and should be processed once the outstanding evaluation
		// is Acked or Nacked.
		if unack, ok := b.unack[eval.ID]; ok && unack.Token == token {
			b.requeue[token] = eval
		}
		return
	} else if b.enabled {
		b.evals[eval.ID] = 0
	}

	// Check if we need to enforce a wait
	if eval.Wait > 0 {
		b.processWaitingEnqueue(eval)
		return
	}

	if !eval.WaitUntil.IsZero() {
		b.delayHeap.Push(&evalWrapper{eval}, eval.WaitUntil)
		b.stats.TotalWaiting += 1
		// Signal an update.
		select {
		case b.delayedEvalsUpdateCh <- struct{}{}:
		default:
		}
		return
	}

	b.enqueueLocked(eval, eval.Type)
}
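
// To make the token path above concrete: suppose evaluation e is outstanding
// with token t (it was handed out by Dequeue), and the scheduler submits an
// updated copy e2 of the same evaluation (e2.ID == e.ID). A hypothetical
// sequence using only the methods in this file:
//
//	b.EnqueueAll(map[*structs.Evaluation]string{e2: t})
//	// e2 is parked in b.requeue[t] and is not yet dequeuable
//	b.Ack(e.ID, t) // success: e2 is now enqueued normally
//	// b.Nack(e.ID, t) would instead drop e2, since the scheduler run
//	// that produced it was rejected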

// processWaitingEnqueue waits the given duration on the evaluation before
// enqueuing.
func (b *EvalBroker) processWaitingEnqueue(eval *structs.Evaluation) {
	timer := time.AfterFunc(eval.Wait, func() {
		b.enqueueWaiting(eval)
	})
	b.timeWait[eval.ID] = timer
	b.stats.TotalWaiting += 1
}

// enqueueWaiting is used to enqueue a waiting evaluation
func (b *EvalBroker) enqueueWaiting(eval *structs.Evaluation) {
	b.l.Lock()
	defer b.l.Unlock()
	delete(b.timeWait, eval.ID)
	b.stats.TotalWaiting -= 1
	b.enqueueLocked(eval, eval.Type)
}

// enqueueLocked is used to enqueue with the lock held
func (b *EvalBroker) enqueueLocked(eval *structs.Evaluation, queue string) {
	// Do nothing if not enabled
	if !b.enabled {
		return
	}

	// Check if there is an evaluation for this JobID pending
	namespacedID := structs.NamespacedID{
		ID:        eval.JobID,
		Namespace: eval.Namespace,
	}
	pendingEval := b.jobEvals[namespacedID]
	if pendingEval == "" {
		b.jobEvals[namespacedID] = eval.ID
	} else if pendingEval != eval.ID {
		blocked := b.blocked[namespacedID]
		heap.Push(&blocked, eval)
		b.blocked[namespacedID] = blocked
		b.stats.TotalBlocked += 1
		return
	}

	// Find the pending by scheduler class
	pending, ok := b.ready[queue]
	if !ok {
		pending = make([]*structs.Evaluation, 0, 16)
		if _, ok := b.waiting[queue]; !ok {
			b.waiting[queue] = make(chan struct{}, 1)
		}
	}

	// Push onto the heap
	heap.Push(&pending, eval)
	b.ready[queue] = pending

	// Update the stats
	b.stats.TotalReady += 1
	bySched, ok := b.stats.ByScheduler[queue]
	if !ok {
		bySched = &SchedulerStats{}
		b.stats.ByScheduler[queue] = bySched
	}
	bySched.Ready += 1

	// Unblock any blocked dequeues
	select {
	case b.waiting[queue] <- struct{}{}:
	default:
	}
}

// Dequeue is used to perform a blocking dequeue
func (b *EvalBroker) Dequeue(schedulers []string, timeout time.Duration) (*structs.Evaluation, string, error) {
	var timeoutTimer *time.Timer
	var timeoutCh <-chan time.Time
SCAN:
	// Scan for work
	eval, token, err := b.scanForSchedulers(schedulers)
	if err != nil {
		if timeoutTimer != nil {
			timeoutTimer.Stop()
		}
		return nil, "", err
	}

	// Check if we have something
	if eval != nil {
		if timeoutTimer != nil {
			timeoutTimer.Stop()
		}
		return eval, token, nil
	}

	// Setup the timeout channel the first time around
	if timeoutTimer == nil && timeout != 0 {
		timeoutTimer = time.NewTimer(timeout)
		timeoutCh = timeoutTimer.C
	}

	// Block until we get work
	scan := b.waitForSchedulers(schedulers, timeoutCh)
	if scan {
		goto SCAN
	}
	return nil, "", nil
}
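
// A minimal polling loop for a scheduler worker built on Dequeue, Ack and
// Nack from this file. The scheduler name, timeout and the process function
// are illustrative placeholders:
//
//	for {
//		eval, token, err := b.Dequeue([]string{"service"}, 5*time.Second)
//		if err != nil || eval == nil {
//			continue // broker disabled, or timed out with no work
//		}
//		if err := process(eval); err != nil {
//			b.Nack(eval.ID, token) // redeliver after the nack delay
//		} else {
//			b.Ack(eval.ID, token)
//		}
//	}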

// scanForSchedulers scans for work on any of the schedulers. The highest priority work
// is dequeued first. This may return nothing if there is no work waiting.
func (b *EvalBroker) scanForSchedulers(schedulers []string) (*structs.Evaluation, string, error) {
	b.l.Lock()
	defer b.l.Unlock()

	// Do nothing if not enabled
	if !b.enabled {
		return nil, "", fmt.Errorf("eval broker disabled")
	}

	// Scan for eligible work
	var eligibleSched []string
	var eligiblePriority int
	for _, sched := range schedulers {
		// Get the pending queue
		pending, ok := b.ready[sched]
		if !ok {
			continue
		}

		// Peek at the next item
		ready := pending.Peek()
		if ready == nil {
			continue
		}

		// Add to eligible if equal or greater priority
		if len(eligibleSched) == 0 || ready.Priority > eligiblePriority {
			eligibleSched = []string{sched}
			eligiblePriority = ready.Priority

		} else if eligiblePriority > ready.Priority {
			continue

		} else if eligiblePriority == ready.Priority {
			eligibleSched = append(eligibleSched, sched)
		}
	}

	// Determine behavior based on eligible work
	switch n := len(eligibleSched); n {
	case 0:
		// No work to do!
		return nil, "", nil

	case 1:
		// Only a single task, dequeue
		return b.dequeueForSched(eligibleSched[0])

	default:
		// Multiple tasks. We pick a random task so that we fairly
		// distribute work.
		offset := rand.Intn(n)
		return b.dequeueForSched(eligibleSched[offset])
	}
}

// dequeueForSched is used to dequeue the next work item for a given scheduler.
// This assumes locks are held and that this scheduler has work
func (b *EvalBroker) dequeueForSched(sched string) (*structs.Evaluation, string, error) {
	// Get the pending queue
	pending := b.ready[sched]
	raw := heap.Pop(&pending)
	b.ready[sched] = pending
	eval := raw.(*structs.Evaluation)

	// Generate a UUID for the token
	token := uuid.Generate()

	// Setup Nack timer
	nackTimer := time.AfterFunc(b.nackTimeout, func() {
		b.Nack(eval.ID, token)
	})

	// Add to the unack queue
	b.unack[eval.ID] = &unackEval{
		Eval:      eval,
		Token:     token,
		NackTimer: nackTimer,
	}

	// Increment the dequeue count
	b.evals[eval.ID] += 1

	// Update the stats
	b.stats.TotalReady -= 1
	b.stats.TotalUnacked += 1
	bySched := b.stats.ByScheduler[sched]
	bySched.Ready -= 1
	bySched.Unacked += 1

	return eval, token, nil
}

// waitForSchedulers is used to wait for work on any of the schedulers or until a timeout.
// It returns true if there may be work waiting to be scanned for.
func (b *EvalBroker) waitForSchedulers(schedulers []string, timeoutCh <-chan time.Time) bool {
	doneCh := make(chan struct{})
	readyCh := make(chan struct{}, 1)
	defer close(doneCh)

	// Start all the watchers
	b.l.Lock()
	for _, sched := range schedulers {
		waitCh, ok := b.waiting[sched]
		if !ok {
			waitCh = make(chan struct{}, 1)
			b.waiting[sched] = waitCh
		}

		// Start a goroutine that either waits for the waitCh on this scheduler
		// to unblock or for this waitForSchedulers call to return
		go func() {
			select {
			case <-waitCh:
				select {
				case readyCh <- struct{}{}:
				default:
				}
			case <-doneCh:
			}
		}()
	}
	b.l.Unlock()

	// Block until we have ready work and should scan, or until we timeout
	// and should not make an attempt to scan for work
	select {
	case <-readyCh:
		return true
	case <-timeoutCh:
		return false
	}
}

// Outstanding checks if an EvalID has been delivered but not acknowledged
// and returns the associated token for the evaluation.
func (b *EvalBroker) Outstanding(evalID string) (string, bool) {
	b.l.RLock()
	defer b.l.RUnlock()
	unack, ok := b.unack[evalID]
	if !ok {
		return "", false
	}
	return unack.Token, true
}

// OutstandingReset resets the Nack timer for the EvalID if the
// token matches and the eval is outstanding
func (b *EvalBroker) OutstandingReset(evalID, token string) error {
	b.l.RLock()
	defer b.l.RUnlock()
	unack, ok := b.unack[evalID]
	if !ok {
		return ErrNotOutstanding
	}
	if unack.Token != token {
		return ErrTokenMismatch
	}
	if !unack.NackTimer.Reset(b.nackTimeout) {
		return ErrNackTimeoutReached
	}
	return nil
}

// Ack is used to positively acknowledge handling an evaluation
func (b *EvalBroker) Ack(evalID, token string) error {
	b.l.Lock()
	defer b.l.Unlock()

	// Always delete the requeued evaluation. Either the Ack is successful and
	// we requeue it or it isn't and we want to remove it.
	defer delete(b.requeue, token)

	// Lookup the unack'd eval
	unack, ok := b.unack[evalID]
	if !ok {
		return fmt.Errorf("Evaluation ID not found")
	}
	if unack.Token != token {
		return fmt.Errorf("Token does not match for Evaluation ID")
	}
	jobID := unack.Eval.JobID

	// Ensure we were able to stop the timer
	if !unack.NackTimer.Stop() {
		return fmt.Errorf("Evaluation ID Ack'd after Nack timer expiration")
	}

	// Update the stats
	b.stats.TotalUnacked -= 1
	queue := unack.Eval.Type
	if b.evals[evalID] > b.deliveryLimit {
		queue = failedQueue
	}
	bySched := b.stats.ByScheduler[queue]
	bySched.Unacked -= 1

	// Cleanup
	delete(b.unack, evalID)
	delete(b.evals, evalID)

	namespacedID := structs.NamespacedID{
		ID:        jobID,
		Namespace: unack.Eval.Namespace,
	}
	delete(b.jobEvals, namespacedID)

	// Check if there are any blocked evaluations
	if blocked := b.blocked[namespacedID]; len(blocked) != 0 {
		raw := heap.Pop(&blocked)
		if len(blocked) > 0 {
			b.blocked[namespacedID] = blocked
		} else {
			delete(b.blocked, namespacedID)
		}
		eval := raw.(*structs.Evaluation)
		b.stats.TotalBlocked -= 1
		b.enqueueLocked(eval, eval.Type)
	}

	// Re-enqueue the evaluation.
	if eval, ok := b.requeue[token]; ok {
		b.processEnqueue(eval, "")
	}

	return nil
}

// Nack is used to negatively acknowledge handling an evaluation
func (b *EvalBroker) Nack(evalID, token string) error {
	b.l.Lock()
	defer b.l.Unlock()

	// Always delete the requeued evaluation since the Nack means the requeue is
	// invalid.
	delete(b.requeue, token)

	// Lookup the unack'd eval
	unack, ok := b.unack[evalID]
	if !ok {
		return fmt.Errorf("Evaluation ID not found")
	}
	if unack.Token != token {
		return fmt.Errorf("Token does not match for Evaluation ID")
	}

	// Stop the timer, doesn't matter if we've missed it
	unack.NackTimer.Stop()

	// Cleanup
	delete(b.unack, evalID)

	// Update the stats
	b.stats.TotalUnacked -= 1
	bySched := b.stats.ByScheduler[unack.Eval.Type]
	bySched.Unacked -= 1

	// Check if we've hit the delivery limit, and re-enqueue
	// in the failedQueue
	if dequeues := b.evals[evalID]; dequeues >= b.deliveryLimit {
		b.enqueueLocked(unack.Eval, failedQueue)
	} else {
		e := unack.Eval
		e.Wait = b.nackReenqueueDelay(e, dequeues)

		// See if there should be a delay before re-enqueuing
		if e.Wait > 0 {
			b.processWaitingEnqueue(e)
		} else {
			b.enqueueLocked(e, e.Type)
		}
	}

	return nil
}

// nackReenqueueDelay is used to determine the delay that should be applied on
// the evaluation given the number of previous attempts
func (b *EvalBroker) nackReenqueueDelay(eval *structs.Evaluation, prevDequeues int) time.Duration {
	switch {
	case prevDequeues <= 0:
		return 0
	case prevDequeues == 1:
		return b.initialNackDelay
	default:
		// For each subsequent nack compound a delay
		return time.Duration(prevDequeues-1) * b.subsequentNackDelay
	}
}
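
// As a worked example of the compounding delay above, assume
// initialNackDelay = 5s and subsequentNackDelay = 1m: an evaluation Nacked
// after its first dequeue (prevDequeues == 1) waits 5s before becoming ready
// again, after its second dequeue it waits 1m, after its third 2m, and so on,
// until deliveryLimit dequeues have been reached and Nack routes it to the
// failedQueue instead.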

// PauseNackTimeout is used to pause the Nack timeout for an eval that is making
// progress but is in a potentially unbounded operation such as the plan queue.
func (b *EvalBroker) PauseNackTimeout(evalID, token string) error {
	b.l.RLock()
	defer b.l.RUnlock()
	unack, ok := b.unack[evalID]
	if !ok {
		return ErrNotOutstanding
	}
	if unack.Token != token {
		return ErrTokenMismatch
	}
	if !unack.NackTimer.Stop() {
		return ErrNackTimeoutReached
	}
	return nil
}

// ResumeNackTimeout is used to resume the Nack timeout for an eval that was
// paused. It should be resumed after leaving an unbounded operation.
func (b *EvalBroker) ResumeNackTimeout(evalID, token string) error {
	b.l.Lock()
	defer b.l.Unlock()
	unack, ok := b.unack[evalID]
	if !ok {
		return ErrNotOutstanding
	}
	if unack.Token != token {
		return ErrTokenMismatch
	}
	unack.NackTimer.Reset(b.nackTimeout)
	return nil
}

// flush is used to clear the state of the broker
func (b *EvalBroker) flush() {
	b.l.Lock()
	defer b.l.Unlock()

	// Unblock any waiters
	for _, waitCh := range b.waiting {
		close(waitCh)
	}
	b.waiting = make(map[string]chan struct{})

	// Cancel any Nack timers
	for _, unack := range b.unack {
		unack.NackTimer.Stop()
	}

	// Cancel any time wait evals
	for _, wait := range b.timeWait {
		wait.Stop()
	}

	// Cancel the delayed evaluations goroutine
	if b.delayedEvalCancelFunc != nil {
		b.delayedEvalCancelFunc()
	}

	// Clear out the update channel for delayed evaluations
	b.delayedEvalsUpdateCh = make(chan struct{}, 1)

	// Reset the broker
	b.stats.TotalReady = 0
	b.stats.TotalUnacked = 0
	b.stats.TotalBlocked = 0
	b.stats.TotalWaiting = 0
	b.stats.ByScheduler = make(map[string]*SchedulerStats)
	b.evals = make(map[string]int)
	b.jobEvals = make(map[structs.NamespacedID]string)
	b.blocked = make(map[structs.NamespacedID]PendingEvaluations)
	b.ready = make(map[string]PendingEvaluations)
	b.unack = make(map[string]*unackEval)
	b.timeWait = make(map[string]*time.Timer)
	b.delayHeap = delayheap.NewDelayHeap()
}

// evalWrapper satisfies the HeapNode interface
type evalWrapper struct {
	eval *structs.Evaluation
}

func (d *evalWrapper) Data() interface{} {
	return d.eval
}

func (d *evalWrapper) ID() string {
	return d.eval.ID
}

func (d *evalWrapper) Namespace() string {
	return d.eval.Namespace
}

// runDelayedEvalsWatcher is a long-lived function that waits until a time deadline is met for
// pending evaluations before enqueuing them
func (b *EvalBroker) runDelayedEvalsWatcher(ctx context.Context) {
	var timerChannel <-chan time.Time
	var delayTimer *time.Timer
	for {
		eval, waitUntil := b.nextDelayedEval()
		if waitUntil.IsZero() {
			timerChannel = nil
		} else {
			launchDur := waitUntil.Sub(time.Now().UTC())
			if delayTimer == nil {
				delayTimer = time.NewTimer(launchDur)
			} else {
				delayTimer.Reset(launchDur)
			}
			timerChannel = delayTimer.C
		}

		select {
		case <-ctx.Done():
			return
		case <-timerChannel:
			// remove from the heap since we can enqueue it now
			b.l.Lock()
			b.delayHeap.Remove(&evalWrapper{eval})
			b.stats.TotalWaiting -= 1
			b.enqueueLocked(eval, eval.Type)
			b.l.Unlock()
		case <-b.delayedEvalsUpdateCh:
			continue
		}
	}
}
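
// Tying the delayed-evaluation pieces together: an evaluation enqueued with a
// non-zero WaitUntil is pushed onto delayHeap by processEnqueue rather than
// made ready, and the watcher above wakes at the earliest WaitUntil (or when
// delayedEvalsUpdateCh signals a change) and moves it into the ready queue
// via enqueueLocked.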

// nextDelayedEval returns the next delayed eval to launch and when it should be enqueued.
// This peeks at the heap to return the top. If the heap is empty, this returns nil and zero time.
func (b *EvalBroker) nextDelayedEval() (*structs.Evaluation, time.Time) {
	b.l.RLock()
	// If there is nothing, wait for an update.
	if b.delayHeap.Length() == 0 {
		b.l.RUnlock()
		return nil, time.Time{}
	}
	nextEval := b.delayHeap.Peek()
	b.l.RUnlock()
	if nextEval == nil {
		return nil, time.Time{}
	}
	eval := nextEval.Node.Data().(*structs.Evaluation)
	return eval, nextEval.WaitUntil
}

// Stats is used to query the state of the broker
func (b *EvalBroker) Stats() *BrokerStats {
	// Allocate a new stats struct
	stats := new(BrokerStats)
	stats.ByScheduler = make(map[string]*SchedulerStats)

	b.l.RLock()
	defer b.l.RUnlock()

	// Copy all the stats
	stats.TotalReady = b.stats.TotalReady
	stats.TotalUnacked = b.stats.TotalUnacked
	stats.TotalBlocked = b.stats.TotalBlocked
	stats.TotalWaiting = b.stats.TotalWaiting
	for sched, subStat := range b.stats.ByScheduler {
		subStatCopy := new(SchedulerStats)
		*subStatCopy = *subStat
		stats.ByScheduler[sched] = subStatCopy
	}
	return stats
}

// EmitStats is used to export metrics about the broker while enabled
func (b *EvalBroker) EmitStats(period time.Duration, stopCh chan struct{}) {
	for {
		select {
		case <-time.After(period):
			stats := b.Stats()
			metrics.SetGauge([]string{"nomad", "broker", "total_ready"}, float32(stats.TotalReady))
			metrics.SetGauge([]string{"nomad", "broker", "total_unacked"}, float32(stats.TotalUnacked))
			metrics.SetGauge([]string{"nomad", "broker", "total_blocked"}, float32(stats.TotalBlocked))
			metrics.SetGauge([]string{"nomad", "broker", "total_waiting"}, float32(stats.TotalWaiting))
			for sched, schedStats := range stats.ByScheduler {
				metrics.SetGauge([]string{"nomad", "broker", sched, "ready"}, float32(schedStats.Ready))
				metrics.SetGauge([]string{"nomad", "broker", sched, "unacked"}, float32(schedStats.Unacked))
			}

		case <-stopCh:
			return
		}
	}
}

// BrokerStats holds all the stats about the broker
type BrokerStats struct {
	TotalReady   int
	TotalUnacked int
	TotalBlocked int
	TotalWaiting int
	ByScheduler  map[string]*SchedulerStats
}

// SchedulerStats holds the stats per scheduler
type SchedulerStats struct {
	Ready   int
	Unacked int
}

// Len is for the sorting interface
func (p PendingEvaluations) Len() int {
	return len(p)
}

// Less is for the sorting interface. We flip the check
// so that the "min" in the min-heap is the element with the
// highest priority
func (p PendingEvaluations) Less(i, j int) bool {
	if p[i].JobID != p[j].JobID && p[i].Priority != p[j].Priority {
		return !(p[i].Priority < p[j].Priority)
	}
	return p[i].CreateIndex < p[j].CreateIndex
}

// Swap is for the sorting interface
func (p PendingEvaluations) Swap(i, j int) {
	p[i], p[j] = p[j], p[i]
}

// Push is used to add a new evaluation to the slice
func (p *PendingEvaluations) Push(e interface{}) {
	*p = append(*p, e.(*structs.Evaluation))
}

// Pop is used to remove an evaluation from the slice
func (p *PendingEvaluations) Pop() interface{} {
	n := len(*p)
	e := (*p)[n-1]
	(*p)[n-1] = nil
	*p = (*p)[:n-1]
	return e
}

// Peek is used to peek at the next element that would be popped
func (p PendingEvaluations) Peek() *structs.Evaluation {
	n := len(p)
	if n == 0 {
		return nil
	}
	return p[n-1]
}
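
// As a worked example of the ordering defined by Less above: with evaluations
// A (Priority 70, CreateIndex 10) and B (Priority 50, CreateIndex 5) for
// different jobs, A is dequeued first because the priority comparison is
// inverted; if A and B belong to the same job (or share a priority), the one
// with the lower CreateIndex is dequeued first, preserving FIFO order within
// a job.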