github.com/quite/nomad@v0.8.6/nomad/deploymentwatcher/deployment_watcher.go

package deploymentwatcher

import (
	"context"
	"fmt"
	"log"
	"sync"
	"time"

	"golang.org/x/time/rate"

	memdb "github.com/hashicorp/go-memdb"
	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
)

const (
	// perJobEvalBatchPeriod is the batching length before creating an evaluation to
	// trigger the scheduler when allocations are marked as healthy.
	perJobEvalBatchPeriod = 1 * time.Second
)

var (
	// allowRescheduleTransition is the transition that allows failed
	// allocations that are part of a deployment to be rescheduled. We create a
	// one-off variable to avoid creating a new object for every request.
	allowRescheduleTransition = &structs.DesiredTransition{
		Reschedule: helper.BoolToPtr(true),
	}
)

// deploymentTriggers is the set of functions required to trigger changes on
// behalf of a deployment.
type deploymentTriggers interface {
	// createUpdate is used to create allocation desired transition updates and
	// an evaluation.
	createUpdate(allocs map[string]*structs.DesiredTransition, eval *structs.Evaluation) (uint64, error)

	// upsertJob is used to roll back a job when autoreverting for a deployment
	upsertJob(job *structs.Job) (uint64, error)

	// upsertDeploymentStatusUpdate is used to upsert a deployment status update
	// and an optional evaluation and job to upsert
	upsertDeploymentStatusUpdate(u *structs.DeploymentStatusUpdate, eval *structs.Evaluation, job *structs.Job) (uint64, error)

	// upsertDeploymentPromotion is used to promote canaries in a deployment
	upsertDeploymentPromotion(req *structs.ApplyDeploymentPromoteRequest) (uint64, error)

	// upsertDeploymentAllocHealth is used to set the health of allocations in a
	// deployment
	upsertDeploymentAllocHealth(req *structs.ApplyDeploymentAllocHealthRequest) (uint64, error)
}

// deploymentWatcher is used to watch a single deployment and trigger the
// scheduler when allocation health transitions.
type deploymentWatcher struct {
	// queryLimiter is used to limit the rate of blocking queries
	queryLimiter *rate.Limiter

	// deploymentTriggers holds the methods required to trigger changes on behalf of the
	// deployment
	deploymentTriggers

	// state is the state that is watched for state changes.
	state *state.StateStore

	// deploymentID is the deployment's ID being watched
	deploymentID string

	// deploymentUpdateCh is triggered when there is an updated deployment
	deploymentUpdateCh chan struct{}

	// d is the deployment being watched
	d *structs.Deployment

	// j is the job the deployment is for
	j *structs.Job

	// outstandingBatch marks whether an outstanding function exists to create
	// the evaluation. Access should be done through the lock.
	outstandingBatch bool

	// outstandingAllowReplacements is the map of allocations that will be
	// marked as allowing a replacement. Access should be done through the lock.
	outstandingAllowReplacements map[string]*structs.DesiredTransition

	// latestEval is the latest eval for the job. It is updated by the watch
	// loop and any time an evaluation is created. The field should be accessed
	// by holding the lock.
	latestEval uint64

	logger *log.Logger
	ctx    context.Context
	exitFn context.CancelFunc
	l      sync.RWMutex
}
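
// noopTriggers is an illustrative, hypothetical stub (it is not used by the
// watcher) showing what is needed to satisfy the deploymentTriggers interface
// above, for example in a test harness; in this package the real triggers are
// expected to be supplied by the parent watcher that creates each
// deploymentWatcher.
type noopTriggers struct{}

func (noopTriggers) createUpdate(map[string]*structs.DesiredTransition, *structs.Evaluation) (uint64, error) {
	return 0, nil
}

func (noopTriggers) upsertJob(*structs.Job) (uint64, error) { return 0, nil }

func (noopTriggers) upsertDeploymentStatusUpdate(*structs.DeploymentStatusUpdate, *structs.Evaluation, *structs.Job) (uint64, error) {
	return 0, nil
}

func (noopTriggers) upsertDeploymentPromotion(*structs.ApplyDeploymentPromoteRequest) (uint64, error) {
	return 0, nil
}

func (noopTriggers) upsertDeploymentAllocHealth(*structs.ApplyDeploymentAllocHealthRequest) (uint64, error) {
	return 0, nil
}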

// newDeploymentWatcher returns a deployment watcher that is used to watch
// deployments and trigger the scheduler as needed.
func newDeploymentWatcher(parent context.Context, queryLimiter *rate.Limiter,
	logger *log.Logger, state *state.StateStore, d *structs.Deployment,
	j *structs.Job, triggers deploymentTriggers) *deploymentWatcher {

	ctx, exitFn := context.WithCancel(parent)
	w := &deploymentWatcher{
		queryLimiter:       queryLimiter,
		deploymentID:       d.ID,
		deploymentUpdateCh: make(chan struct{}, 1),
		d:                  d,
		j:                  j,
		state:              state,
		deploymentTriggers: triggers,
		logger:             logger,
		ctx:                ctx,
		exitFn:             exitFn,
	}

	// Start the long lived watcher that scans for allocation updates
	go w.watch()

	return w
}

// updateDeployment is used to update the tracked deployment.
func (w *deploymentWatcher) updateDeployment(d *structs.Deployment) {
	w.l.Lock()
	defer w.l.Unlock()

	// Update and trigger
	w.d = d
	select {
	case w.deploymentUpdateCh <- struct{}{}:
	default:
	}
}

// getDeployment returns the tracked deployment.
func (w *deploymentWatcher) getDeployment() *structs.Deployment {
	w.l.RLock()
	defer w.l.RUnlock()
	return w.d
}
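
// exampleWatcherLifecycle is an illustrative sketch (not called anywhere in
// this file) of how a deploymentWatcher is driven: it is created with
// newDeploymentWatcher, handed updated copies of the deployment via
// updateDeployment, and torn down with StopWatch. The limiter values here are
// placeholders; the real ones come from whatever owns the watcher.
func exampleWatcherLifecycle(logger *log.Logger, store *state.StateStore,
	d *structs.Deployment, j *structs.Job, triggers deploymentTriggers) {

	limiter := rate.NewLimiter(rate.Limit(10), 10)
	w := newDeploymentWatcher(context.Background(), limiter, logger, store, d, j, triggers)

	// When a newer copy of the deployment is observed, pass it to the watcher
	// so the progress deadline can be recomputed.
	w.updateDeployment(d)

	// Once the deployment is terminal, or the watcher is otherwise no longer
	// needed, cancel its context.
	w.StopWatch()
}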

func (w *deploymentWatcher) SetAllocHealth(
	req *structs.DeploymentAllocHealthRequest,
	resp *structs.DeploymentUpdateResponse) error {

	// If we are failing the deployment, update the status and potentially
	// roll back
	var j *structs.Job
	var u *structs.DeploymentStatusUpdate

	// If there are unhealthy allocations we need to mark the deployment as
	// failed and check if we should roll back to a stable job.
	if l := len(req.UnhealthyAllocationIDs); l != 0 {
		unhealthy := make(map[string]struct{}, l)
		for _, alloc := range req.UnhealthyAllocationIDs {
			unhealthy[alloc] = struct{}{}
		}

		// Get the allocations for the deployment
		snap, err := w.state.Snapshot()
		if err != nil {
			return err
		}

		allocs, err := snap.AllocsByDeployment(nil, req.DeploymentID)
		if err != nil {
			return err
		}

		// Determine if we should autorevert to an older job
		desc := structs.DeploymentStatusDescriptionFailedAllocations
		for _, alloc := range allocs {
			// Check that the alloc has been marked unhealthy
			if _, ok := unhealthy[alloc.ID]; !ok {
				continue
			}

			// Check if the group has autorevert set
			group, ok := w.getDeployment().TaskGroups[alloc.TaskGroup]
			if !ok || !group.AutoRevert {
				continue
			}

			var err error
			j, err = w.latestStableJob()
			if err != nil {
				return err
			}

			if j != nil {
				j, desc = w.handleRollbackValidity(j, desc)
			}
			break
		}

		u = w.getDeploymentStatusUpdate(structs.DeploymentStatusFailed, desc)
	}

	// Canonicalize the job in case it doesn't have its namespace set
	j.Canonicalize()

	// Create the request
	areq := &structs.ApplyDeploymentAllocHealthRequest{
		DeploymentAllocHealthRequest: *req,
		Timestamp:                    time.Now(),
		Eval:                         w.getEval(),
		DeploymentUpdate:             u,
		Job:                          j,
	}

	index, err := w.upsertDeploymentAllocHealth(areq)
	if err != nil {
		return err
	}

	// Build the response
	resp.EvalID = areq.Eval.ID
	resp.EvalCreateIndex = index
	resp.DeploymentModifyIndex = index
	resp.Index = index
	if j != nil {
		resp.RevertedJobVersion = helper.Uint64ToPtr(j.Version)
	}
	return nil
}

// handleRollbackValidity checks if the job being rolled back to has the same
// spec as the existing job. It returns a modified description and job
// accordingly.
func (w *deploymentWatcher) handleRollbackValidity(rollbackJob *structs.Job, desc string) (*structs.Job, string) {
	// Only roll back if the job being changed has a different spec.
	// This prevents an infinite revert cycle when a previously stable version
	// of the job fails to start up during a rollback.
	// If the job we are trying to roll back to is identical to the current
	// job, we stop because the rollback will not succeed.
	if w.j.SpecChanged(rollbackJob) {
		desc = structs.DeploymentStatusDescriptionRollback(desc, rollbackJob.Version)
	} else {
		desc = structs.DeploymentStatusDescriptionRollbackNoop(desc, rollbackJob.Version)
		rollbackJob = nil
	}
	return rollbackJob, desc
}
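
// exampleSetAllocHealth is an illustrative sketch of how a caller might mark
// allocations unhealthy through the watcher; the allocation IDs are
// placeholders. Marking an alloc of an auto-reverting group as unhealthy fails
// the deployment and attaches the latest stable job for rollback, as handled
// by SetAllocHealth and handleRollbackValidity above.
func exampleSetAllocHealth(w *deploymentWatcher, unhealthyAllocIDs []string) error {
	req := &structs.DeploymentAllocHealthRequest{
		DeploymentID:           w.deploymentID,
		UnhealthyAllocationIDs: unhealthyAllocIDs,
	}
	var resp structs.DeploymentUpdateResponse
	return w.SetAllocHealth(req, &resp)
}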

func (w *deploymentWatcher) PromoteDeployment(
	req *structs.DeploymentPromoteRequest,
	resp *structs.DeploymentUpdateResponse) error {

	// Create the request
	areq := &structs.ApplyDeploymentPromoteRequest{
		DeploymentPromoteRequest: *req,
		Eval:                     w.getEval(),
	}

	index, err := w.upsertDeploymentPromotion(areq)
	if err != nil {
		return err
	}

	// Build the response
	resp.EvalID = areq.Eval.ID
	resp.EvalCreateIndex = index
	resp.DeploymentModifyIndex = index
	resp.Index = index
	return nil
}

func (w *deploymentWatcher) PauseDeployment(
	req *structs.DeploymentPauseRequest,
	resp *structs.DeploymentUpdateResponse) error {
	// Determine the status we should transition to and if we need to create an
	// evaluation
	status, desc := structs.DeploymentStatusPaused, structs.DeploymentStatusDescriptionPaused
	var eval *structs.Evaluation
	evalID := ""
	if !req.Pause {
		status, desc = structs.DeploymentStatusRunning, structs.DeploymentStatusDescriptionRunning
		eval = w.getEval()
		evalID = eval.ID
	}
	update := w.getDeploymentStatusUpdate(status, desc)

	// Commit the change
	i, err := w.upsertDeploymentStatusUpdate(update, eval, nil)
	if err != nil {
		return err
	}

	// Build the response
	if evalID != "" {
		resp.EvalID = evalID
		resp.EvalCreateIndex = i
	}
	resp.DeploymentModifyIndex = i
	resp.Index = i
	return nil
}

func (w *deploymentWatcher) FailDeployment(
	req *structs.DeploymentFailRequest,
	resp *structs.DeploymentUpdateResponse) error {

	status, desc := structs.DeploymentStatusFailed, structs.DeploymentStatusDescriptionFailedByUser

	// Determine if we should rollback
	rollback := false
	for _, state := range w.getDeployment().TaskGroups {
		if state.AutoRevert {
			rollback = true
			break
		}
	}

	var rollbackJob *structs.Job
	if rollback {
		var err error
		rollbackJob, err = w.latestStableJob()
		if err != nil {
			return err
		}

		if rollbackJob != nil {
			rollbackJob, desc = w.handleRollbackValidity(rollbackJob, desc)
		} else {
			desc = structs.DeploymentStatusDescriptionNoRollbackTarget(desc)
		}
	}

	// Commit the change
	update := w.getDeploymentStatusUpdate(status, desc)
	eval := w.getEval()
	i, err := w.upsertDeploymentStatusUpdate(update, eval, rollbackJob)
	if err != nil {
		return err
	}

	// Build the response
	resp.EvalID = eval.ID
	resp.EvalCreateIndex = i
	resp.DeploymentModifyIndex = i
	resp.Index = i
	if rollbackJob != nil {
		resp.RevertedJobVersion = helper.Uint64ToPtr(rollbackJob.Version)
	}
	return nil
}

// StopWatch stops watching the deployment. This should be called whenever a
// deployment is completed or the watcher is no longer needed.
func (w *deploymentWatcher) StopWatch() {
	w.exitFn()
}
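
// examplePauseResume is an illustrative sketch of pausing and later resuming a
// deployment through the watcher. Only the resume path creates an evaluation,
// since a paused deployment needs no scheduler activity; this mirrors
// PauseDeployment above.
func examplePauseResume(w *deploymentWatcher) error {
	var resp structs.DeploymentUpdateResponse

	pause := &structs.DeploymentPauseRequest{DeploymentID: w.deploymentID, Pause: true}
	if err := w.PauseDeployment(pause, &resp); err != nil {
		return err
	}

	resume := &structs.DeploymentPauseRequest{DeploymentID: w.deploymentID, Pause: false}
	return w.PauseDeployment(resume, &resp)
}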

// watch is the long running watcher that watches for both allocation and
// deployment changes. Its function is to create evaluations to trigger the
// scheduler when more progress can be made, and to fail the deployment if it
// has failed, potentially rolling back the job. Progress can be made when an
// allocation transitions to healthy, so we create an eval.
func (w *deploymentWatcher) watch() {
	// Get the deadline. This is likely a zero time to begin with but we need to
	// handle the case that the deployment has already progressed and we are now
	// just starting to watch it. This would most likely occur if there was a
	// leader transition and we are now starting our watcher.
	currentDeadline := getDeploymentProgressCutoff(w.getDeployment())
	var deadlineTimer *time.Timer
	if currentDeadline.IsZero() {
		deadlineTimer = time.NewTimer(0)
		if !deadlineTimer.Stop() {
			<-deadlineTimer.C
		}
	} else {
		deadlineTimer = time.NewTimer(currentDeadline.Sub(time.Now()))
	}

	allocIndex := uint64(1)
	var updates *allocUpdates

	rollback, deadlineHit := false, false

FAIL:
	for {
		select {
		case <-w.ctx.Done():
			return
		case <-deadlineTimer.C:
			// We have hit the progress deadline so fail the deployment. We need
			// to determine whether we should roll back the job by inspecting
			// which of the deployment's allocs are healthy and which aren't.
			deadlineHit = true
			fail, rback, err := w.shouldFail()
			if err != nil {
				w.logger.Printf("[ERR] nomad.deployment_watcher: failed to determine whether to rollback job for deployment %q: %v", w.deploymentID, err)
			}
			if !fail {
				w.logger.Printf("[DEBUG] nomad.deployment_watcher: skipping deadline for deployment %q", w.deploymentID)
				continue
			}

			w.logger.Printf("[DEBUG] nomad.deployment_watcher: deadline for deployment %q hit and rollback is %v", w.deploymentID, rback)
			rollback = rback
			break FAIL
		case <-w.deploymentUpdateCh:
			// Get the updated deployment and check if we should change the
			// deadline timer
			next := getDeploymentProgressCutoff(w.getDeployment())
			if !next.Equal(currentDeadline) {
				prevDeadlineZero := currentDeadline.IsZero()
				currentDeadline = next
				// The most recent deadline can be zero if no allocs were created for this deployment.
				// The deadline timer would have already been stopped once in that case. To prevent
				// deadlocking on the already stopped deadline timer, we only drain the channel if
				// the previous deadline was not zero.
				if !prevDeadlineZero && !deadlineTimer.Stop() {
					select {
					case <-deadlineTimer.C:
					default:
					}
				}
				deadlineTimer.Reset(next.Sub(time.Now()))
			}

		case updates = <-w.getAllocsCh(allocIndex):
			if err := updates.err; err != nil {
				if err == context.Canceled || w.ctx.Err() == context.Canceled {
					return
				}

				w.logger.Printf("[ERR] nomad.deployment_watcher: failed to retrieve allocations for deployment %q: %v", w.deploymentID, err)
				return
			}
			allocIndex = updates.index

			// We have allocation changes for this deployment so determine the
			// steps to take.
			res, err := w.handleAllocUpdate(updates.allocs)
			if err != nil {
				if err == context.Canceled || w.ctx.Err() == context.Canceled {
					return
				}

				w.logger.Printf("[ERR] nomad.deployment_watcher: failed handling allocation updates: %v", err)
				return
			}

			// The deployment has failed, so break out of the watch loop and
			// handle the failure
			if res.failDeployment {
				rollback = res.rollback
				break FAIL
			}

			// Create an eval to push the deployment along
			if res.createEval || len(res.allowReplacements) != 0 {
				w.createBatchedUpdate(res.allowReplacements, allocIndex)
			}
		}
	}

	// Change the deployment's status to failed
	desc := structs.DeploymentStatusDescriptionFailedAllocations
	if deadlineHit {
		desc = structs.DeploymentStatusDescriptionProgressDeadline
	}

	// Rollback to the old job if necessary
	var j *structs.Job
	if rollback {
		var err error
		j, err = w.latestStableJob()
		if err != nil {
			w.logger.Printf("[ERR] nomad.deployment_watcher: failed to lookup latest stable job for %q: %v", w.j.ID, err)
		}

		// Description should include that the job is being rolled back to
		// version N
		if j != nil {
			j, desc = w.handleRollbackValidity(j, desc)
		} else {
			desc = structs.DeploymentStatusDescriptionNoRollbackTarget(desc)
		}
	}

	// Update the status of the deployment to failed and create an evaluation.
	e := w.getEval()
	u := w.getDeploymentStatusUpdate(structs.DeploymentStatusFailed, desc)
	if _, err := w.upsertDeploymentStatusUpdate(u, e, j); err != nil {
		w.logger.Printf("[ERR] nomad.deployment_watcher: failed to update deployment %q status: %v", w.deploymentID, err)
	}
}
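
// exampleResetDeadline is an illustrative sketch of the timer idiom used in
// watch above: before Reset, a possibly fired timer is stopped and its channel
// drained without blocking, so a stale expiry is not mistaken for a new
// deadline later.
func exampleResetDeadline(t *time.Timer, next time.Time) {
	if !t.Stop() {
		select {
		case <-t.C:
		default:
		}
	}
	t.Reset(next.Sub(time.Now()))
}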

// allocUpdateResult is used to return the desired actions given the newest set
// of allocations for the deployment.
type allocUpdateResult struct {
	createEval        bool
	failDeployment    bool
	rollback          bool
	allowReplacements []string
}

// handleAllocUpdate is used to compute the set of actions to take based on the
// updated allocations for the deployment.
func (w *deploymentWatcher) handleAllocUpdate(allocs []*structs.AllocListStub) (allocUpdateResult, error) {
	var res allocUpdateResult

	// Get the latest evaluation index
	latestEval, blocked, err := w.jobEvalStatus()
	if err != nil {
		if err == context.Canceled || w.ctx.Err() == context.Canceled {
			return res, err
		}

		return res, fmt.Errorf("failed to determine last evaluation index for job %q: %v", w.j.ID, err)
	}

	deployment := w.getDeployment()
	for _, alloc := range allocs {
		dstate, ok := deployment.TaskGroups[alloc.TaskGroup]
		if !ok {
			continue
		}

		// Determine if the update stanza for this group is progress based
		progressBased := dstate.ProgressDeadline != 0

		// Check if the allocation has failed and needs to be marked as
		// allowing a replacement
		if progressBased && alloc.DeploymentStatus.IsUnhealthy() &&
			deployment.Active() && !alloc.DesiredTransition.ShouldReschedule() {
			res.allowReplacements = append(res.allowReplacements, alloc.ID)
			continue
		}

		// We need to create an eval so the job can progress.
		if !blocked && alloc.DeploymentStatus.IsHealthy() && alloc.DeploymentStatus.ModifyIndex > latestEval {
			res.createEval = true
		}

		// If the group is using a progress deadline, we don't have to do anything.
		if progressBased {
			continue
		}

		// Fail on the first bad allocation
		if alloc.DeploymentStatus.IsUnhealthy() {
			// Check if the group has autorevert set
			if dstate.AutoRevert {
				res.rollback = true
			}

			// Since we have an unhealthy allocation, fail the deployment
			res.failDeployment = true
		}

		// All conditions have been hit so we can break
		if res.createEval && res.failDeployment && res.rollback {
			break
		}
	}

	return res, nil
}
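
// To summarize handleAllocUpdate: for a group with a progress deadline, an
// unhealthy alloc on an active deployment is only marked so that it may be
// replaced (rescheduled) and the deadline decides failure; for a group without
// one, the first unhealthy alloc fails the deployment immediately and requests
// a rollback when auto_revert is set. A healthy alloc newer than the latest
// evaluation triggers a new eval unless a blocked eval already exists.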

// shouldFail returns whether the job should be failed and whether it should be
// rolled back to an earlier stable version by examining the allocations in the
// deployment.
func (w *deploymentWatcher) shouldFail() (fail, rollback bool, err error) {
	snap, err := w.state.Snapshot()
	if err != nil {
		return false, false, err
	}

	d, err := snap.DeploymentByID(nil, w.deploymentID)
	if err != nil {
		return false, false, err
	}
	if d == nil {
		// The deployment wasn't in the state store, possibly due to a system gc
		return false, false, fmt.Errorf("deployment id not found: %q", w.deploymentID)
	}

	fail = false
	for tg, state := range d.TaskGroups {
		// If we are in a canary state we fail if there aren't enough healthy
		// allocs to satisfy DesiredCanaries
		if state.DesiredCanaries > 0 && !state.Promoted {
			if state.HealthyAllocs >= state.DesiredCanaries {
				continue
			}
		} else if state.HealthyAllocs >= state.DesiredTotal {
			continue
		}

		// We have failed this TG
		fail = true

		// If the group isn't set to autorevert, we don't need to roll back on
		// its behalf
		upd := w.j.LookupTaskGroup(tg).Update
		if upd == nil || !upd.AutoRevert {
			continue
		}

		// We have unhealthy allocs and the group wants an autorevert
		return true, true, nil
	}

	return fail, false, nil
}

// getDeploymentProgressCutoff returns the progress cutoff for the given
// deployment
func getDeploymentProgressCutoff(d *structs.Deployment) time.Time {
	var next time.Time
	for _, state := range d.TaskGroups {
		if next.IsZero() || state.RequireProgressBy.Before(next) {
			next = state.RequireProgressBy
		}
	}
	return next
}

// latestStableJob returns the latest stable job. It may be nil if none exists.
func (w *deploymentWatcher) latestStableJob() (*structs.Job, error) {
	snap, err := w.state.Snapshot()
	if err != nil {
		return nil, err
	}

	versions, err := snap.JobVersionsByID(nil, w.j.Namespace, w.j.ID)
	if err != nil {
		return nil, err
	}

	var stable *structs.Job
	for _, job := range versions {
		if job.Stable {
			stable = job
			break
		}
	}

	return stable, nil
}
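
// exampleProgressCutoff is an illustrative sketch (the group names and
// durations are made up) showing how getDeploymentProgressCutoff above picks
// the deadline for the whole deployment: the earliest RequireProgressBy across
// its task groups.
func exampleProgressCutoff(now time.Time) time.Time {
	d := &structs.Deployment{
		TaskGroups: map[string]*structs.DeploymentState{
			"web": {RequireProgressBy: now.Add(5 * time.Minute)},
			"api": {RequireProgressBy: now.Add(10 * time.Minute)},
		},
	}

	// Returns now + 5m, the "web" group's deadline.
	return getDeploymentProgressCutoff(d)
}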

// createBatchedUpdate creates an eval for the given index and marks the given
// allocations as allowed to reschedule.
func (w *deploymentWatcher) createBatchedUpdate(allowReplacements []string, forIndex uint64) {
	w.l.Lock()
	defer w.l.Unlock()

	// Store the allocations that can be replaced
	for _, allocID := range allowReplacements {
		if w.outstandingAllowReplacements == nil {
			w.outstandingAllowReplacements = make(map[string]*structs.DesiredTransition, len(allowReplacements))
		}
		w.outstandingAllowReplacements[allocID] = allowRescheduleTransition
	}

	if w.outstandingBatch || (forIndex < w.latestEval && len(allowReplacements) == 0) {
		return
	}

	w.outstandingBatch = true

	time.AfterFunc(perJobEvalBatchPeriod, func() {
		// If the timer has been created and we have since shut down, we need
		// to no-op the evaluation creation.
		select {
		case <-w.ctx.Done():
			return
		default:
		}

		w.l.Lock()
		replacements := w.outstandingAllowReplacements
		w.outstandingAllowReplacements = nil
		w.outstandingBatch = false
		w.l.Unlock()

		// Create the eval
		if _, err := w.createUpdate(replacements, w.getEval()); err != nil {
			w.logger.Printf("[ERR] nomad.deployment_watcher: failed to create evaluation for deployment %q: %v", w.deploymentID, err)
		}
	})
}
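
// The batching in createBatchedUpdate coalesces bursts of allocation updates:
// the first call within a window arms a one-second timer (perJobEvalBatchPeriod)
// and later calls only accumulate allocations to be replaced, so a burst of
// updates produces a single evaluation rather than one per update. A minimal,
// self-contained sketch of the same coalescing pattern (names are made up):
//
//	var (
//		mu      sync.Mutex
//		pending bool
//	)
//	schedule := func(flush func()) {
//		mu.Lock()
//		defer mu.Unlock()
//		if pending {
//			return
//		}
//		pending = true
//		time.AfterFunc(perJobEvalBatchPeriod, func() {
//			mu.Lock()
//			pending = false
//			mu.Unlock()
//			flush()
//		})
//	}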

// getEval returns an evaluation suitable for the deployment
func (w *deploymentWatcher) getEval() *structs.Evaluation {
	return &structs.Evaluation{
		ID:           uuid.Generate(),
		Namespace:    w.j.Namespace,
		Priority:     w.j.Priority,
		Type:         w.j.Type,
		TriggeredBy:  structs.EvalTriggerDeploymentWatcher,
		JobID:        w.j.ID,
		DeploymentID: w.deploymentID,
		Status:       structs.EvalStatusPending,
	}
}

// getDeploymentStatusUpdate returns a deployment status update
func (w *deploymentWatcher) getDeploymentStatusUpdate(status, desc string) *structs.DeploymentStatusUpdate {
	return &structs.DeploymentStatusUpdate{
		DeploymentID:      w.deploymentID,
		Status:            status,
		StatusDescription: desc,
	}
}

type allocUpdates struct {
	allocs []*structs.AllocListStub
	index  uint64
	err    error
}

// getAllocsCh retrieves the allocations that are part of the deployment,
// blocking at the given index, and delivers the result on the returned
// channel.
func (w *deploymentWatcher) getAllocsCh(index uint64) <-chan *allocUpdates {
	out := make(chan *allocUpdates, 1)
	go func() {
		allocs, index, err := w.getAllocs(index)
		out <- &allocUpdates{
			allocs: allocs,
			index:  index,
			err:    err,
		}
	}()

	return out
}

// getAllocs retrieves the allocations that are part of the deployment, blocking
// at the given index.
func (w *deploymentWatcher) getAllocs(index uint64) ([]*structs.AllocListStub, uint64, error) {
	resp, index, err := w.state.BlockingQuery(w.getAllocsImpl, index, w.ctx)
	if err != nil {
		return nil, 0, err
	}
	if err := w.ctx.Err(); err != nil {
		return nil, 0, err
	}

	return resp.([]*structs.AllocListStub), index, nil
}

// getAllocsImpl retrieves the allocations for the watched deployment from the
// passed state store.
func (w *deploymentWatcher) getAllocsImpl(ws memdb.WatchSet, state *state.StateStore) (interface{}, uint64, error) {
	if err := w.queryLimiter.Wait(w.ctx); err != nil {
		return nil, 0, err
	}

	// Capture all the allocations
	allocs, err := state.AllocsByDeployment(ws, w.deploymentID)
	if err != nil {
		return nil, 0, err
	}

	maxIndex := uint64(0)
	stubs := make([]*structs.AllocListStub, 0, len(allocs))
	for _, alloc := range allocs {
		stubs = append(stubs, alloc.Stub())

		if maxIndex < alloc.ModifyIndex {
			maxIndex = alloc.ModifyIndex
		}
	}

	// Use the last index that affected the allocs table
	if len(stubs) == 0 {
		index, err := state.Index("allocs")
		if err != nil {
			return nil, index, err
		}
		maxIndex = index
	}

	return stubs, maxIndex, nil
}

// jobEvalStatus returns the eval status for a job. It returns the index of the
// last evaluation created for the job, as well as whether there exists a
// blocked evaluation for the job. The index is used to determine if an
// allocation update requires an evaluation to be triggered. If there already is
// a blocked evaluation, no eval should be created.
func (w *deploymentWatcher) jobEvalStatus() (latestIndex uint64, blocked bool, err error) {
	if err := w.queryLimiter.Wait(w.ctx); err != nil {
		return 0, false, err
	}

	snap, err := w.state.Snapshot()
	if err != nil {
		return 0, false, err
	}

	evals, err := snap.EvalsByJob(nil, w.j.Namespace, w.j.ID)
	if err != nil {
		return 0, false, err
	}

	if len(evals) == 0 {
		index, err := snap.Index("evals")
		return index, false, err
	}

	var max uint64
	for _, eval := range evals {
		// If we have a blocked eval, then we do not care what the index is
		// since we will not need to make a new eval.
		if eval.ShouldBlock() {
			return 0, true, nil
		}

		// Prefer using the snapshot index. Otherwise use the create index
		if eval.SnapshotIndex != 0 && max < eval.SnapshotIndex {
			max = eval.SnapshotIndex
		} else if max < eval.CreateIndex {
			max = eval.CreateIndex
		}
	}

	return max, false, nil
}
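
// exampleAllocPoll is an illustrative sketch (not used by the watcher) of the
// blocking-query loop that getAllocsCh supports: the index from each response
// is fed back as the minimum index of the next query, so the goroutine only
// wakes up when the deployment's allocations change or the context is
// cancelled. watch above does this inside a larger select.
func exampleAllocPoll(w *deploymentWatcher) {
	index := uint64(1)
	for {
		select {
		case <-w.ctx.Done():
			return
		case updates := <-w.getAllocsCh(index):
			if updates.err != nil {
				return
			}
			index = updates.index
			// ... inspect updates.allocs here ...
		}
	}
}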