github.com/zhizhiboom/nomad@v0.8.5-0.20180907175415-f28fd3a1a056/nomad/deploymentwatcher/deployment_watcher.go

package deploymentwatcher

import (
	"context"
	"fmt"
	"log"
	"sync"
	"time"

	"golang.org/x/time/rate"

	memdb "github.com/hashicorp/go-memdb"
	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
)

const (
	// perJobEvalBatchPeriod is the batching length before creating an evaluation to
	// trigger the scheduler when allocations are marked as healthy.
	perJobEvalBatchPeriod = 1 * time.Second
)

var (
	// allowRescheduleTransition is the transition that allows failed
	// allocations part of a deployment to be rescheduled. We create a one off
	// variable to avoid creating a new object for every request.
	allowRescheduleTransition = &structs.DesiredTransition{
		Reschedule: helper.BoolToPtr(true),
	}
)

// deploymentTriggers are the set of functions required to trigger changes on
// behalf of a deployment
type deploymentTriggers interface {
	// createUpdate is used to create allocation desired transition updates and
	// an evaluation.
	createUpdate(allocs map[string]*structs.DesiredTransition, eval *structs.Evaluation) (uint64, error)

	// upsertJob is used to roll back a job when autoreverting for a deployment
	upsertJob(job *structs.Job) (uint64, error)

	// upsertDeploymentStatusUpdate is used to upsert a deployment status update
	// and an optional evaluation and job to upsert
	upsertDeploymentStatusUpdate(u *structs.DeploymentStatusUpdate, eval *structs.Evaluation, job *structs.Job) (uint64, error)

	// upsertDeploymentPromotion is used to promote canaries in a deployment
	upsertDeploymentPromotion(req *structs.ApplyDeploymentPromoteRequest) (uint64, error)

	// upsertDeploymentAllocHealth is used to set the health of allocations in a
	// deployment
	upsertDeploymentAllocHealth(req *structs.ApplyDeploymentAllocHealthRequest) (uint64, error)
}

// deploymentWatcher is used to watch a single deployment and trigger the
// scheduler when allocation health transitions.
type deploymentWatcher struct {
	// queryLimiter is used to limit the rate of blocking queries
	queryLimiter *rate.Limiter

	// deploymentTriggers holds the methods required to trigger changes on behalf of the
	// deployment
	deploymentTriggers

	// state is the state that is watched for state changes.
	state *state.StateStore

	// deploymentID is the deployment's ID being watched
	deploymentID string

	// deploymentUpdateCh is triggered when there is an updated deployment
	deploymentUpdateCh chan struct{}

	// d is the deployment being watched
	d *structs.Deployment

	// j is the job the deployment is for
	j *structs.Job

	// outstandingBatch marks whether an outstanding function exists to create
	// the evaluation. Access should be done through the lock.
	outstandingBatch bool

	// outstandingAllowReplacements is the map of allocations that will be
	// marked as allowing a replacement. Access should be done through the lock.
	outstandingAllowReplacements map[string]*structs.DesiredTransition

	// latestEval is the latest eval for the job. It is updated by the watch
	// loop and any time an evaluation is created. The field should be accessed
	// by holding the lock or using the setter and getter methods.
	latestEval uint64

	logger *log.Logger
	ctx    context.Context
	exitFn context.CancelFunc
	l      sync.RWMutex
}

// newDeploymentWatcher returns a deployment watcher that is used to watch
// deployments and trigger the scheduler as needed.
func newDeploymentWatcher(parent context.Context, queryLimiter *rate.Limiter,
	logger *log.Logger, state *state.StateStore, d *structs.Deployment,
	j *structs.Job, triggers deploymentTriggers) *deploymentWatcher {

	ctx, exitFn := context.WithCancel(parent)
	w := &deploymentWatcher{
		queryLimiter:       queryLimiter,
		deploymentID:       d.ID,
		deploymentUpdateCh: make(chan struct{}, 1),
		d:                  d,
		j:                  j,
		state:              state,
		deploymentTriggers: triggers,
		logger:             logger,
		ctx:                ctx,
		exitFn:             exitFn,
	}

	// Start the long lived watcher that scans for allocation updates
	go w.watch()

	return w
}
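
// Illustrative sketch (not part of the original file): how a caller might wire
// up a watcher. The limiter value, ctx, logger, stateStore, deployment, job,
// and triggers implementation are placeholders assumed to exist in the
// caller's scope; only newDeploymentWatcher and StopWatch come from this file.
//
//	limiter := rate.NewLimiter(rate.Limit(10), 10) // hypothetical rate
//	w := newDeploymentWatcher(ctx, limiter, logger, stateStore, deployment, job, triggers)
//	defer w.StopWatch() // stop the watch loop once the deployment is no longer tracked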

// updateDeployment is used to update the tracked deployment.
func (w *deploymentWatcher) updateDeployment(d *structs.Deployment) {
	w.l.Lock()
	defer w.l.Unlock()

	// Update and trigger
	w.d = d
	select {
	case w.deploymentUpdateCh <- struct{}{}:
	default:
	}
}

// getDeployment returns the tracked deployment.
func (w *deploymentWatcher) getDeployment() *structs.Deployment {
	w.l.RLock()
	defer w.l.RUnlock()
	return w.d
}

func (w *deploymentWatcher) SetAllocHealth(
	req *structs.DeploymentAllocHealthRequest,
	resp *structs.DeploymentUpdateResponse) error {

	// If we are failing the deployment, update the status and potentially
	// roll back
	var j *structs.Job
	var u *structs.DeploymentStatusUpdate

	// If there are unhealthy allocations we need to mark the deployment as
	// failed and check if we should roll back to a stable job.
	if l := len(req.UnhealthyAllocationIDs); l != 0 {
		unhealthy := make(map[string]struct{}, l)
		for _, alloc := range req.UnhealthyAllocationIDs {
			unhealthy[alloc] = struct{}{}
		}

		// Get the allocations for the deployment
		snap, err := w.state.Snapshot()
		if err != nil {
			return err
		}

		allocs, err := snap.AllocsByDeployment(nil, req.DeploymentID)
		if err != nil {
			return err
		}

		// Determine if we should autorevert to an older job
		desc := structs.DeploymentStatusDescriptionFailedAllocations
		for _, alloc := range allocs {
			// Check that the alloc has been marked unhealthy
			if _, ok := unhealthy[alloc.ID]; !ok {
				continue
			}

			// Check if the group has autorevert set
			group, ok := w.getDeployment().TaskGroups[alloc.TaskGroup]
			if !ok || !group.AutoRevert {
				continue
			}

			var err error
			j, err = w.latestStableJob()
			if err != nil {
				return err
			}

			if j != nil {
				j, desc = w.handleRollbackValidity(j, desc)
			}
			break
		}

		u = w.getDeploymentStatusUpdate(structs.DeploymentStatusFailed, desc)
	}

	// Canonicalize the job in case it doesn't have namespace set
	j.Canonicalize()

	// Create the request
	areq := &structs.ApplyDeploymentAllocHealthRequest{
		DeploymentAllocHealthRequest: *req,
		Timestamp:                    time.Now(),
		Eval:                         w.getEval(),
		DeploymentUpdate:             u,
		Job:                          j,
	}

	index, err := w.upsertDeploymentAllocHealth(areq)
	if err != nil {
		return err
	}

	// Build the response
	resp.EvalID = areq.Eval.ID
	resp.EvalCreateIndex = index
	resp.DeploymentModifyIndex = index
	resp.Index = index
	if j != nil {
		resp.RevertedJobVersion = helper.Uint64ToPtr(j.Version)
	}
	w.setLatestEval(index)
	return nil
}
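
// Illustrative sketch (not part of the original file): marking an allocation
// unhealthy through SetAllocHealth. The allocation ID is a placeholder; only
// the request/response types and the method itself come from the real code.
//
//	var resp structs.DeploymentUpdateResponse
//	err := w.SetAllocHealth(&structs.DeploymentAllocHealthRequest{
//		DeploymentID:           w.deploymentID,
//		UnhealthyAllocationIDs: []string{allocID}, // hypothetical alloc ID
//	}, &resp)
//	// With unhealthy allocs the deployment is marked failed and, if the task
//	// group has auto_revert set, the job may be rolled back to the latest
//	// stable version (see resp.RevertedJobVersion).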

// handleRollbackValidity checks if the job being rolled back to has the same spec as the existing job.
// It returns a modified description and job accordingly.
func (w *deploymentWatcher) handleRollbackValidity(rollbackJob *structs.Job, desc string) (*structs.Job, string) {
	// Only roll back if the job being changed has a different spec.
	// This prevents an infinite revert cycle when a previously stable version of the job fails to start up during a rollback.
	// If the job we are trying to roll back to is identical to the current job, we stop because the rollback will not succeed.
	if w.j.SpecChanged(rollbackJob) {
		desc = structs.DeploymentStatusDescriptionRollback(desc, rollbackJob.Version)
	} else {
		desc = structs.DeploymentStatusDescriptionRollbackNoop(desc, rollbackJob.Version)
		rollbackJob = nil
	}
	return rollbackJob, desc
}

func (w *deploymentWatcher) PromoteDeployment(
	req *structs.DeploymentPromoteRequest,
	resp *structs.DeploymentUpdateResponse) error {

	// Create the request
	areq := &structs.ApplyDeploymentPromoteRequest{
		DeploymentPromoteRequest: *req,
		Eval:                     w.getEval(),
	}

	index, err := w.upsertDeploymentPromotion(areq)
	if err != nil {
		return err
	}

	// Build the response
	resp.EvalID = areq.Eval.ID
	resp.EvalCreateIndex = index
	resp.DeploymentModifyIndex = index
	resp.Index = index
	w.setLatestEval(index)
	return nil
}

func (w *deploymentWatcher) PauseDeployment(
	req *structs.DeploymentPauseRequest,
	resp *structs.DeploymentUpdateResponse) error {
	// Determine the status we should transition to and if we need to create an
	// evaluation
	status, desc := structs.DeploymentStatusPaused, structs.DeploymentStatusDescriptionPaused
	var eval *structs.Evaluation
	evalID := ""
	if !req.Pause {
		status, desc = structs.DeploymentStatusRunning, structs.DeploymentStatusDescriptionRunning
		eval = w.getEval()
		evalID = eval.ID
	}
	update := w.getDeploymentStatusUpdate(status, desc)

	// Commit the change
	i, err := w.upsertDeploymentStatusUpdate(update, eval, nil)
	if err != nil {
		return err
	}

	// Build the response
	if evalID != "" {
		resp.EvalID = evalID
		resp.EvalCreateIndex = i
	}
	resp.DeploymentModifyIndex = i
	resp.Index = i
	w.setLatestEval(i)
	return nil
}

func (w *deploymentWatcher) FailDeployment(
	req *structs.DeploymentFailRequest,
	resp *structs.DeploymentUpdateResponse) error {

	status, desc := structs.DeploymentStatusFailed, structs.DeploymentStatusDescriptionFailedByUser

	// Determine if we should roll back
	rollback := false
	for _, state := range w.getDeployment().TaskGroups {
		if state.AutoRevert {
			rollback = true
			break
		}
	}

	var rollbackJob *structs.Job
	if rollback {
		var err error
		rollbackJob, err = w.latestStableJob()
		if err != nil {
			return err
		}

		if rollbackJob != nil {
			rollbackJob, desc = w.handleRollbackValidity(rollbackJob, desc)
		} else {
			desc = structs.DeploymentStatusDescriptionNoRollbackTarget(desc)
		}
	}

	// Commit the change
	update := w.getDeploymentStatusUpdate(status, desc)
	eval := w.getEval()
	i, err := w.upsertDeploymentStatusUpdate(update, eval, rollbackJob)
	if err != nil {
		return err
	}

	// Build the response
	resp.EvalID = eval.ID
	resp.EvalCreateIndex = i
	resp.DeploymentModifyIndex = i
	resp.Index = i
	if rollbackJob != nil {
		resp.RevertedJobVersion = helper.Uint64ToPtr(rollbackJob.Version)
	}
	w.setLatestEval(i)
	return nil
}

// StopWatch stops watching the deployment. This should be called whenever a
// deployment is completed or the watcher is no longer needed.
func (w *deploymentWatcher) StopWatch() {
	w.exitFn()
}

// watch is the long running watcher that watches for both allocation and
// deployment changes. Its function is to create evaluations to trigger the
// scheduler when more progress can be made, to fail the deployment if it has
// failed, and to potentially roll back the job. Progress can be made when an
// allocation transitions to healthy, so we create an eval.
func (w *deploymentWatcher) watch() {
	// Get the deadline. This is likely a zero time to begin with but we need to
	// handle the case that the deployment has already progressed and we are now
	// just starting to watch it. This most likely would occur if there was a
	// leader transition and we are now starting our watcher.
	currentDeadline := getDeploymentProgressCutoff(w.getDeployment())
	var deadlineTimer *time.Timer
	if currentDeadline.IsZero() {
		deadlineTimer = time.NewTimer(0)
		if !deadlineTimer.Stop() {
			<-deadlineTimer.C
		}
	} else {
		deadlineTimer = time.NewTimer(currentDeadline.Sub(time.Now()))
	}

	allocIndex := uint64(1)
	var updates *allocUpdates

	rollback, deadlineHit := false, false

FAIL:
	for {
		select {
		case <-w.ctx.Done():
			return
		case <-deadlineTimer.C:
			// We have hit the progress deadline so fail the deployment. We need
			// to determine whether we should roll back the job by inspecting
			// which allocs that are part of the deployment are healthy and
			// which aren't.
			deadlineHit = true
			fail, rback, err := w.shouldFail()
			if err != nil {
				w.logger.Printf("[ERR] nomad.deployment_watcher: failed to determine whether to rollback job for deployment %q: %v", w.deploymentID, err)
			}
			if !fail {
				w.logger.Printf("[DEBUG] nomad.deployment_watcher: skipping deadline for deployment %q", w.deploymentID)
				continue
			}

			w.logger.Printf("[DEBUG] nomad.deployment_watcher: deadline for deployment %q hit and rollback is %v", w.deploymentID, rback)
			rollback = rback
			break FAIL
		case <-w.deploymentUpdateCh:
			// Get the updated deployment and check if we should change the
			// deadline timer
			next := getDeploymentProgressCutoff(w.getDeployment())
			if !next.Equal(currentDeadline) {
				prevDeadlineZero := currentDeadline.IsZero()
				currentDeadline = next
				// The most recent deadline can be zero if no allocs were created for this deployment.
				// The deadline timer would have already been stopped once in that case. To prevent
				// deadlocking on the already stopped deadline timer, we only drain the channel if
				// the previous deadline was not zero.
				if !prevDeadlineZero && !deadlineTimer.Stop() {
					select {
					case <-deadlineTimer.C:
					default:
					}
				}
				deadlineTimer.Reset(next.Sub(time.Now()))
			}

		case updates = <-w.getAllocsCh(allocIndex):
			if err := updates.err; err != nil {
				if err == context.Canceled || w.ctx.Err() == context.Canceled {
					return
				}

				w.logger.Printf("[ERR] nomad.deployment_watcher: failed to retrieve allocations for deployment %q: %v", w.deploymentID, err)
				return
			}
			allocIndex = updates.index

			// We have allocation changes for this deployment so determine the
			// steps to take.
			res, err := w.handleAllocUpdate(updates.allocs)
			if err != nil {
				if err == context.Canceled || w.ctx.Err() == context.Canceled {
					return
				}

				w.logger.Printf("[ERR] nomad.deployment_watcher: failed handling allocation updates: %v", err)
				return
			}

			// The deployment has failed, so break out of the watch loop and
			// handle the failure
			if res.failDeployment {
				rollback = res.rollback
				break FAIL
			}

			// Create an eval to push the deployment along
			if res.createEval || len(res.allowReplacements) != 0 {
				w.createBatchedUpdate(res.allowReplacements, allocIndex)
			}
		}
	}

	// Change the deployment's status to failed
	desc := structs.DeploymentStatusDescriptionFailedAllocations
	if deadlineHit {
		desc = structs.DeploymentStatusDescriptionProgressDeadline
	}

	// Roll back to the old job if necessary
	var j *structs.Job
	if rollback {
		var err error
		j, err = w.latestStableJob()
		if err != nil {
			w.logger.Printf("[ERR] nomad.deployment_watcher: failed to lookup latest stable job for %q: %v", w.j.ID, err)
		}

		// Description should include that the job is being rolled back to
		// version N
		if j != nil {
			j, desc = w.handleRollbackValidity(j, desc)
		} else {
			desc = structs.DeploymentStatusDescriptionNoRollbackTarget(desc)
		}
	}

	// Update the status of the deployment to failed and create an evaluation.
	e := w.getEval()
	u := w.getDeploymentStatusUpdate(structs.DeploymentStatusFailed, desc)
	if index, err := w.upsertDeploymentStatusUpdate(u, e, j); err != nil {
		w.logger.Printf("[ERR] nomad.deployment_watcher: failed to update deployment %q status: %v", w.deploymentID, err)
	} else {
		w.setLatestEval(index)
	}
}
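
// Illustrative sketch (not part of the original file): the stop/drain/reset
// dance used on deadlineTimer above, shown in isolation. This is the standard
// time.Timer pattern: Stop reports false if the timer already fired (or was
// already stopped), in which case the channel is drained non-blockingly, since
// the fired value may have been consumed elsewhere, before Reset is safe.
//
//	if !timer.Stop() {
//		select {
//		case <-timer.C: // drain a fired-but-unread value
//		default: // value already consumed; nothing to drain
//		}
//	}
//	timer.Reset(next.Sub(time.Now()))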

// allocUpdateResult is used to return the desired actions given the newest set
// of allocations for the deployment.
type allocUpdateResult struct {
	createEval        bool
	failDeployment    bool
	rollback          bool
	allowReplacements []string
}

// handleAllocUpdate is used to compute the set of actions to take based on the
// updated allocations for the deployment.
func (w *deploymentWatcher) handleAllocUpdate(allocs []*structs.AllocListStub) (allocUpdateResult, error) {
	var res allocUpdateResult

	// Get the latest evaluation index
	latestEval, err := w.latestEvalIndex()
	if err != nil {
		if err == context.Canceled || w.ctx.Err() == context.Canceled {
			return res, err
		}

		return res, fmt.Errorf("failed to determine last evaluation index for job %q: %v", w.j.ID, err)
	}

	deployment := w.getDeployment()
	for _, alloc := range allocs {
		dstate, ok := deployment.TaskGroups[alloc.TaskGroup]
		if !ok {
			continue
		}

		// Nothing to do for this allocation
		if alloc.DeploymentStatus == nil || alloc.DeploymentStatus.ModifyIndex <= latestEval {
			continue
		}

		// Determine if the update stanza for this group is progress based
		progressBased := dstate.ProgressDeadline != 0

		// We need to create an eval so the job can progress.
		if alloc.DeploymentStatus.IsHealthy() {
			res.createEval = true
		} else if progressBased && alloc.DeploymentStatus.IsUnhealthy() && deployment.Active() && !alloc.DesiredTransition.ShouldReschedule() {
			res.allowReplacements = append(res.allowReplacements, alloc.ID)
		}

		// If the group is using a progress deadline, we don't have to do anything.
		if progressBased {
			continue
		}

		// Fail on the first bad allocation
		if alloc.DeploymentStatus.IsUnhealthy() {
			// Check if the group has autorevert set
			if dstate.AutoRevert {
				res.rollback = true
			}

			// Since we have an unhealthy allocation, fail the deployment
			res.failDeployment = true
		}

		// All conditions have been hit so we can break
		if res.createEval && res.failDeployment && res.rollback {
			break
		}
	}

	return res, nil
}

// shouldFail returns whether the job should be failed and whether it should be
// rolled back to an earlier stable version by examining the allocations in the
// deployment.
func (w *deploymentWatcher) shouldFail() (fail, rollback bool, err error) {
	snap, err := w.state.Snapshot()
	if err != nil {
		return false, false, err
	}

	d, err := snap.DeploymentByID(nil, w.deploymentID)
	if err != nil {
		return false, false, err
	}
	if d == nil {
		// The deployment wasn't in the state store, possibly due to a system gc
		return false, false, fmt.Errorf("deployment id not found: %q", w.deploymentID)
	}

	fail = false
	for tg, state := range d.TaskGroups {
		// If we are in a canary state we fail if there aren't enough healthy
		// allocs to satisfy DesiredCanaries
		if state.DesiredCanaries > 0 && !state.Promoted {
			if state.HealthyAllocs >= state.DesiredCanaries {
				continue
			}
		} else if state.HealthyAllocs >= state.DesiredTotal {
			continue
		}

		// We have failed this TG
		fail = true

		// We don't need to autorevert this group
		upd := w.j.LookupTaskGroup(tg).Update
		if upd == nil || !upd.AutoRevert {
			continue
		}

		// Unhealthy allocs and we need to autorevert
		return true, true, nil
	}

	return fail, false, nil
}

// getDeploymentProgressCutoff returns the progress cutoff for the given
// deployment
func getDeploymentProgressCutoff(d *structs.Deployment) time.Time {
	var next time.Time
	for _, state := range d.TaskGroups {
		if next.IsZero() || state.RequireProgressBy.Before(next) {
			next = state.RequireProgressBy
		}
	}
	return next
}
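
// Illustrative sketch (not part of the original file): the cutoff is the
// earliest RequireProgressBy across all task groups, so with two groups the
// sooner deadline wins. The group names and times are placeholders.
//
//	// assuming d.TaskGroups["web"].RequireProgressBy   = t0.Add(5 * time.Minute)
//	//      and d.TaskGroups["cache"].RequireProgressBy = t0.Add(2 * time.Minute)
//	_ = getDeploymentProgressCutoff(d) // t0.Add(2 * time.Minute), the earliest deadline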

// latestStableJob returns the latest stable job. It may be nil if none exists
func (w *deploymentWatcher) latestStableJob() (*structs.Job, error) {
	snap, err := w.state.Snapshot()
	if err != nil {
		return nil, err
	}

	versions, err := snap.JobVersionsByID(nil, w.j.Namespace, w.j.ID)
	if err != nil {
		return nil, err
	}

	var stable *structs.Job
	for _, job := range versions {
		if job.Stable {
			stable = job
			break
		}
	}

	return stable, nil
}

// createBatchedUpdate creates an eval for the given index as well as updating
// the given allocations to allow them to reschedule.
func (w *deploymentWatcher) createBatchedUpdate(allowReplacements []string, forIndex uint64) {
	w.l.Lock()
	defer w.l.Unlock()

	// Store the allocations that can be replaced
	for _, allocID := range allowReplacements {
		if w.outstandingAllowReplacements == nil {
			w.outstandingAllowReplacements = make(map[string]*structs.DesiredTransition, len(allowReplacements))
		}
		w.outstandingAllowReplacements[allocID] = allowRescheduleTransition
	}

	if w.outstandingBatch || (forIndex < w.latestEval && len(allowReplacements) == 0) {
		return
	}

	w.outstandingBatch = true

	time.AfterFunc(perJobEvalBatchPeriod, func() {
		// If the timer has been created and then we shut down, we need to no-op
		// the evaluation creation.
		select {
		case <-w.ctx.Done():
			return
		default:
		}

		w.l.Lock()
		replacements := w.outstandingAllowReplacements
		w.outstandingAllowReplacements = nil
		w.outstandingBatch = false
		w.l.Unlock()

		// Create the eval
		if index, err := w.createUpdate(replacements, w.getEval()); err != nil {
			w.logger.Printf("[ERR] nomad.deployment_watcher: failed to create evaluation for deployment %q: %v", w.deploymentID, err)
		} else {
			w.setLatestEval(index)
		}
	})
}

// getEval returns an evaluation suitable for the deployment
func (w *deploymentWatcher) getEval() *structs.Evaluation {
	return &structs.Evaluation{
		ID:           uuid.Generate(),
		Namespace:    w.j.Namespace,
		Priority:     w.j.Priority,
		Type:         w.j.Type,
		TriggeredBy:  structs.EvalTriggerDeploymentWatcher,
		JobID:        w.j.ID,
		DeploymentID: w.deploymentID,
		Status:       structs.EvalStatusPending,
	}
}

// getDeploymentStatusUpdate returns a deployment status update
func (w *deploymentWatcher) getDeploymentStatusUpdate(status, desc string) *structs.DeploymentStatusUpdate {
	return &structs.DeploymentStatusUpdate{
		DeploymentID:      w.deploymentID,
		Status:            status,
		StatusDescription: desc,
	}
}

type allocUpdates struct {
	allocs []*structs.AllocListStub
	index  uint64
	err    error
}

// getAllocsCh retrieves the allocations that are part of the deployment,
// blocking at the given index.
func (w *deploymentWatcher) getAllocsCh(index uint64) <-chan *allocUpdates {
	out := make(chan *allocUpdates, 1)
	go func() {
		allocs, index, err := w.getAllocs(index)
		out <- &allocUpdates{
			allocs: allocs,
			index:  index,
			err:    err,
		}
	}()

	return out
}

// getAllocs retrieves the allocations that are part of the deployment,
// blocking at the given index.
func (w *deploymentWatcher) getAllocs(index uint64) ([]*structs.AllocListStub, uint64, error) {
	resp, index, err := w.state.BlockingQuery(w.getAllocsImpl, index, w.ctx)
	if err != nil {
		return nil, 0, err
	}
	if err := w.ctx.Err(); err != nil {
		return nil, 0, err
	}

	return resp.([]*structs.AllocListStub), index, nil
}

// getAllocsImpl retrieves the allocations for the watched deployment from the
// passed state store.
func (w *deploymentWatcher) getAllocsImpl(ws memdb.WatchSet, state *state.StateStore) (interface{}, uint64, error) {
	if err := w.queryLimiter.Wait(w.ctx); err != nil {
		return nil, 0, err
	}

	// Capture all the allocations
	allocs, err := state.AllocsByDeployment(ws, w.deploymentID)
	if err != nil {
		return nil, 0, err
	}

	stubs := make([]*structs.AllocListStub, 0, len(allocs))
	for _, alloc := range allocs {
		stubs = append(stubs, alloc.Stub())
	}

	// Use the last index that affected the allocs table
	index, err := state.Index("allocs")
	if err != nil {
		return nil, index, err
	}

	return stubs, index, nil
}

// latestEvalIndex returns the index of the last evaluation created for
// the job. The index is used to determine if an allocation update requires an
// evaluation to be triggered.
func (w *deploymentWatcher) latestEvalIndex() (uint64, error) {
	if err := w.queryLimiter.Wait(w.ctx); err != nil {
		return 0, err
	}

	snap, err := w.state.Snapshot()
	if err != nil {
		return 0, err
	}

	evals, err := snap.EvalsByJob(nil, w.j.Namespace, w.j.ID)
	if err != nil {
		return 0, err
	}

	if len(evals) == 0 {
		// No evaluations exist yet, so fall back to the index of the evals
		// table itself.
		idx, err := snap.Index("evals")
		if err == nil {
			w.setLatestEval(idx)
		}

		return idx, err
	}

	// Prefer using the snapshot index. Otherwise use the create index
	e := evals[0]
	if e.SnapshotIndex != 0 {
		w.setLatestEval(e.SnapshotIndex)
		return e.SnapshotIndex, nil
	}

	w.setLatestEval(e.CreateIndex)
	return e.CreateIndex, nil
}

// setLatestEval sets the given index as the latest eval unless the currently
// stored index is higher.
func (w *deploymentWatcher) setLatestEval(index uint64) {
	w.l.Lock()
	defer w.l.Unlock()
	if index > w.latestEval {
		w.latestEval = index
	}
}

// getLatestEval returns the latest eval index.
func (w *deploymentWatcher) getLatestEval() uint64 {
	w.l.Lock()
	defer w.l.Unlock()
	return w.latestEval
}
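
// Illustrative sketch (not part of the original file): setLatestEval only ever
// moves the stored index forward, so replaying an older index is a no-op. A
// zero-value deploymentWatcher is enough to demonstrate this, since its mutex
// is usable as-is.
//
//	w := &deploymentWatcher{}
//	w.setLatestEval(10)
//	w.setLatestEval(5)    // ignored, 5 < 10
//	_ = w.getLatestEval() // 10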