github.com/hernad/nomad@v1.6.112/nomad/deploymentwatcher/deployment_watcher.go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package deploymentwatcher

import (
	"context"
	"fmt"
	"sync"
	"time"

	log "github.com/hashicorp/go-hclog"
	memdb "github.com/hashicorp/go-memdb"
	"github.com/hernad/nomad/helper/pointer"
	"github.com/hernad/nomad/helper/uuid"
	"github.com/hernad/nomad/nomad/state"
	"github.com/hernad/nomad/nomad/structs"
	"golang.org/x/time/rate"
)

const (
	// perJobEvalBatchPeriod is the batching length before creating an evaluation to
	// trigger the scheduler when allocations are marked as healthy.
	perJobEvalBatchPeriod = 1 * time.Second
)

var (
	// allowRescheduleTransition is the transition that allows failed
	// allocations part of a deployment to be rescheduled. We create a one off
	// variable to avoid creating a new object for every request.
	allowRescheduleTransition = &structs.DesiredTransition{
		Reschedule: pointer.Of(true),
	}
)

// deploymentTriggers are the set of functions required to trigger changes on
// behalf of a deployment
type deploymentTriggers interface {
	// createUpdate is used to create allocation desired transition updates and
	// an evaluation.
	createUpdate(allocs map[string]*structs.DesiredTransition, eval *structs.Evaluation) (uint64, error)

	// upsertJob is used to roll back a job when autoreverting for a deployment
	upsertJob(job *structs.Job) (uint64, error)

	// upsertDeploymentStatusUpdate is used to upsert a deployment status update
	// and an optional evaluation and job to upsert
	upsertDeploymentStatusUpdate(u *structs.DeploymentStatusUpdate, eval *structs.Evaluation, job *structs.Job) (uint64, error)

	// upsertDeploymentPromotion is used to promote canaries in a deployment
	upsertDeploymentPromotion(req *structs.ApplyDeploymentPromoteRequest) (uint64, error)

	// upsertDeploymentAllocHealth is used to set the health of allocations in a
	// deployment
	upsertDeploymentAllocHealth(req *structs.ApplyDeploymentAllocHealthRequest) (uint64, error)
}

// deploymentWatcher is used to watch a single deployment and trigger the
// scheduler when allocation health transitions.
type deploymentWatcher struct {
	// queryLimiter is used to limit the rate of blocking queries
	queryLimiter *rate.Limiter

	// deploymentTriggers holds the methods required to trigger changes on behalf of the
	// deployment
	deploymentTriggers

	// DeploymentRPC holds methods for interacting with peer regions
	// in enterprise edition
	DeploymentRPC

	// JobRPC holds methods for interacting with peer regions
	// in enterprise edition
	JobRPC

	// state is the state that is watched for state changes.
	state *state.StateStore

	// deploymentID is the deployment's ID being watched
	deploymentID string

	// deploymentUpdateCh is triggered when there is an updated deployment
	deploymentUpdateCh chan struct{}

	// d is the deployment being watched
	d *structs.Deployment

	// j is the job the deployment is for
	j *structs.Job

	// outstandingBatch marks whether an outstanding function exists to create
	// the evaluation. Access should be done through the lock.
	outstandingBatch bool

	// outstandingAllowReplacements is the map of allocations that will be
	// marked as allowing a replacement. Access should be done through the lock.
	outstandingAllowReplacements map[string]*structs.DesiredTransition

	// latestEval is the latest eval for the job. It is updated by the watch
	// loop and any time an evaluation is created. The field should be accessed
	// by holding the lock or using the setter and getter methods.
	latestEval uint64

	logger log.Logger
	ctx    context.Context
	exitFn context.CancelFunc
	l      sync.RWMutex
}

// newDeploymentWatcher returns a deployment watcher that is used to watch
// deployments and trigger the scheduler as needed.
func newDeploymentWatcher(parent context.Context, queryLimiter *rate.Limiter,
	logger log.Logger, state *state.StateStore, d *structs.Deployment,
	j *structs.Job, triggers deploymentTriggers,
	deploymentRPC DeploymentRPC, jobRPC JobRPC) *deploymentWatcher {

	ctx, exitFn := context.WithCancel(parent)
	w := &deploymentWatcher{
		queryLimiter:       queryLimiter,
		deploymentID:       d.ID,
		deploymentUpdateCh: make(chan struct{}, 1),
		d:                  d,
		j:                  j,
		state:              state,
		deploymentTriggers: triggers,
		DeploymentRPC:      deploymentRPC,
		JobRPC:             jobRPC,
		logger:             logger.With("deployment_id", d.ID, "job", j.NamespacedID()),
		ctx:                ctx,
		exitFn:             exitFn,
	}

	// Start the long lived watcher that scans for allocation updates
	go w.watch()

	return w
}

// updateDeployment is used to update the tracked deployment.
func (w *deploymentWatcher) updateDeployment(d *structs.Deployment) {
	w.l.Lock()
	defer w.l.Unlock()

	// Update and trigger
	w.d = d
	select {
	case w.deploymentUpdateCh <- struct{}{}:
	default:
	}
}

// getDeployment returns the tracked deployment.
func (w *deploymentWatcher) getDeployment() *structs.Deployment {
	w.l.RLock()
	defer w.l.RUnlock()
	return w.d
}

func (w *deploymentWatcher) SetAllocHealth(
	req *structs.DeploymentAllocHealthRequest,
	resp *structs.DeploymentUpdateResponse) error {

	// If we are failing the deployment, update the status and potentially
	// rollback
	var j *structs.Job
	var u *structs.DeploymentStatusUpdate

	// If there are unhealthy allocations we need to mark the deployment as
	// failed and check if we should roll back to a stable job.
	if l := len(req.UnhealthyAllocationIDs); l != 0 {
		unhealthy := make(map[string]struct{}, l)
		for _, alloc := range req.UnhealthyAllocationIDs {
			unhealthy[alloc] = struct{}{}
		}

		// Get the allocations for the deployment
		snap, err := w.state.Snapshot()
		if err != nil {
			return err
		}

		allocs, err := snap.AllocsByDeployment(nil, req.DeploymentID)
		if err != nil {
			return err
		}

		// Determine if we should autorevert to an older job
		desc := structs.DeploymentStatusDescriptionFailedAllocations
		for _, alloc := range allocs {
			// Check that the alloc has been marked unhealthy
			if _, ok := unhealthy[alloc.ID]; !ok {
				continue
			}

			// Check if the group has autorevert set
			dstate, ok := w.getDeployment().TaskGroups[alloc.TaskGroup]
			if !ok || !dstate.AutoRevert {
				continue
			}

			var err error
			j, err = w.latestStableJob()
			if err != nil {
				return err
			}

			if j != nil {
				j, desc = w.handleRollbackValidity(j, desc)
			}
			break
		}

		u = w.getDeploymentStatusUpdate(structs.DeploymentStatusFailed, desc)
	}

	// Canonicalize the job in case it doesn't have namespace set
	j.Canonicalize()

	// Create the request
	areq := &structs.ApplyDeploymentAllocHealthRequest{
		DeploymentAllocHealthRequest: *req,
		Timestamp:                    time.Now(),
		Eval:                         w.getEval(),
		DeploymentUpdate:             u,
		Job:                          j,
	}

	index, err := w.upsertDeploymentAllocHealth(areq)
	if err != nil {
		return err
	}

	// Build the response
	resp.EvalID = areq.Eval.ID
	resp.EvalCreateIndex = index
	resp.DeploymentModifyIndex = index
	resp.Index = index
	if j != nil {
		resp.RevertedJobVersion = pointer.Of(j.Version)
	}
	return nil
}

// handleRollbackValidity checks if the job being rolled back to has the same spec as the existing job.
// Returns a modified description and job accordingly.
func (w *deploymentWatcher) handleRollbackValidity(rollbackJob *structs.Job, desc string) (*structs.Job, string) {
	// Only roll back if the job being changed has a different spec.
	// This prevents an infinite revert cycle when a previously stable version of the job fails to start up during a rollback.
	// If the job we are trying to roll back to is identical to the current job, we stop because the rollback will not succeed.
	if w.j.SpecChanged(rollbackJob) {
		desc = structs.DeploymentStatusDescriptionRollback(desc, rollbackJob.Version)
	} else {
		desc = structs.DeploymentStatusDescriptionRollbackNoop(desc, rollbackJob.Version)
		rollbackJob = nil
	}
	return rollbackJob, desc
}

func (w *deploymentWatcher) PromoteDeployment(
	req *structs.DeploymentPromoteRequest,
	resp *structs.DeploymentUpdateResponse) error {

	// Create the request
	areq := &structs.ApplyDeploymentPromoteRequest{
		DeploymentPromoteRequest: *req,
		Eval:                     w.getEval(),
	}

	index, err := w.upsertDeploymentPromotion(areq)
	if err != nil {
		return err
	}

	// Build the response
	resp.EvalID = areq.Eval.ID
	resp.EvalCreateIndex = index
	resp.DeploymentModifyIndex = index
	resp.Index = index
	return nil
}

// autoPromoteDeployment creates a synthetic promotion request, and upserts it for processing
func (w *deploymentWatcher) autoPromoteDeployment(allocs []*structs.AllocListStub) error {
	d := w.getDeployment()
	if !d.HasPlacedCanaries() || !d.RequiresPromotion() {
		return nil
	}

	// AutoPromote iff every task group with canaries is marked auto_promote and is healthy. The whole
	// job version has been incremented, so we promote together. See also AutoRevert
	for _, dstate := range d.TaskGroups {

		// skip auto promote canary validation if the task group has no canaries
		// to prevent auto promote hanging on mixed canary/non-canary taskgroup deploys
		if dstate.DesiredCanaries < 1 {
			continue
		}

		if !dstate.AutoPromote || len(dstate.PlacedCanaries) < dstate.DesiredCanaries {
			return nil
		}

		healthyCanaries := 0
		// Find the health status of each canary
		for _, c := range dstate.PlacedCanaries {
			for _, a := range allocs {
				if c == a.ID && a.DeploymentStatus.IsHealthy() {
					healthyCanaries += 1
				}
			}
		}
		if healthyCanaries != dstate.DesiredCanaries {
			return nil
		}
	}

	// Send the request
	_, err := w.upsertDeploymentPromotion(&structs.ApplyDeploymentPromoteRequest{
		DeploymentPromoteRequest: structs.DeploymentPromoteRequest{DeploymentID: d.GetID(), All: true},
		Eval:                     w.getEval(),
	})
	return err
}

func (w *deploymentWatcher) PauseDeployment(
	req *structs.DeploymentPauseRequest,
	resp *structs.DeploymentUpdateResponse) error {
	// Determine the status we should transition to and if we need to create an
	// evaluation
	status, desc := structs.DeploymentStatusPaused, structs.DeploymentStatusDescriptionPaused
	var eval *structs.Evaluation
	evalID := ""
	if !req.Pause {
		status, desc = structs.DeploymentStatusRunning, structs.DeploymentStatusDescriptionRunning
		eval = w.getEval()
		evalID = eval.ID
	}
	update := w.getDeploymentStatusUpdate(status, desc)

	// Commit the change
	i, err := w.upsertDeploymentStatusUpdate(update, eval, nil)
	if err != nil {
		return err
	}

	// Build the response
	if evalID != "" {
		resp.EvalID = evalID
		resp.EvalCreateIndex = i
	}
	resp.DeploymentModifyIndex = i
	resp.Index = i
	return nil
}

func (w *deploymentWatcher) FailDeployment(
	req *structs.DeploymentFailRequest,
	resp *structs.DeploymentUpdateResponse) error {

	status, desc := structs.DeploymentStatusFailed, structs.DeploymentStatusDescriptionFailedByUser

	// Determine if we should rollback
	rollback := false
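	// A single task group with auto_revert enabled is enough to trigger a
	// rollback of the whole job version.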
	for _, dstate := range w.getDeployment().TaskGroups {
		if dstate.AutoRevert {
			rollback = true
			break
		}
	}

	var rollbackJob *structs.Job
	if rollback {
		var err error
		rollbackJob, err = w.latestStableJob()
		if err != nil {
			return err
		}

		if rollbackJob != nil {
			rollbackJob, desc = w.handleRollbackValidity(rollbackJob, desc)
		} else {
			desc = structs.DeploymentStatusDescriptionNoRollbackTarget(desc)
		}
	}

	// Commit the change
	update := w.getDeploymentStatusUpdate(status, desc)
	eval := w.getEval()
	i, err := w.upsertDeploymentStatusUpdate(update, eval, rollbackJob)
	if err != nil {
		return err
	}

	// Build the response
	resp.EvalID = eval.ID
	resp.EvalCreateIndex = i
	resp.DeploymentModifyIndex = i
	resp.Index = i
	if rollbackJob != nil {
		resp.RevertedJobVersion = pointer.Of(rollbackJob.Version)
	}
	return nil
}

// StopWatch stops watching the deployment. This should be called whenever a
// deployment is completed or the watcher is no longer needed.
func (w *deploymentWatcher) StopWatch() {
	w.exitFn()
}

// watch is the long running watcher that watches for both allocation and
// deployment changes. Its function is to create evaluations to trigger the
// scheduler when more progress can be made, to fail the deployment if it has
// failed, and potentially to roll back the job. Progress can be made when an
// allocation transitions to healthy, so we create an eval.
func (w *deploymentWatcher) watch() {
	// Get the deadline. This is likely a zero time to begin with but we need to
	// handle the case that the deployment has already progressed and we are now
	// just starting to watch it. This most likely would occur if there was a
	// leader transition and we are now starting our watcher.
	currentDeadline := w.getDeploymentProgressCutoff(w.getDeployment())
	var deadlineTimer *time.Timer
	if currentDeadline.IsZero() {
		deadlineTimer = time.NewTimer(0)
		if !deadlineTimer.Stop() {
			<-deadlineTimer.C
		}
	} else {
		deadlineTimer = time.NewTimer(time.Until(currentDeadline))
	}

	allocIndex := uint64(1)
	allocsCh := w.getAllocsCh(allocIndex)
	var updates *allocUpdates

	rollback, deadlineHit := false, false

FAIL:
	for {
		select {
		case <-w.ctx.Done():
			// This is the successful case, and we stop the loop
			return
		case <-deadlineTimer.C:
			// We have hit the progress deadline, so fail the deployment
			// unless we're waiting for manual promotion. We need to determine
			// whether we should roll back the job by inspecting which allocs
			// as part of the deployment are healthy and which aren't. The
			// deadlineHit flag is never reset, so even in the case of a
			// manual promotion, we'll describe any failure as a progress
			// deadline failure at this point.
			deadlineHit = true
			fail, rback, err := w.shouldFail()
			if err != nil {
				w.logger.Error("failed to determine whether to rollback job", "error", err)
			}
			if !fail {
				w.logger.Debug("skipping deadline")
				continue
			}

			w.logger.Debug("deadline hit", "rollback", rback)
			rollback = rback
			err = w.nextRegion(structs.DeploymentStatusFailed)
			if err != nil {
				w.logger.Error("multiregion deployment error", "error", err)
			}
			break FAIL
		case <-w.deploymentUpdateCh:
			// Get the updated deployment and check if we should change the
			// deadline timer
			next := w.getDeploymentProgressCutoff(w.getDeployment())
			if !next.Equal(currentDeadline) {
				prevDeadlineZero := currentDeadline.IsZero()
				currentDeadline = next
				// The most recent deadline can be zero if no allocs were created for this deployment.
				// The deadline timer would have already been stopped once in that case. To prevent
				// deadlocking on the already stopped deadline timer, we only drain the channel if
				// the previous deadline was not zero.
				if !prevDeadlineZero && !deadlineTimer.Stop() {
					select {
					case <-deadlineTimer.C:
					default:
					}
				}

				// If the next deadline is zero, we should not reset the timer
				// as we aren't tracking towards a progress deadline yet. This
				// can happen if you have multiple task groups with progress
				// deadlines and one of the task groups hasn't made any
				// placements. As soon as the other task group finishes its
				// rollout, the next progress deadline becomes zero, so we want
				// to avoid resetting, causing a deployment failure.
				if !next.IsZero() {
					deadlineTimer.Reset(time.Until(next))
					w.logger.Trace("resetting deadline")
				}
			}

			err := w.nextRegion(w.getStatus())
			if err != nil {
				break FAIL
			}

		case updates = <-allocsCh:
			if err := updates.err; err != nil {
				if err == context.Canceled || w.ctx.Err() == context.Canceled {
					return
				}

				w.logger.Error("failed to retrieve allocations", "error", err)
				return
			}
			allocIndex = updates.index

			// We have allocation changes for this deployment so determine the
			// steps to take.
			res, err := w.handleAllocUpdate(updates.allocs)
			if err != nil {
				if err == context.Canceled || w.ctx.Err() == context.Canceled {
					return
				}

				w.logger.Error("failed handling allocation updates", "error", err)
				return
			}

			// The deployment has failed, so break out of the watch loop and
			// handle the failure
			if res.failDeployment {
				rollback = res.rollback
				err := w.nextRegion(structs.DeploymentStatusFailed)
				if err != nil {
					w.logger.Error("multiregion deployment error", "error", err)
				}
				break FAIL
			}

			// If permitted, automatically promote this canary deployment
			err = w.autoPromoteDeployment(updates.allocs)
			if err != nil {
				w.logger.Error("failed to auto promote deployment", "error", err)
			}

			// Create an eval to push the deployment along
			if res.createEval || len(res.allowReplacements) != 0 {
				w.createBatchedUpdate(res.allowReplacements, allocIndex)
			}

			// only start a new blocking query if we haven't returned early
			allocsCh = w.getAllocsCh(allocIndex)
		}
	}

	// Change the deployment's status to failed
	desc := structs.DeploymentStatusDescriptionFailedAllocations
	if deadlineHit {
		desc = structs.DeploymentStatusDescriptionProgressDeadline
	}

	// Roll back to the old job if necessary
	var j *structs.Job
	if rollback {
		var err error
		j, err = w.latestStableJob()
		if err != nil {
			w.logger.Error("failed to lookup latest stable job", "error", err)
		}

		// Description should include that the job is being rolled back to
		// version N
		if j != nil {
			j, desc = w.handleRollbackValidity(j, desc)
		} else {
			desc = structs.DeploymentStatusDescriptionNoRollbackTarget(desc)
		}
	}

	// Update the status of the deployment to failed and create an evaluation.
	e := w.getEval()
	u := w.getDeploymentStatusUpdate(structs.DeploymentStatusFailed, desc)
	if _, err := w.upsertDeploymentStatusUpdate(u, e, j); err != nil {
		w.logger.Error("failed to update deployment status", "error", err)
	}
}

// allocUpdateResult is used to return the desired actions given the newest set
// of allocations for the deployment.
type allocUpdateResult struct {
	createEval        bool
	failDeployment    bool
	rollback          bool
	allowReplacements []string
}

// handleAllocUpdate is used to compute the set of actions to take based on the
// updated allocations for the deployment.
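// The returned allocUpdateResult indicates whether an evaluation should be
// created, whether the deployment should be failed (and rolled back), and
// which failed allocations may be marked for replacement.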
func (w *deploymentWatcher) handleAllocUpdate(allocs []*structs.AllocListStub) (allocUpdateResult, error) {
	var res allocUpdateResult

	// Get the latest evaluation index
	latestEval, err := w.jobEvalStatus()
	if err != nil {
		if err == context.Canceled || w.ctx.Err() == context.Canceled {
			return res, err
		}

		return res, fmt.Errorf("failed to determine last evaluation index for job %q: %v", w.j.ID, err)
	}

	deployment := w.getDeployment()
	for _, alloc := range allocs {
		dstate, ok := deployment.TaskGroups[alloc.TaskGroup]
		if !ok {
			continue
		}

		// Check if we can already fail the deployment
		failDeployment := w.shouldFailEarly(deployment, alloc, dstate)

		// Check if the allocation has failed and we need to mark it for allow
		// replacements
		if alloc.DeploymentStatus.IsUnhealthy() && !failDeployment &&
			deployment.Active() && !alloc.DesiredTransition.ShouldReschedule() {
			res.allowReplacements = append(res.allowReplacements, alloc.ID)
			continue
		}

		// We need to create an eval so the job can progress.
		if alloc.DeploymentStatus.IsHealthy() && alloc.DeploymentStatus.ModifyIndex > latestEval {
			res.createEval = true
		}

		if failDeployment {
			// Check if the group has autorevert set
			if dstate.AutoRevert {
				res.rollback = true
			}

			res.failDeployment = true
		}

		// All conditions have been hit so we can break
		if res.createEval && res.failDeployment && res.rollback {
			break
		}
	}

	return res, nil
}

// shouldFail returns whether the job should be failed and whether it should be
// rolled back to an earlier stable version by examining the allocations in the
// deployment.
func (w *deploymentWatcher) shouldFail() (fail, rollback bool, err error) {
	snap, err := w.state.Snapshot()
	if err != nil {
		return false, false, err
	}

	d, err := snap.DeploymentByID(nil, w.deploymentID)
	if err != nil {
		return false, false, err
	}
	if d == nil {
		// The deployment wasn't in the state store, possibly due to a system gc
		return false, false, fmt.Errorf("deployment id not found: %q", w.deploymentID)
	}

	fail = false
	for tg, dstate := range d.TaskGroups {
		// If we are in a canary state we fail if there aren't enough healthy
		// allocs to satisfy DesiredCanaries
		if dstate.DesiredCanaries > 0 && !dstate.Promoted {
			if dstate.HealthyAllocs >= dstate.DesiredCanaries {
				continue
			}
		} else if dstate.HealthyAllocs >= dstate.DesiredTotal {
			continue
		}

		// We have failed this TG
		fail = true

		// We don't need to autorevert this group
		upd := w.j.LookupTaskGroup(tg).Update
		if upd == nil || !upd.AutoRevert {
			continue
		}

		// Unhealthy allocs and we need to autorevert
		return fail, true, nil
	}

	return fail, false, nil
}

func (w *deploymentWatcher) shouldFailEarly(deployment *structs.Deployment, alloc *structs.AllocListStub, dstate *structs.DeploymentState) bool {
	if !alloc.DeploymentStatus.IsUnhealthy() {
		return false
	}

	// Fail on the first unhealthy allocation if no progress deadline is specified.
	if dstate.ProgressDeadline == 0 {
		w.logger.Debug("failing deployment because an allocation failed and the deployment is not progress based", "alloc", alloc.ID)
		return true
	}

	if deployment.Active() {
		reschedulePolicy := w.j.LookupTaskGroup(alloc.TaskGroup).ReschedulePolicy
		isRescheduleEligible := alloc.RescheduleEligible(reschedulePolicy, time.Now())
		if !isRescheduleEligible {
			// We have run out of reschedule attempts: do not wait for the progress deadline to expire because
			// we know that we will not be able to try to get another allocation healthy
			w.logger.Debug("failing deployment because an allocation has failed and the task group has run out of reschedule attempts", "alloc", alloc.ID)
			return true
		}
	}

	return false
}

// getDeploymentProgressCutoff returns the progress cutoff for the given
// deployment
func (w *deploymentWatcher) getDeploymentProgressCutoff(d *structs.Deployment) time.Time {
	var next time.Time
	doneTGs := w.doneGroups(d)
	for name, dstate := range d.TaskGroups {
		// This task group is done so we don't have to concern ourselves with
		// its progress deadline.
		if done, ok := doneTGs[name]; ok && done {
			continue
		}

		if dstate.RequireProgressBy.IsZero() {
			continue
		}

		if next.IsZero() || dstate.RequireProgressBy.Before(next) {
			next = dstate.RequireProgressBy
		}
	}
	return next
}

// doneGroups returns a map of task group to whether the deployment appears to
// be done for the group. A true value doesn't mean no more action will be taken
// in the lifetime of the deployment because there could always be node
// failures, or rescheduling events.
func (w *deploymentWatcher) doneGroups(d *structs.Deployment) map[string]bool {
	if d == nil {
		return nil
	}

	// Collect the allocations by the task group
	snap, err := w.state.Snapshot()
	if err != nil {
		return nil
	}

	allocs, err := snap.AllocsByDeployment(nil, d.ID)
	if err != nil {
		return nil
	}

	// Go through the allocs and count up how many healthy allocs we have
	healthy := make(map[string]int, len(d.TaskGroups))
	for _, a := range allocs {
		if a.TerminalStatus() || !a.DeploymentStatus.IsHealthy() {
			continue
		}
		healthy[a.TaskGroup]++
	}

	// Go through each group and check if it is done
	groups := make(map[string]bool, len(d.TaskGroups))
	for name, dstate := range d.TaskGroups {
		// Requires promotion
		if dstate.DesiredCanaries != 0 && !dstate.Promoted {
			groups[name] = false
			continue
		}

		// Check we have enough healthy currently running allocations
		groups[name] = healthy[name] >= dstate.DesiredTotal
	}

	return groups
}

// latestStableJob returns the latest stable job. It may be nil if none exists.
func (w *deploymentWatcher) latestStableJob() (*structs.Job, error) {
	snap, err := w.state.Snapshot()
	if err != nil {
		return nil, err
	}

	versions, err := snap.JobVersionsByID(nil, w.j.Namespace, w.j.ID)
	if err != nil {
		return nil, err
	}

	var stable *structs.Job
	for _, job := range versions {
		if job.Stable {
			stable = job
			break
		}
	}

	return stable, nil
}

// createBatchedUpdate creates an eval for the given index as well as updating
// the given allocations to allow them to reschedule.
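// Updates are batched for perJobEvalBatchPeriod so that a burst of allocation
// health changes results in a single evaluation rather than one per change.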
func (w *deploymentWatcher) createBatchedUpdate(allowReplacements []string, forIndex uint64) {
	w.l.Lock()
	defer w.l.Unlock()

	// Store the allocations that can be replaced
	for _, allocID := range allowReplacements {
		if w.outstandingAllowReplacements == nil {
			w.outstandingAllowReplacements = make(map[string]*structs.DesiredTransition, len(allowReplacements))
		}
		w.outstandingAllowReplacements[allocID] = allowRescheduleTransition
	}

	if w.outstandingBatch || (forIndex < w.latestEval && len(allowReplacements) == 0) {
		return
	}

	w.outstandingBatch = true

	time.AfterFunc(perJobEvalBatchPeriod, func() {
		// If the timer has been created and then we shut down, we need to no-op
		// the evaluation creation.
		select {
		case <-w.ctx.Done():
			return
		default:
		}

		w.l.Lock()
		replacements := w.outstandingAllowReplacements
		w.outstandingAllowReplacements = nil
		w.outstandingBatch = false
		w.l.Unlock()

		// Create the eval
		if _, err := w.createUpdate(replacements, w.getEval()); err != nil {
			w.logger.Error("failed to create evaluation for deployment", "deployment_id", w.deploymentID, "error", err)
		}
	})
}

// getEval returns an evaluation suitable for the deployment
func (w *deploymentWatcher) getEval() *structs.Evaluation {
	now := time.Now().UTC().UnixNano()

	// During a server upgrade it's possible we end up with deployments created
	// on the previous version that are then "watched" on a leader that's on
	// the new version. This would result in an eval with its priority set to
	// zero which would be bad. This therefore protects against that.
	w.l.Lock()
	priority := w.d.EvalPriority
	if priority == 0 {
		priority = w.j.Priority
	}
	w.l.Unlock()

	return &structs.Evaluation{
		ID:           uuid.Generate(),
		Namespace:    w.j.Namespace,
		Priority:     priority,
		Type:         w.j.Type,
		TriggeredBy:  structs.EvalTriggerDeploymentWatcher,
		JobID:        w.j.ID,
		DeploymentID: w.deploymentID,
		Status:       structs.EvalStatusPending,
		CreateTime:   now,
		ModifyTime:   now,
	}
}

// getDeploymentStatusUpdate returns a deployment status update
func (w *deploymentWatcher) getDeploymentStatusUpdate(status, desc string) *structs.DeploymentStatusUpdate {
	return &structs.DeploymentStatusUpdate{
		DeploymentID:      w.deploymentID,
		Status:            status,
		StatusDescription: desc,
	}
}

// getStatus returns the current status of the deployment
func (w *deploymentWatcher) getStatus() string {
	w.l.RLock()
	defer w.l.RUnlock()
	return w.d.Status
}

type allocUpdates struct {
	allocs []*structs.AllocListStub
	index  uint64
	err    error
}

// getAllocsCh creates a channel and starts a goroutine that
// 1. parks a blocking query for allocations on the state
// 2. reads those and drops them on the channel
// This query runs once here, but watch calls it in a loop
func (w *deploymentWatcher) getAllocsCh(index uint64) <-chan *allocUpdates {
	out := make(chan *allocUpdates, 1)
	go func() {
		allocs, index, err := w.getAllocs(index)
		out <- &allocUpdates{
			allocs: allocs,
			index:  index,
			err:    err,
		}
	}()

	return out
}

// getAllocs retrieves the allocations that are part of the deployment blocking
// at the given index.
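// The call returns when the allocations change past the given index or the
// watcher's context is cancelled.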
func (w *deploymentWatcher) getAllocs(index uint64) ([]*structs.AllocListStub, uint64, error) {
	resp, index, err := w.state.BlockingQuery(w.getAllocsImpl, index, w.ctx)
	if err != nil {
		return nil, 0, err
	}
	if err := w.ctx.Err(); err != nil {
		return nil, 0, err
	}

	return resp.([]*structs.AllocListStub), index, nil
}

// getAllocsImpl retrieves all allocations that are part of the deployment from
// the passed state store.
func (w *deploymentWatcher) getAllocsImpl(ws memdb.WatchSet, state *state.StateStore) (interface{}, uint64, error) {
	if err := w.queryLimiter.Wait(w.ctx); err != nil {
		return nil, 0, err
	}

	// Capture all the allocations
	allocs, err := state.AllocsByDeployment(ws, w.deploymentID)
	if err != nil {
		return nil, 0, err
	}

	maxIndex := uint64(0)
	stubs := make([]*structs.AllocListStub, 0, len(allocs))
	for _, alloc := range allocs {
		stubs = append(stubs, alloc.Stub(nil))

		if maxIndex < alloc.ModifyIndex {
			maxIndex = alloc.ModifyIndex
		}
	}

	// Use the last index that affected the allocs table
	if len(stubs) == 0 {
		index, err := state.Index("allocs")
		if err != nil {
			return nil, index, err
		}
		maxIndex = index
	}

	return stubs, maxIndex, nil
}

// jobEvalStatus returns the latest eval index for a job. The index is used to
// determine if an allocation update requires an evaluation to be triggered.
func (w *deploymentWatcher) jobEvalStatus() (latestIndex uint64, err error) {
	if err := w.queryLimiter.Wait(w.ctx); err != nil {
		return 0, err
	}

	snap, err := w.state.Snapshot()
	if err != nil {
		return 0, err
	}

	evals, err := snap.EvalsByJob(nil, w.j.Namespace, w.j.ID)
	if err != nil {
		return 0, err
	}

	// If there are no evals for the job, return zero, since we want any
	// allocation change to trigger an evaluation.
	if len(evals) == 0 {
		return 0, nil
	}

	var max uint64
	for _, eval := range evals {
		// A cancelled eval never impacts what the scheduler has seen, so do
		// not use its indexes.
		if eval.Status == structs.EvalStatusCancelled {
			continue
		}

		// Prefer using the snapshot index. Otherwise use the create index
		if eval.SnapshotIndex != 0 && max < eval.SnapshotIndex {
			max = eval.SnapshotIndex
		} else if max < eval.CreateIndex {
			max = eval.CreateIndex
		}
	}

	return max, nil
}