github.com/hspak/nomad@v0.7.2-0.20180309000617-bc4ae22a39a5/scheduler/reconcile.go

package scheduler

import (
	"fmt"
	"log"
	"time"

	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/nomad/structs"
)

// allocUpdateType takes an existing allocation and a new job definition and
// returns whether the allocation can ignore the change, requires a destructive
// update, or can be inplace updated. If it can be inplace updated, an updated
// allocation that has the new resources and alloc metrics attached will be
// returned.
type allocUpdateType func(existing *structs.Allocation, newJob *structs.Job,
	newTG *structs.TaskGroup) (ignore, destructive bool, updated *structs.Allocation)

// allocReconciler is used to determine the set of allocations that require
// placement, inplace updating or stopping given the job specification and
// existing cluster state. The reconciler should only be used for batch and
// service jobs.
type allocReconciler struct {
	// logger is used to log debug information. Logging should be kept to a
	// minimum here
	logger *log.Logger

	// allocUpdateFn is used to check if the allocation can be inplace updated
	allocUpdateFn allocUpdateType

	// batch marks whether the job is a batch job
	batch bool

	// job is the job being operated on, it may be nil if the job is being
	// stopped via a purge
	job *structs.Job

	// jobID is the ID of the job being operated on. The job may be nil if it is
	// being stopped so we require this separately.
	jobID string

	// oldDeployment is the last deployment for the job
	oldDeployment *structs.Deployment

	// deployment is the current deployment for the job
	deployment *structs.Deployment

	// deploymentPaused marks whether the deployment is paused
	deploymentPaused bool

	// deploymentFailed marks whether the deployment is failed
	deploymentFailed bool

	// taintedNodes contains a map of nodes that are tainted
	taintedNodes map[string]*structs.Node

	// existingAllocs is the set of non-terminal existing allocations
	existingAllocs []*structs.Allocation

	// result is the results of the reconcile. During computation it can be
	// used to store intermediate state
	result *reconcileResults
}

// reconcileResults contains the results of the reconciliation and should be
// applied by the scheduler.
type reconcileResults struct {
	// deployment is the deployment that should be created or updated as a
	// result of scheduling
	deployment *structs.Deployment

	// deploymentUpdates contains a set of deployment updates that should be
	// applied as a result of scheduling
	deploymentUpdates []*structs.DeploymentStatusUpdate

	// place is the set of allocations to place by the scheduler
	place []allocPlaceResult

	// destructiveUpdate is the set of allocations to apply a destructive update to
	destructiveUpdate []allocDestructiveResult

	// inplaceUpdate is the set of allocations to apply an inplace update to
	inplaceUpdate []*structs.Allocation

	// stop is the set of allocations to stop
	stop []allocStopResult

	// desiredTGUpdates captures the desired set of changes to make for each
	// task group.
	desiredTGUpdates map[string]*structs.DesiredUpdates

	// followupEvalWait is set if there should be a followup eval run after the
	// given duration
	followupEvalWait time.Duration
}

func (r *reconcileResults) GoString() string {
	base := fmt.Sprintf("Total changes: (place %d) (destructive %d) (inplace %d) (stop %d)",
		len(r.place), len(r.destructiveUpdate), len(r.inplaceUpdate), len(r.stop))

	if r.deployment != nil {
		base += fmt.Sprintf("\nCreated Deployment: %q", r.deployment.ID)
	}
	for _, u := range r.deploymentUpdates {
		base += fmt.Sprintf("\nDeployment Update for ID %q: Status %q; Description %q",
			u.DeploymentID, u.Status, u.StatusDescription)
	}
	if r.followupEvalWait != 0 {
		base += fmt.Sprintf("\nFollowup Eval in %v", r.followupEvalWait)
	}
	for tg, u := range r.desiredTGUpdates {
		base += fmt.Sprintf("\nDesired Changes for %q: %#v", tg, u)
	}
	return base
}

// Changes returns the number of total changes
func (r *reconcileResults) Changes() int {
	return len(r.place) + len(r.inplaceUpdate) + len(r.stop)
}

// NewAllocReconciler creates a new reconciler that should be used to determine
// the changes required to bring the cluster state in line with the declared jobspec
func NewAllocReconciler(logger *log.Logger, allocUpdateFn allocUpdateType, batch bool,
	jobID string, job *structs.Job, deployment *structs.Deployment,
	existingAllocs []*structs.Allocation, taintedNodes map[string]*structs.Node) *allocReconciler {

	return &allocReconciler{
		logger:         logger,
		allocUpdateFn:  allocUpdateFn,
		batch:          batch,
		jobID:          jobID,
		job:            job,
		deployment:     deployment.Copy(),
		existingAllocs: existingAllocs,
		taintedNodes:   taintedNodes,
		result: &reconcileResults{
			desiredTGUpdates: make(map[string]*structs.DesiredUpdates),
		},
	}
}

// Compute reconciles the existing cluster state and returns the set of changes
// required to converge the job spec and state
func (a *allocReconciler) Compute() *reconcileResults {
	// Create the allocation matrix
	m := newAllocMatrix(a.job, a.existingAllocs)

	// Handle stopping unneeded deployments
	a.cancelDeployments()

	// If we are just stopping a job we do not need to do anything more than
	// stopping all running allocs
	if a.job.Stopped() {
		a.handleStop(m)
		return a.result
	}

	// Detect if the deployment is paused
	if a.deployment != nil {
		// Detect if any allocs associated with this deploy have failed
		// Failed allocations could edge trigger an evaluation before the deployment watcher
		// runs and marks the deploy as failed.
		// This block makes sure that it is still
		// considered a failed deploy
		failedAllocsInDeploy := false
		for _, as := range m {
			for _, alloc := range as {
				if alloc.DeploymentID == a.deployment.ID && alloc.ClientStatus == structs.AllocClientStatusFailed {
					failedAllocsInDeploy = true
				}
			}
		}
		a.deploymentPaused = a.deployment.Status == structs.DeploymentStatusPaused
		a.deploymentFailed = a.deployment.Status == structs.DeploymentStatusFailed || failedAllocsInDeploy
	}

	// Reconcile each group
	complete := true
	for group, as := range m {
		groupComplete := a.computeGroup(group, as)
		complete = complete && groupComplete
	}

	// Mark the deployment as complete if possible
	if a.deployment != nil && complete {
		a.result.deploymentUpdates = append(a.result.deploymentUpdates, &structs.DeploymentStatusUpdate{
			DeploymentID:      a.deployment.ID,
			Status:            structs.DeploymentStatusSuccessful,
			StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
		})
	}

	// Set the description of a created deployment
	if d := a.result.deployment; d != nil {
		if d.RequiresPromotion() {
			d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
		}
	}

	return a.result
}

// cancelDeployments cancels any deployment that is not needed
func (a *allocReconciler) cancelDeployments() {
	// If the job is stopped and there is a non-terminal deployment, cancel it
	if a.job.Stopped() {
		if a.deployment != nil && a.deployment.Active() {
			a.result.deploymentUpdates = append(a.result.deploymentUpdates, &structs.DeploymentStatusUpdate{
				DeploymentID:      a.deployment.ID,
				Status:            structs.DeploymentStatusCancelled,
				StatusDescription: structs.DeploymentStatusDescriptionStoppedJob,
			})
		}

		// Nothing else to do
		a.oldDeployment = a.deployment
		a.deployment = nil
		return
	}

	d := a.deployment
	if d == nil {
		return
	}

	// Check if the deployment is active and referencing an older job and cancel it
	if d.JobCreateIndex != a.job.CreateIndex || d.JobVersion != a.job.Version {
		if d.Active() {
			a.result.deploymentUpdates = append(a.result.deploymentUpdates, &structs.DeploymentStatusUpdate{
				DeploymentID:      a.deployment.ID,
				Status:            structs.DeploymentStatusCancelled,
				StatusDescription: structs.DeploymentStatusDescriptionNewerJob,
			})
		}

		a.oldDeployment = d
		a.deployment = nil
	}

	// Clear it as the current deployment if it is successful
	if d.Status == structs.DeploymentStatusSuccessful {
		a.oldDeployment = d
		a.deployment = nil
	}
}

// handleStop marks all allocations to be stopped, handling the lost case
func (a *allocReconciler) handleStop(m allocMatrix) {
	for group, as := range m {
		untainted, migrate, lost := as.filterByTainted(a.taintedNodes)
		a.markStop(untainted, "", allocNotNeeded)
		a.markStop(migrate, "", allocNotNeeded)
		a.markStop(lost, structs.AllocClientStatusLost, allocLost)
		desiredChanges := new(structs.DesiredUpdates)
		desiredChanges.Stop = uint64(len(as))
		a.result.desiredTGUpdates[group] = desiredChanges
	}
}

// markStop is a helper for marking a set of allocations for stop with a
// particular client status and description.
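// The stops are only recorded on the reconciler's result set; the scheduler is
// responsible for actually applying them.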
func (a *allocReconciler) markStop(allocs allocSet, clientStatus, statusDescription string) {
	for _, alloc := range allocs {
		a.result.stop = append(a.result.stop, allocStopResult{
			alloc:             alloc,
			clientStatus:      clientStatus,
			statusDescription: statusDescription,
		})
	}
}

// computeGroup reconciles state for a particular task group. It returns whether
// the deployment it is for is complete with regards to the task group.
func (a *allocReconciler) computeGroup(group string, all allocSet) bool {
	// Create the desired update object for the group
	desiredChanges := new(structs.DesiredUpdates)
	a.result.desiredTGUpdates[group] = desiredChanges

	// Get the task group. The task group may be nil if the job was updated such
	// that the task group no longer exists
	tg := a.job.LookupTaskGroup(group)

	// If the task group is nil, then the task group has been removed so all we
	// need to do is stop everything
	if tg == nil {
		untainted, migrate, lost := all.filterByTainted(a.taintedNodes)
		a.markStop(untainted, "", allocNotNeeded)
		a.markStop(migrate, "", allocNotNeeded)
		a.markStop(lost, structs.AllocClientStatusLost, allocLost)
		desiredChanges.Stop = uint64(len(untainted) + len(migrate) + len(lost))
		return true
	}

	// Get the deployment state for the group
	var dstate *structs.DeploymentState
	existingDeployment := false
	if a.deployment != nil {
		dstate, existingDeployment = a.deployment.TaskGroups[group]
	}
	if !existingDeployment {
		autorevert := false
		if tg.Update != nil && tg.Update.AutoRevert {
			autorevert = true
		}
		dstate = &structs.DeploymentState{
			AutoRevert: autorevert,
		}
	}

	// Filter batch allocations that do not need to be considered.
	all, ignore := a.batchFiltration(all)
	desiredChanges.Ignore += uint64(len(ignore))

	canaries, all := a.handleGroupCanaries(all, desiredChanges)

	// Determine what set of allocations are on tainted nodes
	untainted, migrate, lost := all.filterByTainted(a.taintedNodes)

	// Determine what set of terminal allocations need to be rescheduled
	untainted, reschedule := untainted.filterByRescheduleable(a.batch, tg.ReschedulePolicy)

	// Create a structure for choosing names. Seed with the taken names which is
	// the union of the untainted, migrating and rescheduling allocations (includes canaries)
	nameIndex := newAllocNameIndex(a.jobID, group, tg.Count, untainted.union(migrate, reschedule))

	// Stop any unneeded allocations and update the untainted set to not
	// include stopped allocations.
	canaryState := dstate != nil && dstate.DesiredCanaries != 0 && !dstate.Promoted
	stop := a.computeStop(tg, nameIndex, untainted, migrate, lost, canaries, canaryState)
	desiredChanges.Stop += uint64(len(stop))
	untainted = untainted.difference(stop)

	// Having stopped un-needed allocations, append the canaries to the existing
	// set of untainted because they are promoted. This will cause them to be
	// treated like non-canaries
	if !canaryState {
		untainted = untainted.union(canaries)
		nameIndex.Set(canaries)
	}

	// Do inplace upgrades where possible and capture the set of upgrades that
	// need to be done destructively.
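	// In-place updates reuse the existing allocation, while destructive
	// updates stop the old allocation and place a replacement.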
	ignore, inplace, destructive := a.computeUpdates(tg, untainted)
	desiredChanges.Ignore += uint64(len(ignore))
	desiredChanges.InPlaceUpdate += uint64(len(inplace))
	if !existingDeployment {
		dstate.DesiredTotal += len(destructive) + len(inplace)
	}

	// The fact that we have destructive updates and fewer canaries than
	// desired means we need to create canaries
	numDestructive := len(destructive)
	strategy := tg.Update
	canariesPromoted := dstate != nil && dstate.Promoted
	requireCanary := numDestructive != 0 && strategy != nil && len(canaries) < strategy.Canary && !canariesPromoted
	if requireCanary && !a.deploymentPaused && !a.deploymentFailed {
		number := strategy.Canary - len(canaries)
		number = helper.IntMin(numDestructive, number)
		desiredChanges.Canary += uint64(number)
		if !existingDeployment {
			dstate.DesiredCanaries = strategy.Canary
		}

		for _, name := range nameIndex.NextCanaries(uint(number), canaries, destructive) {
			a.result.place = append(a.result.place, allocPlaceResult{
				name:      name,
				canary:    true,
				taskGroup: tg,
			})
		}
	}

	// Determine how many we can place
	canaryState = dstate != nil && dstate.DesiredCanaries != 0 && !dstate.Promoted
	limit := a.computeLimit(tg, untainted, destructive, migrate, canaryState)

	// Place if:
	// * The deployment is not paused or failed
	// * Not placing any canaries
	// * If there are any canaries, they have been promoted
	place := a.computePlacements(tg, nameIndex, untainted, migrate, reschedule)
	if !existingDeployment {
		dstate.DesiredTotal += len(place)
	}

	// deploymentPlaceReady tracks whether the deployment is in a state where
	// placements can be made without any other consideration.
	deploymentPlaceReady := !a.deploymentPaused && !a.deploymentFailed && !canaryState

	if deploymentPlaceReady {
		desiredChanges.Place += uint64(len(place))
		for _, p := range place {
			a.result.place = append(a.result.place, p)
		}

		min := helper.IntMin(len(place), limit)
		limit -= min
	} else if !deploymentPlaceReady && len(lost) != 0 {
		// We are in a situation where we shouldn't be placing more than we need
		// to but we have lost allocations. It is a very weird user experience
		// if you have a node go down and Nomad doesn't replace the allocations
		// because the deployment is paused/failed so we only place to recover
		// the lost allocations.
		allowed := helper.IntMin(len(lost), len(place))
		desiredChanges.Place += uint64(allowed)
		for _, p := range place[:allowed] {
			a.result.place = append(a.result.place, p)
		}
	}

	if deploymentPlaceReady {
		// Do all destructive updates
		min := helper.IntMin(len(destructive), limit)
		limit -= min
		desiredChanges.DestructiveUpdate += uint64(min)
		desiredChanges.Ignore += uint64(len(destructive) - min)
		for _, alloc := range destructive.nameOrder()[:min] {
			a.result.destructiveUpdate = append(a.result.destructiveUpdate, allocDestructiveResult{
				placeName:             alloc.Name,
				placeTaskGroup:        tg,
				stopAlloc:             alloc,
				stopStatusDescription: allocUpdating,
			})
		}
	} else {
		desiredChanges.Ignore += uint64(len(destructive))
	}

	// Calculate the allowed number of changes and set the desired changes
	// accordingly.
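	// Migrations consume whatever placement limit remains after the
	// destructive updates above.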
	min := helper.IntMin(len(migrate), limit)
	if !a.deploymentFailed && !a.deploymentPaused {
		desiredChanges.Migrate += uint64(min)
		desiredChanges.Ignore += uint64(len(migrate) - min)
	} else {
		desiredChanges.Stop += uint64(len(migrate))
	}

	followup := false
	migrated := 0
	for _, alloc := range migrate.nameOrder() {
		// If the deployment is failed or paused, don't replace it, just mark as stop.
		if a.deploymentFailed || a.deploymentPaused {
			a.result.stop = append(a.result.stop, allocStopResult{
				alloc:             alloc,
				statusDescription: allocNodeTainted,
			})
			continue
		}

		if migrated >= limit {
			followup = true
			break
		}

		migrated++
		a.result.stop = append(a.result.stop, allocStopResult{
			alloc:             alloc,
			statusDescription: allocMigrating,
		})
		a.result.place = append(a.result.place, allocPlaceResult{
			name:          alloc.Name,
			canary:        false,
			taskGroup:     tg,
			previousAlloc: alloc,
		})
	}

	// We need to create a followup evaluation.
	if followup && strategy != nil && a.result.followupEvalWait < strategy.Stagger {
		a.result.followupEvalWait = strategy.Stagger
	}

	// Create a new deployment if necessary
	if !existingDeployment && strategy != nil && dstate.DesiredTotal != 0 {
		// A previous group may have made the deployment already
		if a.deployment == nil {
			a.deployment = structs.NewDeployment(a.job)
			a.result.deployment = a.deployment
		}

		// Attach the group's deployment state to the deployment
		a.deployment.TaskGroups[group] = dstate
	}

	// deploymentComplete is whether the deployment is complete which largely
	// means that no placements were made or desired to be made
	deploymentComplete := len(destructive)+len(inplace)+len(place)+len(migrate) == 0 && !requireCanary

	// The final check to see if the deployment is complete is to ensure everything
	// is healthy
	if deploymentComplete && a.deployment != nil {
		partOf, _ := untainted.filterByDeployment(a.deployment.ID)
		for _, alloc := range partOf {
			if !alloc.DeploymentStatus.IsHealthy() {
				deploymentComplete = false
				break
			}
		}
	}

	return deploymentComplete
}

// batchFiltration filters batch allocations that should be ignored. These are
// allocations that are terminal from a previous job version.
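// Ignoring them prevents the reconciler from acting on allocations that have
// already run to completion under an older version of the job.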
func (a *allocReconciler) batchFiltration(all allocSet) (filtered, ignore allocSet) {
	if !a.batch {
		return all, nil
	}

	filtered = filtered.union(all)
	ignored := make(map[string]*structs.Allocation)

	// Ignore terminal batch jobs from older versions
	for id, alloc := range filtered {
		older := alloc.Job.Version < a.job.Version || alloc.Job.CreateIndex < a.job.CreateIndex
		if older && alloc.TerminalStatus() {
			delete(filtered, id)
			ignored[id] = alloc
		}
	}

	return filtered, ignored
}

// handleGroupCanaries handles the canaries for the group by stopping the
// unneeded ones and returning the current set of canaries and the updated total
// set of allocs for the group
func (a *allocReconciler) handleGroupCanaries(all allocSet, desiredChanges *structs.DesiredUpdates) (canaries, newAll allocSet) {
	// Stop any canary from an older deployment or from a failed one
	var stop []string

	// Cancel any non-promoted canaries from the older deployment
	if a.oldDeployment != nil {
		for _, s := range a.oldDeployment.TaskGroups {
			if !s.Promoted {
				stop = append(stop, s.PlacedCanaries...)
			}
		}
	}

	// Cancel any non-promoted canaries from a failed deployment
	if a.deployment != nil && a.deployment.Status == structs.DeploymentStatusFailed {
		for _, s := range a.deployment.TaskGroups {
			if !s.Promoted {
				stop = append(stop, s.PlacedCanaries...)
			}
		}
	}

	// stopSet is the allocSet that contains the canaries we desire to stop from
	// above.
	stopSet := all.fromKeys(stop)
	a.markStop(stopSet, "", allocNotNeeded)
	desiredChanges.Stop += uint64(len(stopSet))
	all = all.difference(stopSet)

	// Capture our current set of canaries and handle any migrations that are
	// needed by just stopping them.
	if a.deployment != nil {
		var canaryIDs []string
		for _, s := range a.deployment.TaskGroups {
			canaryIDs = append(canaryIDs, s.PlacedCanaries...)
		}

		canaries = all.fromKeys(canaryIDs)
		untainted, migrate, lost := canaries.filterByTainted(a.taintedNodes)
		a.markStop(migrate, "", allocMigrating)
		a.markStop(lost, structs.AllocClientStatusLost, allocLost)

		canaries = untainted
		all = all.difference(migrate, lost)
	}

	return canaries, all
}

// computeLimit returns the placement limit for a particular group. The inputs
// are the group definition, the untainted, destructive, and migrate allocation
// sets and whether we are in a canary state.
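// The limit bounds how many destructive updates and migrations may be issued
// in a single reconciliation pass.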
func (a *allocReconciler) computeLimit(group *structs.TaskGroup, untainted, destructive, migrate allocSet, canaryState bool) int {
	// If there is no update strategy or deployment for the group we can deploy
	// as many as the group has
	if group.Update == nil || len(destructive)+len(migrate) == 0 {
		return group.Count
	} else if a.deploymentPaused || a.deploymentFailed {
		// If the deployment is paused or failed, do not create anything else
		return 0
	}

	// If we have canaries and they have not been promoted the limit is 0
	if canaryState {
		return 0
	}

	// If we have been promoted or there are no canaries, the limit is the
	// configured MaxParallel minus any outstanding non-healthy alloc for the
	// deployment
	limit := group.Update.MaxParallel
	if a.deployment != nil {
		partOf, _ := untainted.filterByDeployment(a.deployment.ID)
		for _, alloc := range partOf {
			// An unhealthy allocation means nothing else should happen.
			if alloc.DeploymentStatus.IsUnhealthy() {
				return 0
			}

			if !alloc.DeploymentStatus.IsHealthy() {
				limit--
			}
		}
	}

	// The limit can be less than zero in the case that the job was changed such
	// that it required destructive changes and the count was scaled up.
	if limit < 0 {
		return 0
	}

	return limit
}

// computePlacements returns the set of allocations to place given the group
// definition and the sets of untainted, migrating and rescheduling allocations for the group.
func (a *allocReconciler) computePlacements(group *structs.TaskGroup,
	nameIndex *allocNameIndex, untainted, migrate allocSet, reschedule allocSet) []allocPlaceResult {

	// Hot path the nothing to do case
	existing := len(untainted) + len(migrate)
	if existing >= group.Count {
		return nil
	}
	var place []allocPlaceResult
	// Add rescheduled placement results
	// Any allocations being rescheduled will remain at DesiredStatusRun ClientStatusFailed
	for _, alloc := range reschedule {
		place = append(place, allocPlaceResult{
			name:          alloc.Name,
			taskGroup:     group,
			previousAlloc: alloc,
			reschedule:    true,
		})
		existing += 1
		if existing == group.Count {
			break
		}
	}
	// Add remaining placement results
	if existing < group.Count {
		for _, name := range nameIndex.Next(uint(group.Count - existing)) {
			place = append(place, allocPlaceResult{
				name:      name,
				taskGroup: group,
			})
		}
	}

	return place
}

// computeStop returns the set of allocations that are marked for stopping given
// the group definition, the set of allocations in various states and whether we
// are canarying.
func (a *allocReconciler) computeStop(group *structs.TaskGroup, nameIndex *allocNameIndex,
	untainted, migrate, lost, canaries allocSet, canaryState bool) allocSet {

	// Mark all lost allocations for stop.
	// Previous allocation doesn't matter
	// here since it is on a lost node
	var stop allocSet
	stop = stop.union(lost)
	a.markStop(lost, structs.AllocClientStatusLost, allocLost)

	// If we are still deploying or creating canaries, don't stop them
	if canaryState {
		untainted = untainted.difference(canaries)
	}

	// Hot path the nothing to do case
	remove := len(untainted) + len(migrate) - group.Count
	if remove <= 0 {
		return stop
	}

	// Filter out any terminal allocations from the untainted set
	// This is so that we don't try to mark them as stopped redundantly
	untainted = filterByTerminal(untainted)

	// Prefer stopping any alloc that has the same name as the canaries if we
	// are promoted
	if !canaryState && len(canaries) != 0 {
		canaryNames := canaries.nameSet()
		for id, alloc := range untainted.difference(canaries) {
			if _, match := canaryNames[alloc.Name]; match {
				stop[id] = alloc
				a.result.stop = append(a.result.stop, allocStopResult{
					alloc:             alloc,
					statusDescription: allocNotNeeded,
				})
				delete(untainted, id)

				remove--
				if remove == 0 {
					return stop
				}
			}
		}
	}

	// Prefer selecting from the migrating set before stopping existing allocs
	if len(migrate) != 0 {
		mNames := newAllocNameIndex(a.jobID, group.Name, group.Count, migrate)
		removeNames := mNames.Highest(uint(remove))
		for id, alloc := range migrate {
			if _, match := removeNames[alloc.Name]; !match {
				continue
			}
			a.result.stop = append(a.result.stop, allocStopResult{
				alloc:             alloc,
				statusDescription: allocNotNeeded,
			})
			delete(migrate, id)
			stop[id] = alloc
			nameIndex.UnsetIndex(alloc.Index())

			remove--
			if remove == 0 {
				return stop
			}
		}
	}

	// Select the allocs with the highest count to remove
	removeNames := nameIndex.Highest(uint(remove))
	for id, alloc := range untainted {
		if _, ok := removeNames[alloc.Name]; ok {
			stop[id] = alloc
			a.result.stop = append(a.result.stop, allocStopResult{
				alloc:             alloc,
				statusDescription: allocNotNeeded,
			})
			delete(untainted, id)

			remove--
			if remove == 0 {
				return stop
			}
		}
	}

	// It is possible that we didn't stop as many as we should have if there
	// were allocations with duplicate names.
	for id, alloc := range untainted {
		stop[id] = alloc
		a.result.stop = append(a.result.stop, allocStopResult{
			alloc:             alloc,
			statusDescription: allocNotNeeded,
		})
		delete(untainted, id)

		remove--
		if remove == 0 {
			return stop
		}
	}

	return stop
}

// computeUpdates determines which allocations for the passed group require
// updates. Three groups are returned:
// 1. Those that require no upgrades
// 2. Those that can be upgraded in-place. These are added to the results
// automatically since the function contains the correct state to do so,
// 3. Those that require destructive updates
func (a *allocReconciler) computeUpdates(group *structs.TaskGroup, untainted allocSet) (ignore, inplace, destructive allocSet) {
	// Determine the set of allocations that need to be updated
	ignore = make(map[string]*structs.Allocation)
	inplace = make(map[string]*structs.Allocation)
	destructive = make(map[string]*structs.Allocation)

	for _, alloc := range untainted {
		ignoreChange, destructiveChange, inplaceAlloc := a.allocUpdateFn(alloc, a.job, group)
		if ignoreChange {
			ignore[alloc.ID] = alloc
		} else if destructiveChange {
			destructive[alloc.ID] = alloc
		} else {
			// Attach the deployment ID and clear the health if the
			// deployment has changed
			inplace[alloc.ID] = alloc
			a.result.inplaceUpdate = append(a.result.inplaceUpdate, inplaceAlloc)
		}
	}

	return
}
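// Example (illustrative sketch only, not part of the original file): a trivial
// allocUpdateFn that never updates in place and treats every change as
// destructive. The real scheduler wires in a function that diffs the existing
// allocation against the new job to choose between ignore, in-place, and
// destructive updates; the name exampleAllocUpdateFn is hypothetical.
var exampleAllocUpdateFn allocUpdateType = func(existing *structs.Allocation,
	newJob *structs.Job, newTG *structs.TaskGroup) (bool, bool, *structs.Allocation) {
	// ignore = false, destructive = true, no updated allocation to return
	return false, true, nil
}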