github.com/hspak/nomad@v0.7.2-0.20180309000617-bc4ae22a39a5/scheduler/reconcile_test.go

package scheduler

import (
	"fmt"
	"log"
	"os"
	"reflect"
	"regexp"
	"strconv"
	"testing"
	"time"

	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/kr/pretty"
)

/*
Basic Tests:
√ Place when there is nothing in the cluster
√ Place remainder when there is some in the cluster
√ Scale down from n to n-m where n != m
√ Scale down from n to zero
√ Inplace upgrade test
√ Inplace upgrade and scale up test
√ Inplace upgrade and scale down test
√ Destructive upgrade
√ Destructive upgrade and scale up test
√ Destructive upgrade and scale down test
√ Handle lost nodes
√ Handle lost nodes and scale up
√ Handle lost nodes and scale down
√ Handle draining nodes
√ Handle draining nodes and scale up
√ Handle draining nodes and scale down
√ Handle task group being removed
√ Handle job being stopped both as .Stopped and nil
√ Place more than one group
√ Handle rescheduling failed allocs for batch jobs
√ Handle rescheduling failed allocs for service jobs
√ Previously rescheduled allocs should not be rescheduled again

Update stanza Tests:
√ Stopped job cancels any active deployment
√ Stopped job doesn't cancel terminal deployment
√ JobIndex change cancels any active deployment
√ JobIndex change doesn't cancel any terminal deployment
√ Destructive changes create deployment and get rolled out via max_parallelism
√ Don't create a deployment if there are no changes
√ Deployment created by all inplace updates
√ Paused or failed deployment doesn't create any more canaries
√ Paused or failed deployment doesn't do any placements unless replacing lost allocs
√ Paused or failed deployment doesn't do destructive updates
√ Paused does do migrations
√ Failed deployment doesn't do migrations
√ Canary that is on a draining node
√ Canary that is on a lost node
√ Stop old canaries
√ Create new canaries on job change
√ Create new canaries on job change while scaling up
√ Create new canaries on job change while scaling down
√ Fill canaries if partial placement
√ Promote canaries unblocks max_parallel
√ Promote canaries when canaries == count
√ Only place as many as are healthy in deployment
√ Limit calculation accounts for healthy allocs on migrating/lost nodes
√ Failed deployment should not place anything
√ Run after canaries have been promoted, new allocs have been rolled out and there is no deployment
√ Failed deployment cancels non-promoted task groups
√ Failed deployment and updated job works
√ Finished deployment gets marked as complete
√ The stagger is correctly calculated when it is applied across multiple task groups.
√ Handle job change while scaling up
√ Update the job when all allocations from the previous job haven't been placed yet.
√ Paused or failed deployment doesn't do any rescheduling of failed allocs
√ Running deployment with failed allocs doesn't do any rescheduling of failed allocs
*/

var (
	canaryUpdate = &structs.UpdateStrategy{
		Canary:          2,
		MaxParallel:     2,
		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
		MinHealthyTime:  10 * time.Second,
		HealthyDeadline: 10 * time.Minute,
		Stagger:         31 * time.Second,
	}

	noCanaryUpdate = &structs.UpdateStrategy{
		MaxParallel:     4,
		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
		MinHealthyTime:  10 * time.Second,
		HealthyDeadline: 10 * time.Minute,
		Stagger:         31 * time.Second,
	}
)

func testLogger() *log.Logger {
	return log.New(os.Stderr, "", log.LstdFlags)
}

func allocUpdateFnIgnore(*structs.Allocation, *structs.Job, *structs.TaskGroup) (bool, bool, *structs.Allocation) {
	return true, false, nil
}

func allocUpdateFnDestructive(*structs.Allocation, *structs.Job, *structs.TaskGroup) (bool, bool, *structs.Allocation) {
	return false, true, nil
}

func allocUpdateFnInplace(existing *structs.Allocation, _ *structs.Job, newTG *structs.TaskGroup) (bool, bool, *structs.Allocation) {
	// Create a shallow copy
	newAlloc := new(structs.Allocation)
	*newAlloc = *existing
	newAlloc.TaskResources = make(map[string]*structs.Resources)

	// Use the new task resources but keep the network from the old
	for _, task := range newTG.Tasks {
		r := task.Resources.Copy()
		r.Networks = existing.TaskResources[task.Name].Networks
		newAlloc.TaskResources[task.Name] = r
	}

	return false, false, newAlloc
}

func allocUpdateFnMock(handled map[string]allocUpdateType, unhandled allocUpdateType) allocUpdateType {
	return func(existing *structs.Allocation, newJob *structs.Job, newTG *structs.TaskGroup) (bool, bool, *structs.Allocation) {
		if fn, ok := handled[existing.ID]; ok {
			return fn(existing, newJob, newTG)
		}

		return unhandled(existing, newJob, newTG)
	}
}
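// As an illustrative sketch (not part of the original test suite), the
// alloc-update helpers above compose through allocUpdateFnMock: specific
// allocation IDs can be pinned to one decision while everything else falls
// through to a default. The allocation IDs below are hypothetical.
//
//	handled := map[string]allocUpdateType{
//		"alloc-ignore":  allocUpdateFnIgnore,  // keep this alloc as-is
//		"alloc-inplace": allocUpdateFnInplace, // update it in place
//	}
//	updateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
//	// Any allocation not listed in handled gets a destructive update.
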
var (
	// allocationIndexRegex is a regular expression to find the allocation index.
	allocationIndexRegex = regexp.MustCompile(".+\\[(\\d+)\\]$")
)

// allocNameToIndex returns the index of the allocation.
func allocNameToIndex(name string) uint {
	matches := allocationIndexRegex.FindStringSubmatch(name)
	if len(matches) != 2 {
		return 0
	}

	index, err := strconv.Atoi(matches[1])
	if err != nil {
		return 0
	}

	return uint(index)
}

// assertNamesHaveIndexes asserts that the given allocation names use exactly
// the expected set of indexes, accounting for duplicates.
func assertNamesHaveIndexes(t *testing.T, indexes []int, names []string) {
	t.Helper()
	m := make(map[uint]int)
	for _, i := range indexes {
		m[uint(i)] += 1
	}

	for _, n := range names {
		index := allocNameToIndex(n)
		val, contained := m[index]
		if !contained {
			t.Fatalf("Unexpected index %d from name %s\nAll names: %v", index, n, names)
		}

		val--
		if val < 0 {
			t.Fatalf("Index %d repeated too many times\nAll names: %v", index, names)
		}
		m[index] = val
	}

	for k, remainder := range m {
		if remainder != 0 {
			t.Fatalf("Index %d has %d expected uses remaining\nAll names: %v", k, remainder, names)
		}
	}
}

func assertNoCanariesStopped(t *testing.T, d *structs.Deployment, stop []allocStopResult) {
	t.Helper()
	canaryIndex := make(map[string]struct{})
	for _, state := range d.TaskGroups {
		for _, c := range state.PlacedCanaries {
			canaryIndex[c] = struct{}{}
		}
	}

	for _, s := range stop {
		if _, ok := canaryIndex[s.alloc.ID]; ok {
			t.Fatalf("Stopping canary alloc %q %q", s.alloc.ID, s.alloc.Name)
		}
	}
}

func assertPlaceResultsHavePreviousAllocs(t *testing.T, numPrevious int, place []allocPlaceResult) {
	t.Helper()
	names := make(map[string]struct{}, numPrevious)

	found := 0
	for _, p := range place {
		if _, ok := names[p.name]; ok {
			t.Fatalf("Name %q already placed", p.name)
		}
		names[p.name] = struct{}{}

		if p.previousAlloc == nil {
			continue
		}

		if act := p.previousAlloc.Name; p.name != act {
			t.Fatalf("Name mismatch on previous alloc; got %q; want %q", act, p.name)
		}
		found++
	}
	if numPrevious != found {
		t.Fatalf("wanted %d; got %d placements with previous allocs", numPrevious, found)
	}
}

func assertPlacementsAreRescheduled(t *testing.T, numRescheduled int, place []allocPlaceResult) {
	t.Helper()
	names := make(map[string]struct{}, numRescheduled)

	found := 0
	for _, p := range place {
		if _, ok := names[p.name]; ok {
			t.Fatalf("Name %q already placed", p.name)
		}
		names[p.name] = struct{}{}

		if p.previousAlloc == nil {
			continue
		}
		if p.reschedule {
			found++
		}
	}
	if numRescheduled != found {
		t.Fatalf("wanted %d; got %d placements that are rescheduled", numRescheduled, found)
	}
}

// intRange expands (start, end) pairs into the inclusive list of integers they
// cover, e.g. intRange(0, 2, 5, 5) yields [0, 1, 2, 5].
func intRange(pairs ...int) []int {
	if len(pairs)%2 != 0 {
		return nil
	}

	var r []int
	for i := 0; i < len(pairs); i += 2 {
		for j := pairs[i]; j <= pairs[i+1]; j++ {
			r = append(r, j)
		}
	}
	return r
}

func placeResultsToNames(place []allocPlaceResult) []string {
	names := make([]string, 0, len(place))
	for _, p := range place {
		names = append(names, p.name)
	}
	return names
}

func destructiveResultsToNames(destructive []allocDestructiveResult) []string {
	names := make([]string, 0, len(destructive))
	for _, d := range destructive {
		names = append(names, d.placeName)
	}
	return names
}

func stopResultsToNames(stop []allocStopResult) []string {
	names := make([]string, 0, len(stop))
	for _, s := range stop {
		names = append(names, s.alloc.Name)
	}
	return names
}
func allocsToNames(allocs []*structs.Allocation) []string {
	names := make([]string, 0, len(allocs))
	for _, a := range allocs {
		names = append(names, a.Name)
	}
	return names
}

// resultExpectation captures the expected output of a single reconciler run so
// that tests can assert against it with assertResults.
type resultExpectation struct {
	createDeployment  *structs.Deployment
	deploymentUpdates []*structs.DeploymentStatusUpdate
	place             int
	destructive       int
	inplace           int
	stop              int
	desiredTGUpdates  map[string]*structs.DesiredUpdates
	followupEvalWait  time.Duration
}
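// The tests below all follow the same shape; this sketch (not itself a test
// from the original file) shows the flow inside a test function, using the
// reconciler arguments seen throughout this file: update function, batch flag,
// job ID, job, deployment, existing allocations, and tainted nodes.
//
//	job := mock.Job()
//	var allocs []*structs.Allocation // existing cluster state, built from mock.Alloc()
//	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false,
//		job.ID, job, nil, allocs, nil)
//	r := reconciler.Compute()
//	assertResults(t, r, &resultExpectation{
//		place: 10,
//		desiredTGUpdates: map[string]*structs.DesiredUpdates{
//			job.TaskGroups[0].Name: {Place: 10},
//		},
//	})
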
Got %v; want %v", r.followupEvalWait, exp.followupEvalWait) 343 } 344 345 // Check the desired updates happened 346 for group, desired := range exp.desiredTGUpdates { 347 act, ok := r.desiredTGUpdates[group] 348 if !ok { 349 t.Fatalf("Expected desired updates for group %q", group) 350 } 351 352 if !reflect.DeepEqual(act, desired) { 353 t.Fatalf("Unexpected annotations for group %q: %v", group, pretty.Diff(act, desired)) 354 } 355 } 356 } 357 358 // Tests the reconciler properly handles placements for a job that has no 359 // existing allocations 360 func TestReconciler_Place_NoExisting(t *testing.T) { 361 job := mock.Job() 362 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, nil, nil) 363 r := reconciler.Compute() 364 365 // Assert the correct results 366 assertResults(t, r, &resultExpectation{ 367 createDeployment: nil, 368 deploymentUpdates: nil, 369 place: 10, 370 inplace: 0, 371 stop: 0, 372 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 373 job.TaskGroups[0].Name: { 374 Place: 10, 375 }, 376 }, 377 }) 378 379 assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place)) 380 } 381 382 // Tests the reconciler properly handles placements for a job that has some 383 // existing allocations 384 func TestReconciler_Place_Existing(t *testing.T) { 385 job := mock.Job() 386 387 // Create 3 existing allocations 388 var allocs []*structs.Allocation 389 for i := 0; i < 5; i++ { 390 alloc := mock.Alloc() 391 alloc.Job = job 392 alloc.JobID = job.ID 393 alloc.NodeID = uuid.Generate() 394 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 395 allocs = append(allocs, alloc) 396 } 397 398 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil) 399 r := reconciler.Compute() 400 401 // Assert the correct results 402 assertResults(t, r, &resultExpectation{ 403 createDeployment: nil, 404 deploymentUpdates: nil, 405 place: 5, 406 inplace: 0, 407 stop: 0, 408 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 409 job.TaskGroups[0].Name: { 410 Place: 5, 411 Ignore: 5, 412 }, 413 }, 414 }) 415 416 assertNamesHaveIndexes(t, intRange(5, 9), placeResultsToNames(r.place)) 417 } 418 419 // Tests the reconciler properly handles stopping allocations for a job that has 420 // scaled down 421 func TestReconciler_ScaleDown_Partial(t *testing.T) { 422 // Has desired 10 423 job := mock.Job() 424 425 // Create 20 existing allocations 426 var allocs []*structs.Allocation 427 for i := 0; i < 20; i++ { 428 alloc := mock.Alloc() 429 alloc.Job = job 430 alloc.JobID = job.ID 431 alloc.NodeID = uuid.Generate() 432 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 433 allocs = append(allocs, alloc) 434 } 435 436 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil) 437 r := reconciler.Compute() 438 439 // Assert the correct results 440 assertResults(t, r, &resultExpectation{ 441 createDeployment: nil, 442 deploymentUpdates: nil, 443 place: 0, 444 inplace: 0, 445 stop: 10, 446 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 447 job.TaskGroups[0].Name: { 448 Ignore: 10, 449 Stop: 10, 450 }, 451 }, 452 }) 453 454 assertNamesHaveIndexes(t, intRange(10, 19), stopResultsToNames(r.stop)) 455 } 456 457 // Tests the reconciler properly handles stopping allocations for a job that has 458 // scaled down to zero desired 459 func TestReconciler_ScaleDown_Zero(t *testing.T) { 460 // Set desired 0 461 job := mock.Job() 462 
job.TaskGroups[0].Count = 0 463 464 // Create 20 existing allocations 465 var allocs []*structs.Allocation 466 for i := 0; i < 20; i++ { 467 alloc := mock.Alloc() 468 alloc.Job = job 469 alloc.JobID = job.ID 470 alloc.NodeID = uuid.Generate() 471 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 472 allocs = append(allocs, alloc) 473 } 474 475 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil) 476 r := reconciler.Compute() 477 478 // Assert the correct results 479 assertResults(t, r, &resultExpectation{ 480 createDeployment: nil, 481 deploymentUpdates: nil, 482 place: 0, 483 inplace: 0, 484 stop: 20, 485 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 486 job.TaskGroups[0].Name: { 487 Stop: 20, 488 }, 489 }, 490 }) 491 492 assertNamesHaveIndexes(t, intRange(0, 19), stopResultsToNames(r.stop)) 493 } 494 495 // Tests the reconciler properly handles stopping allocations for a job that has 496 // scaled down to zero desired where allocs have duplicate names 497 func TestReconciler_ScaleDown_Zero_DuplicateNames(t *testing.T) { 498 // Set desired 0 499 job := mock.Job() 500 job.TaskGroups[0].Count = 0 501 502 // Create 20 existing allocations 503 var allocs []*structs.Allocation 504 var expectedStopped []int 505 for i := 0; i < 20; i++ { 506 alloc := mock.Alloc() 507 alloc.Job = job 508 alloc.JobID = job.ID 509 alloc.NodeID = uuid.Generate() 510 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%2)) 511 allocs = append(allocs, alloc) 512 expectedStopped = append(expectedStopped, i%2) 513 } 514 515 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil) 516 r := reconciler.Compute() 517 518 // Assert the correct results 519 assertResults(t, r, &resultExpectation{ 520 createDeployment: nil, 521 deploymentUpdates: nil, 522 place: 0, 523 inplace: 0, 524 stop: 20, 525 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 526 job.TaskGroups[0].Name: { 527 Stop: 20, 528 }, 529 }, 530 }) 531 532 assertNamesHaveIndexes(t, expectedStopped, stopResultsToNames(r.stop)) 533 } 534 535 // Tests the reconciler properly handles inplace upgrading allocations 536 func TestReconciler_Inplace(t *testing.T) { 537 job := mock.Job() 538 539 // Create 10 existing allocations 540 var allocs []*structs.Allocation 541 for i := 0; i < 10; i++ { 542 alloc := mock.Alloc() 543 alloc.Job = job 544 alloc.JobID = job.ID 545 alloc.NodeID = uuid.Generate() 546 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 547 allocs = append(allocs, alloc) 548 } 549 550 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil) 551 r := reconciler.Compute() 552 553 // Assert the correct results 554 assertResults(t, r, &resultExpectation{ 555 createDeployment: nil, 556 deploymentUpdates: nil, 557 place: 0, 558 inplace: 10, 559 stop: 0, 560 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 561 job.TaskGroups[0].Name: { 562 InPlaceUpdate: 10, 563 }, 564 }, 565 }) 566 567 assertNamesHaveIndexes(t, intRange(0, 9), allocsToNames(r.inplaceUpdate)) 568 } 569 570 // Tests the reconciler properly handles inplace upgrading allocations while 571 // scaling up 572 func TestReconciler_Inplace_ScaleUp(t *testing.T) { 573 // Set desired 15 574 job := mock.Job() 575 job.TaskGroups[0].Count = 15 576 577 // Create 10 existing allocations 578 var allocs []*structs.Allocation 579 for i := 0; i < 10; i++ { 580 alloc := mock.Alloc() 581 
alloc.Job = job 582 alloc.JobID = job.ID 583 alloc.NodeID = uuid.Generate() 584 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 585 allocs = append(allocs, alloc) 586 } 587 588 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil) 589 r := reconciler.Compute() 590 591 // Assert the correct results 592 assertResults(t, r, &resultExpectation{ 593 createDeployment: nil, 594 deploymentUpdates: nil, 595 place: 5, 596 inplace: 10, 597 stop: 0, 598 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 599 job.TaskGroups[0].Name: { 600 Place: 5, 601 InPlaceUpdate: 10, 602 }, 603 }, 604 }) 605 606 assertNamesHaveIndexes(t, intRange(0, 9), allocsToNames(r.inplaceUpdate)) 607 assertNamesHaveIndexes(t, intRange(10, 14), placeResultsToNames(r.place)) 608 } 609 610 // Tests the reconciler properly handles inplace upgrading allocations while 611 // scaling down 612 func TestReconciler_Inplace_ScaleDown(t *testing.T) { 613 // Set desired 5 614 job := mock.Job() 615 job.TaskGroups[0].Count = 5 616 617 // Create 10 existing allocations 618 var allocs []*structs.Allocation 619 for i := 0; i < 10; i++ { 620 alloc := mock.Alloc() 621 alloc.Job = job 622 alloc.JobID = job.ID 623 alloc.NodeID = uuid.Generate() 624 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 625 allocs = append(allocs, alloc) 626 } 627 628 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil) 629 r := reconciler.Compute() 630 631 // Assert the correct results 632 assertResults(t, r, &resultExpectation{ 633 createDeployment: nil, 634 deploymentUpdates: nil, 635 place: 0, 636 inplace: 5, 637 stop: 5, 638 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 639 job.TaskGroups[0].Name: { 640 Stop: 5, 641 InPlaceUpdate: 5, 642 }, 643 }, 644 }) 645 646 assertNamesHaveIndexes(t, intRange(0, 4), allocsToNames(r.inplaceUpdate)) 647 assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop)) 648 } 649 650 // Tests the reconciler properly handles destructive upgrading allocations 651 func TestReconciler_Destructive(t *testing.T) { 652 job := mock.Job() 653 654 // Create 10 existing allocations 655 var allocs []*structs.Allocation 656 for i := 0; i < 10; i++ { 657 alloc := mock.Alloc() 658 alloc.Job = job 659 alloc.JobID = job.ID 660 alloc.NodeID = uuid.Generate() 661 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 662 allocs = append(allocs, alloc) 663 } 664 665 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil) 666 r := reconciler.Compute() 667 668 // Assert the correct results 669 assertResults(t, r, &resultExpectation{ 670 createDeployment: nil, 671 deploymentUpdates: nil, 672 destructive: 10, 673 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 674 job.TaskGroups[0].Name: { 675 DestructiveUpdate: 10, 676 }, 677 }, 678 }) 679 680 assertNamesHaveIndexes(t, intRange(0, 9), destructiveResultsToNames(r.destructiveUpdate)) 681 } 682 683 // Tests the reconciler properly handles destructive upgrading allocations while 684 // scaling up 685 func TestReconciler_Destructive_ScaleUp(t *testing.T) { 686 // Set desired 15 687 job := mock.Job() 688 job.TaskGroups[0].Count = 15 689 690 // Create 10 existing allocations 691 var allocs []*structs.Allocation 692 for i := 0; i < 10; i++ { 693 alloc := mock.Alloc() 694 alloc.Job = job 695 alloc.JobID = job.ID 696 alloc.NodeID = uuid.Generate() 697 alloc.Name = 
structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 698 allocs = append(allocs, alloc) 699 } 700 701 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil) 702 r := reconciler.Compute() 703 704 // Assert the correct results 705 assertResults(t, r, &resultExpectation{ 706 createDeployment: nil, 707 deploymentUpdates: nil, 708 place: 5, 709 destructive: 10, 710 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 711 job.TaskGroups[0].Name: { 712 Place: 5, 713 DestructiveUpdate: 10, 714 }, 715 }, 716 }) 717 718 assertNamesHaveIndexes(t, intRange(0, 9), destructiveResultsToNames(r.destructiveUpdate)) 719 assertNamesHaveIndexes(t, intRange(10, 14), placeResultsToNames(r.place)) 720 } 721 722 // Tests the reconciler properly handles destructive upgrading allocations while 723 // scaling down 724 func TestReconciler_Destructive_ScaleDown(t *testing.T) { 725 // Set desired 5 726 job := mock.Job() 727 job.TaskGroups[0].Count = 5 728 729 // Create 10 existing allocations 730 var allocs []*structs.Allocation 731 for i := 0; i < 10; i++ { 732 alloc := mock.Alloc() 733 alloc.Job = job 734 alloc.JobID = job.ID 735 alloc.NodeID = uuid.Generate() 736 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 737 allocs = append(allocs, alloc) 738 } 739 740 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil) 741 r := reconciler.Compute() 742 743 // Assert the correct results 744 assertResults(t, r, &resultExpectation{ 745 createDeployment: nil, 746 deploymentUpdates: nil, 747 destructive: 5, 748 stop: 5, 749 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 750 job.TaskGroups[0].Name: { 751 Stop: 5, 752 DestructiveUpdate: 5, 753 }, 754 }, 755 }) 756 757 assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop)) 758 assertNamesHaveIndexes(t, intRange(0, 4), destructiveResultsToNames(r.destructiveUpdate)) 759 } 760 761 // Tests the reconciler properly handles lost nodes with allocations 762 func TestReconciler_LostNode(t *testing.T) { 763 job := mock.Job() 764 765 // Create 10 existing allocations 766 var allocs []*structs.Allocation 767 for i := 0; i < 10; i++ { 768 alloc := mock.Alloc() 769 alloc.Job = job 770 alloc.JobID = job.ID 771 alloc.NodeID = uuid.Generate() 772 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 773 allocs = append(allocs, alloc) 774 } 775 776 // Build a map of tainted nodes 777 tainted := make(map[string]*structs.Node, 2) 778 for i := 0; i < 2; i++ { 779 n := mock.Node() 780 n.ID = allocs[i].NodeID 781 n.Status = structs.NodeStatusDown 782 tainted[n.ID] = n 783 } 784 785 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted) 786 r := reconciler.Compute() 787 788 // Assert the correct results 789 assertResults(t, r, &resultExpectation{ 790 createDeployment: nil, 791 deploymentUpdates: nil, 792 place: 2, 793 inplace: 0, 794 stop: 2, 795 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 796 job.TaskGroups[0].Name: { 797 Place: 2, 798 Stop: 2, 799 Ignore: 8, 800 }, 801 }, 802 }) 803 804 assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) 805 assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) 806 } 807 808 // Tests the reconciler properly handles lost nodes with allocations while 809 // scaling up 810 func TestReconciler_LostNode_ScaleUp(t *testing.T) { 811 // Set desired 15 812 job := mock.Job() 813 
job.TaskGroups[0].Count = 15 814 815 // Create 10 existing allocations 816 var allocs []*structs.Allocation 817 for i := 0; i < 10; i++ { 818 alloc := mock.Alloc() 819 alloc.Job = job 820 alloc.JobID = job.ID 821 alloc.NodeID = uuid.Generate() 822 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 823 allocs = append(allocs, alloc) 824 } 825 826 // Build a map of tainted nodes 827 tainted := make(map[string]*structs.Node, 2) 828 for i := 0; i < 2; i++ { 829 n := mock.Node() 830 n.ID = allocs[i].NodeID 831 n.Status = structs.NodeStatusDown 832 tainted[n.ID] = n 833 } 834 835 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted) 836 r := reconciler.Compute() 837 838 // Assert the correct results 839 assertResults(t, r, &resultExpectation{ 840 createDeployment: nil, 841 deploymentUpdates: nil, 842 place: 7, 843 inplace: 0, 844 stop: 2, 845 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 846 job.TaskGroups[0].Name: { 847 Place: 7, 848 Stop: 2, 849 Ignore: 8, 850 }, 851 }, 852 }) 853 854 assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) 855 assertNamesHaveIndexes(t, intRange(0, 1, 10, 14), placeResultsToNames(r.place)) 856 } 857 858 // Tests the reconciler properly handles lost nodes with allocations while 859 // scaling down 860 func TestReconciler_LostNode_ScaleDown(t *testing.T) { 861 // Set desired 5 862 job := mock.Job() 863 job.TaskGroups[0].Count = 5 864 865 // Create 10 existing allocations 866 var allocs []*structs.Allocation 867 for i := 0; i < 10; i++ { 868 alloc := mock.Alloc() 869 alloc.Job = job 870 alloc.JobID = job.ID 871 alloc.NodeID = uuid.Generate() 872 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 873 allocs = append(allocs, alloc) 874 } 875 876 // Build a map of tainted nodes 877 tainted := make(map[string]*structs.Node, 2) 878 for i := 0; i < 2; i++ { 879 n := mock.Node() 880 n.ID = allocs[i].NodeID 881 n.Status = structs.NodeStatusDown 882 tainted[n.ID] = n 883 } 884 885 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted) 886 r := reconciler.Compute() 887 888 // Assert the correct results 889 assertResults(t, r, &resultExpectation{ 890 createDeployment: nil, 891 deploymentUpdates: nil, 892 place: 0, 893 inplace: 0, 894 stop: 5, 895 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 896 job.TaskGroups[0].Name: { 897 Stop: 5, 898 Ignore: 5, 899 }, 900 }, 901 }) 902 903 assertNamesHaveIndexes(t, intRange(0, 1, 7, 9), stopResultsToNames(r.stop)) 904 } 905 906 // Tests the reconciler properly handles draining nodes with allocations 907 func TestReconciler_DrainNode(t *testing.T) { 908 job := mock.Job() 909 910 // Create 10 existing allocations 911 var allocs []*structs.Allocation 912 for i := 0; i < 10; i++ { 913 alloc := mock.Alloc() 914 alloc.Job = job 915 alloc.JobID = job.ID 916 alloc.NodeID = uuid.Generate() 917 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 918 allocs = append(allocs, alloc) 919 } 920 921 // Build a map of tainted nodes 922 tainted := make(map[string]*structs.Node, 2) 923 for i := 0; i < 2; i++ { 924 n := mock.Node() 925 n.ID = allocs[i].NodeID 926 n.Drain = true 927 tainted[n.ID] = n 928 } 929 930 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted) 931 r := reconciler.Compute() 932 933 // Assert the correct results 934 assertResults(t, r, &resultExpectation{ 935 createDeployment: nil, 
936 deploymentUpdates: nil, 937 place: 2, 938 inplace: 0, 939 stop: 2, 940 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 941 job.TaskGroups[0].Name: { 942 Migrate: 2, 943 Ignore: 8, 944 }, 945 }, 946 }) 947 948 assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) 949 assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) 950 assertPlaceResultsHavePreviousAllocs(t, 2, r.place) 951 // These should not have the reschedule field set 952 assertPlacementsAreRescheduled(t, 0, r.place) 953 } 954 955 // Tests the reconciler properly handles draining nodes with allocations while 956 // scaling up 957 func TestReconciler_DrainNode_ScaleUp(t *testing.T) { 958 // Set desired 15 959 job := mock.Job() 960 job.TaskGroups[0].Count = 15 961 962 // Create 10 existing allocations 963 var allocs []*structs.Allocation 964 for i := 0; i < 10; i++ { 965 alloc := mock.Alloc() 966 alloc.Job = job 967 alloc.JobID = job.ID 968 alloc.NodeID = uuid.Generate() 969 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 970 allocs = append(allocs, alloc) 971 } 972 973 // Build a map of tainted nodes 974 tainted := make(map[string]*structs.Node, 2) 975 for i := 0; i < 2; i++ { 976 n := mock.Node() 977 n.ID = allocs[i].NodeID 978 n.Drain = true 979 tainted[n.ID] = n 980 } 981 982 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted) 983 r := reconciler.Compute() 984 985 // Assert the correct results 986 assertResults(t, r, &resultExpectation{ 987 createDeployment: nil, 988 deploymentUpdates: nil, 989 place: 7, 990 inplace: 0, 991 stop: 2, 992 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 993 job.TaskGroups[0].Name: { 994 Place: 5, 995 Migrate: 2, 996 Ignore: 8, 997 }, 998 }, 999 }) 1000 1001 assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) 1002 assertNamesHaveIndexes(t, intRange(0, 1, 10, 14), placeResultsToNames(r.place)) 1003 assertPlaceResultsHavePreviousAllocs(t, 2, r.place) 1004 // These should not have the reschedule field set 1005 assertPlacementsAreRescheduled(t, 0, r.place) 1006 } 1007 1008 // Tests the reconciler properly handles draining nodes with allocations while 1009 // scaling down 1010 func TestReconciler_DrainNode_ScaleDown(t *testing.T) { 1011 // Set desired 8 1012 job := mock.Job() 1013 job.TaskGroups[0].Count = 8 1014 1015 // Create 10 existing allocations 1016 var allocs []*structs.Allocation 1017 for i := 0; i < 10; i++ { 1018 alloc := mock.Alloc() 1019 alloc.Job = job 1020 alloc.JobID = job.ID 1021 alloc.NodeID = uuid.Generate() 1022 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1023 allocs = append(allocs, alloc) 1024 } 1025 1026 // Build a map of tainted nodes 1027 tainted := make(map[string]*structs.Node, 3) 1028 for i := 0; i < 3; i++ { 1029 n := mock.Node() 1030 n.ID = allocs[i].NodeID 1031 n.Drain = true 1032 tainted[n.ID] = n 1033 } 1034 1035 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted) 1036 r := reconciler.Compute() 1037 1038 // Assert the correct results 1039 assertResults(t, r, &resultExpectation{ 1040 createDeployment: nil, 1041 deploymentUpdates: nil, 1042 place: 1, 1043 inplace: 0, 1044 stop: 3, 1045 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1046 job.TaskGroups[0].Name: { 1047 Migrate: 1, 1048 Stop: 2, 1049 Ignore: 7, 1050 }, 1051 }, 1052 }) 1053 1054 assertNamesHaveIndexes(t, intRange(0, 2), stopResultsToNames(r.stop)) 1055 
assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place)) 1056 assertPlaceResultsHavePreviousAllocs(t, 1, r.place) 1057 // These should not have the reschedule field set 1058 assertPlacementsAreRescheduled(t, 0, r.place) 1059 } 1060 1061 // Tests the reconciler properly handles a task group being removed 1062 func TestReconciler_RemovedTG(t *testing.T) { 1063 job := mock.Job() 1064 1065 // Create 10 allocations for a tg that no longer exists 1066 var allocs []*structs.Allocation 1067 for i := 0; i < 10; i++ { 1068 alloc := mock.Alloc() 1069 alloc.Job = job 1070 alloc.JobID = job.ID 1071 alloc.NodeID = uuid.Generate() 1072 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1073 allocs = append(allocs, alloc) 1074 } 1075 1076 oldName := job.TaskGroups[0].Name 1077 newName := "different" 1078 job.TaskGroups[0].Name = newName 1079 1080 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil) 1081 r := reconciler.Compute() 1082 1083 // Assert the correct results 1084 assertResults(t, r, &resultExpectation{ 1085 createDeployment: nil, 1086 deploymentUpdates: nil, 1087 place: 10, 1088 inplace: 0, 1089 stop: 10, 1090 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1091 oldName: { 1092 Stop: 10, 1093 }, 1094 newName: { 1095 Place: 10, 1096 }, 1097 }, 1098 }) 1099 1100 assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop)) 1101 assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place)) 1102 } 1103 1104 // Tests the reconciler properly handles a job in stopped states 1105 func TestReconciler_JobStopped(t *testing.T) { 1106 job := mock.Job() 1107 job.Stop = true 1108 1109 cases := []struct { 1110 name string 1111 job *structs.Job 1112 jobID, taskGroup string 1113 }{ 1114 { 1115 name: "stopped job", 1116 job: job, 1117 jobID: job.ID, 1118 taskGroup: job.TaskGroups[0].Name, 1119 }, 1120 { 1121 name: "nil job", 1122 job: nil, 1123 jobID: "foo", 1124 taskGroup: "bar", 1125 }, 1126 } 1127 1128 for _, c := range cases { 1129 t.Run(c.name, func(t *testing.T) { 1130 // Create 10 allocations 1131 var allocs []*structs.Allocation 1132 for i := 0; i < 10; i++ { 1133 alloc := mock.Alloc() 1134 alloc.Job = c.job 1135 alloc.JobID = c.jobID 1136 alloc.NodeID = uuid.Generate() 1137 alloc.Name = structs.AllocName(c.jobID, c.taskGroup, uint(i)) 1138 alloc.TaskGroup = c.taskGroup 1139 allocs = append(allocs, alloc) 1140 } 1141 1142 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, c.jobID, c.job, nil, allocs, nil) 1143 r := reconciler.Compute() 1144 1145 // Assert the correct results 1146 assertResults(t, r, &resultExpectation{ 1147 createDeployment: nil, 1148 deploymentUpdates: nil, 1149 place: 0, 1150 inplace: 0, 1151 stop: 10, 1152 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1153 c.taskGroup: { 1154 Stop: 10, 1155 }, 1156 }, 1157 }) 1158 1159 assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop)) 1160 }) 1161 } 1162 } 1163 1164 // Tests the reconciler properly handles jobs with multiple task groups 1165 func TestReconciler_MultiTG(t *testing.T) { 1166 job := mock.Job() 1167 tg2 := job.TaskGroups[0].Copy() 1168 tg2.Name = "foo" 1169 job.TaskGroups = append(job.TaskGroups, tg2) 1170 1171 // Create 2 existing allocations for the first tg 1172 var allocs []*structs.Allocation 1173 for i := 0; i < 2; i++ { 1174 alloc := mock.Alloc() 1175 alloc.Job = job 1176 alloc.JobID = job.ID 1177 alloc.NodeID = uuid.Generate() 1178 alloc.Name = structs.AllocName(job.ID, 
job.TaskGroups[0].Name, uint(i)) 1179 allocs = append(allocs, alloc) 1180 } 1181 1182 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil) 1183 r := reconciler.Compute() 1184 1185 // Assert the correct results 1186 assertResults(t, r, &resultExpectation{ 1187 createDeployment: nil, 1188 deploymentUpdates: nil, 1189 place: 18, 1190 inplace: 0, 1191 stop: 0, 1192 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1193 job.TaskGroups[0].Name: { 1194 Place: 8, 1195 Ignore: 2, 1196 }, 1197 tg2.Name: { 1198 Place: 10, 1199 }, 1200 }, 1201 }) 1202 1203 assertNamesHaveIndexes(t, intRange(2, 9, 0, 9), placeResultsToNames(r.place)) 1204 } 1205 1206 // Tests rescheduling failed batch allocations 1207 func TestReconciler_Reschedule_Batch(t *testing.T) { 1208 // Set desired 4 1209 job := mock.Job() 1210 job.TaskGroups[0].Count = 4 1211 1212 // Set up reschedule policy 1213 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour} 1214 1215 // Create 6 existing allocations - 2 running, 1 complete and 3 failed 1216 var allocs []*structs.Allocation 1217 for i := 0; i < 6; i++ { 1218 alloc := mock.Alloc() 1219 alloc.Job = job 1220 alloc.JobID = job.ID 1221 alloc.NodeID = uuid.Generate() 1222 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1223 allocs = append(allocs, alloc) 1224 alloc.ClientStatus = structs.AllocClientStatusRunning 1225 } 1226 // Mark 3 as failed with restart tracking info 1227 allocs[0].ClientStatus = structs.AllocClientStatusFailed 1228 allocs[0].NextAllocation = allocs[1].ID 1229 allocs[1].ClientStatus = structs.AllocClientStatusFailed 1230 allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ 1231 {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), 1232 PrevAllocID: allocs[0].ID, 1233 PrevNodeID: uuid.Generate(), 1234 }, 1235 }} 1236 allocs[1].NextAllocation = allocs[2].ID 1237 allocs[2].ClientStatus = structs.AllocClientStatusFailed 1238 allocs[2].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ 1239 {RescheduleTime: time.Now().Add(-2 * time.Hour).UTC().UnixNano(), 1240 PrevAllocID: allocs[0].ID, 1241 PrevNodeID: uuid.Generate(), 1242 }, 1243 {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), 1244 PrevAllocID: allocs[1].ID, 1245 PrevNodeID: uuid.Generate(), 1246 }, 1247 }} 1248 // Mark one as complete 1249 allocs[5].ClientStatus = structs.AllocClientStatusComplete 1250 1251 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil) 1252 r := reconciler.Compute() 1253 1254 // Two reschedule attempts were made, one more can be made 1255 // Alloc 5 should not be replaced because it is terminal 1256 assertResults(t, r, &resultExpectation{ 1257 createDeployment: nil, 1258 deploymentUpdates: nil, 1259 place: 1, 1260 inplace: 0, 1261 stop: 0, 1262 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1263 job.TaskGroups[0].Name: { 1264 Place: 1, 1265 Ignore: 3, 1266 }, 1267 }, 1268 }) 1269 assertNamesHaveIndexes(t, intRange(2, 2), placeResultsToNames(r.place)) 1270 assertPlaceResultsHavePreviousAllocs(t, 1, r.place) 1271 assertPlacementsAreRescheduled(t, 1, r.place) 1272 } 1273 1274 // Tests rescheduling failed service allocations with desired state stop 1275 func TestReconciler_Reschedule_Service(t *testing.T) { 1276 // Set desired 5 1277 job := mock.Job() 1278 job.TaskGroups[0].Count = 5 1279 1280 // Set up reschedule 
policy 1281 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 1, Interval: 24 * time.Hour} 1282 1283 // Create 5 existing allocations 1284 var allocs []*structs.Allocation 1285 for i := 0; i < 5; i++ { 1286 alloc := mock.Alloc() 1287 alloc.Job = job 1288 alloc.JobID = job.ID 1289 alloc.NodeID = uuid.Generate() 1290 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1291 allocs = append(allocs, alloc) 1292 alloc.ClientStatus = structs.AllocClientStatusRunning 1293 } 1294 // Mark two as failed 1295 allocs[0].ClientStatus = structs.AllocClientStatusFailed 1296 allocs[1].ClientStatus = structs.AllocClientStatusFailed 1297 1298 // Mark one of them as already rescheduled once 1299 allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ 1300 {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), 1301 PrevAllocID: uuid.Generate(), 1302 PrevNodeID: uuid.Generate(), 1303 }, 1304 }} 1305 1306 // Mark one as desired state stop 1307 allocs[4].DesiredStatus = structs.AllocDesiredStatusStop 1308 1309 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil) 1310 r := reconciler.Compute() 1311 1312 // Should place 2, one is rescheduled, one is past its reschedule limit and one is a new placement 1313 assertResults(t, r, &resultExpectation{ 1314 createDeployment: nil, 1315 deploymentUpdates: nil, 1316 place: 2, 1317 inplace: 0, 1318 stop: 0, 1319 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1320 job.TaskGroups[0].Name: { 1321 Place: 2, 1322 Ignore: 3, 1323 }, 1324 }, 1325 }) 1326 1327 assertNamesHaveIndexes(t, intRange(0, 0, 4, 4), placeResultsToNames(r.place)) 1328 // 2 rescheduled allocs should have previous allocs 1329 assertPlaceResultsHavePreviousAllocs(t, 1, r.place) 1330 assertPlacementsAreRescheduled(t, 1, r.place) 1331 } 1332 1333 // Tests failed service allocations that were already rescheduled won't be rescheduled again 1334 func TestReconciler_DontReschedule_PreviouslyRescheduled(t *testing.T) { 1335 // Set desired 5 1336 job := mock.Job() 1337 job.TaskGroups[0].Count = 5 1338 1339 // Set up reschedule policy 1340 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 5, Interval: 24 * time.Hour} 1341 1342 // Create 7 existing allocations 1343 var allocs []*structs.Allocation 1344 for i := 0; i < 7; i++ { 1345 alloc := mock.Alloc() 1346 alloc.Job = job 1347 alloc.JobID = job.ID 1348 alloc.NodeID = uuid.Generate() 1349 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1350 allocs = append(allocs, alloc) 1351 alloc.ClientStatus = structs.AllocClientStatusRunning 1352 } 1353 // Mark two as failed and rescheduled 1354 allocs[0].ClientStatus = structs.AllocClientStatusFailed 1355 allocs[0].ID = allocs[1].ID 1356 allocs[1].ClientStatus = structs.AllocClientStatusFailed 1357 allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ 1358 {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), 1359 PrevAllocID: uuid.Generate(), 1360 PrevNodeID: uuid.Generate(), 1361 }, 1362 }} 1363 allocs[1].NextAllocation = allocs[2].ID 1364 1365 // Mark one as desired state stop 1366 allocs[4].DesiredStatus = structs.AllocDesiredStatusStop 1367 1368 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil) 1369 r := reconciler.Compute() 1370 1371 // Should place 1 - one is a new placement to make up the desired count of 5 1372 // failing 
allocs are not rescheduled 1373 assertResults(t, r, &resultExpectation{ 1374 createDeployment: nil, 1375 deploymentUpdates: nil, 1376 place: 1, 1377 inplace: 0, 1378 stop: 0, 1379 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1380 job.TaskGroups[0].Name: { 1381 Place: 1, 1382 Ignore: 4, 1383 }, 1384 }, 1385 }) 1386 1387 // name index 0 is used for the replacement because its 1388 assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place)) 1389 } 1390 1391 // Tests the reconciler cancels an old deployment when the job is being stopped 1392 func TestReconciler_CancelDeployment_JobStop(t *testing.T) { 1393 job := mock.Job() 1394 job.Stop = true 1395 1396 running := structs.NewDeployment(job) 1397 failed := structs.NewDeployment(job) 1398 failed.Status = structs.DeploymentStatusFailed 1399 1400 cases := []struct { 1401 name string 1402 job *structs.Job 1403 jobID, taskGroup string 1404 deployment *structs.Deployment 1405 cancel bool 1406 }{ 1407 { 1408 name: "stopped job, running deployment", 1409 job: job, 1410 jobID: job.ID, 1411 taskGroup: job.TaskGroups[0].Name, 1412 deployment: running, 1413 cancel: true, 1414 }, 1415 { 1416 name: "nil job, running deployment", 1417 job: nil, 1418 jobID: "foo", 1419 taskGroup: "bar", 1420 deployment: running, 1421 cancel: true, 1422 }, 1423 { 1424 name: "stopped job, failed deployment", 1425 job: job, 1426 jobID: job.ID, 1427 taskGroup: job.TaskGroups[0].Name, 1428 deployment: failed, 1429 cancel: false, 1430 }, 1431 { 1432 name: "nil job, failed deployment", 1433 job: nil, 1434 jobID: "foo", 1435 taskGroup: "bar", 1436 deployment: failed, 1437 cancel: false, 1438 }, 1439 } 1440 1441 for _, c := range cases { 1442 t.Run(c.name, func(t *testing.T) { 1443 // Create 10 allocations 1444 var allocs []*structs.Allocation 1445 for i := 0; i < 10; i++ { 1446 alloc := mock.Alloc() 1447 alloc.Job = c.job 1448 alloc.JobID = c.jobID 1449 alloc.NodeID = uuid.Generate() 1450 alloc.Name = structs.AllocName(c.jobID, c.taskGroup, uint(i)) 1451 alloc.TaskGroup = c.taskGroup 1452 allocs = append(allocs, alloc) 1453 } 1454 1455 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, c.jobID, c.job, c.deployment, allocs, nil) 1456 r := reconciler.Compute() 1457 1458 var updates []*structs.DeploymentStatusUpdate 1459 if c.cancel { 1460 updates = []*structs.DeploymentStatusUpdate{ 1461 { 1462 DeploymentID: c.deployment.ID, 1463 Status: structs.DeploymentStatusCancelled, 1464 StatusDescription: structs.DeploymentStatusDescriptionStoppedJob, 1465 }, 1466 } 1467 } 1468 1469 // Assert the correct results 1470 assertResults(t, r, &resultExpectation{ 1471 createDeployment: nil, 1472 deploymentUpdates: updates, 1473 place: 0, 1474 inplace: 0, 1475 stop: 10, 1476 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1477 c.taskGroup: { 1478 Stop: 10, 1479 }, 1480 }, 1481 }) 1482 1483 assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop)) 1484 }) 1485 } 1486 } 1487 1488 // Tests the reconciler cancels an old deployment when the job is updated 1489 func TestReconciler_CancelDeployment_JobUpdate(t *testing.T) { 1490 // Create a base job 1491 job := mock.Job() 1492 1493 // Create two deployments 1494 running := structs.NewDeployment(job) 1495 failed := structs.NewDeployment(job) 1496 failed.Status = structs.DeploymentStatusFailed 1497 1498 // Make the job newer than the deployment 1499 job.Version += 10 1500 1501 cases := []struct { 1502 name string 1503 deployment *structs.Deployment 1504 cancel bool 1505 }{ 1506 { 1507 name: "running 
deployment", 1508 deployment: running, 1509 cancel: true, 1510 }, 1511 { 1512 name: "failed deployment", 1513 deployment: failed, 1514 cancel: false, 1515 }, 1516 } 1517 1518 for _, c := range cases { 1519 t.Run(c.name, func(t *testing.T) { 1520 // Create 10 allocations 1521 var allocs []*structs.Allocation 1522 for i := 0; i < 10; i++ { 1523 alloc := mock.Alloc() 1524 alloc.Job = job 1525 alloc.JobID = job.ID 1526 alloc.NodeID = uuid.Generate() 1527 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1528 alloc.TaskGroup = job.TaskGroups[0].Name 1529 allocs = append(allocs, alloc) 1530 } 1531 1532 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, c.deployment, allocs, nil) 1533 r := reconciler.Compute() 1534 1535 var updates []*structs.DeploymentStatusUpdate 1536 if c.cancel { 1537 updates = []*structs.DeploymentStatusUpdate{ 1538 { 1539 DeploymentID: c.deployment.ID, 1540 Status: structs.DeploymentStatusCancelled, 1541 StatusDescription: structs.DeploymentStatusDescriptionNewerJob, 1542 }, 1543 } 1544 } 1545 1546 // Assert the correct results 1547 assertResults(t, r, &resultExpectation{ 1548 createDeployment: nil, 1549 deploymentUpdates: updates, 1550 place: 0, 1551 inplace: 0, 1552 stop: 0, 1553 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1554 job.TaskGroups[0].Name: { 1555 Ignore: 10, 1556 }, 1557 }, 1558 }) 1559 }) 1560 } 1561 } 1562 1563 // Tests the reconciler creates a deployment and does a rolling upgrade with 1564 // destructive changes 1565 func TestReconciler_CreateDeployment_RollingUpgrade_Destructive(t *testing.T) { 1566 job := mock.Job() 1567 job.TaskGroups[0].Update = noCanaryUpdate 1568 1569 // Create 10 allocations from the old job 1570 var allocs []*structs.Allocation 1571 for i := 0; i < 10; i++ { 1572 alloc := mock.Alloc() 1573 alloc.Job = job 1574 alloc.JobID = job.ID 1575 alloc.NodeID = uuid.Generate() 1576 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1577 alloc.TaskGroup = job.TaskGroups[0].Name 1578 allocs = append(allocs, alloc) 1579 } 1580 1581 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil) 1582 r := reconciler.Compute() 1583 1584 d := structs.NewDeployment(job) 1585 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 1586 DesiredTotal: 10, 1587 } 1588 1589 // Assert the correct results 1590 assertResults(t, r, &resultExpectation{ 1591 createDeployment: d, 1592 deploymentUpdates: nil, 1593 destructive: 4, 1594 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1595 job.TaskGroups[0].Name: { 1596 DestructiveUpdate: 4, 1597 Ignore: 6, 1598 }, 1599 }, 1600 }) 1601 1602 assertNamesHaveIndexes(t, intRange(0, 3), destructiveResultsToNames(r.destructiveUpdate)) 1603 } 1604 1605 // Tests the reconciler creates a deployment for inplace updates 1606 func TestReconciler_CreateDeployment_RollingUpgrade_Inplace(t *testing.T) { 1607 job := mock.Job() 1608 job.TaskGroups[0].Update = noCanaryUpdate 1609 1610 // Create 10 allocations from the old job 1611 var allocs []*structs.Allocation 1612 for i := 0; i < 10; i++ { 1613 alloc := mock.Alloc() 1614 alloc.Job = job 1615 alloc.JobID = job.ID 1616 alloc.NodeID = uuid.Generate() 1617 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1618 alloc.TaskGroup = job.TaskGroups[0].Name 1619 allocs = append(allocs, alloc) 1620 } 1621 1622 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil) 1623 
r := reconciler.Compute() 1624 1625 d := structs.NewDeployment(job) 1626 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 1627 DesiredTotal: 10, 1628 } 1629 1630 // Assert the correct results 1631 assertResults(t, r, &resultExpectation{ 1632 createDeployment: d, 1633 deploymentUpdates: nil, 1634 place: 0, 1635 inplace: 10, 1636 stop: 0, 1637 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1638 job.TaskGroups[0].Name: { 1639 InPlaceUpdate: 10, 1640 }, 1641 }, 1642 }) 1643 } 1644 1645 // Tests the reconciler doesn't creates a deployment if there are no changes 1646 func TestReconciler_DontCreateDeployment_NoChanges(t *testing.T) { 1647 job := mock.Job() 1648 job.TaskGroups[0].Update = noCanaryUpdate 1649 1650 // Create 10 allocations from the job 1651 var allocs []*structs.Allocation 1652 for i := 0; i < 10; i++ { 1653 alloc := mock.Alloc() 1654 alloc.Job = job 1655 alloc.JobID = job.ID 1656 alloc.NodeID = uuid.Generate() 1657 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1658 alloc.TaskGroup = job.TaskGroups[0].Name 1659 allocs = append(allocs, alloc) 1660 } 1661 1662 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil) 1663 r := reconciler.Compute() 1664 1665 // Assert the correct results 1666 assertResults(t, r, &resultExpectation{ 1667 createDeployment: nil, 1668 deploymentUpdates: nil, 1669 place: 0, 1670 inplace: 0, 1671 stop: 0, 1672 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1673 job.TaskGroups[0].Name: { 1674 DestructiveUpdate: 0, 1675 Ignore: 10, 1676 }, 1677 }, 1678 }) 1679 } 1680 1681 // Tests the reconciler doesn't place any more canaries when the deployment is 1682 // paused or failed 1683 func TestReconciler_PausedOrFailedDeployment_NoMoreCanaries(t *testing.T) { 1684 job := mock.Job() 1685 job.TaskGroups[0].Update = canaryUpdate 1686 1687 cases := []struct { 1688 name string 1689 deploymentStatus string 1690 stop uint64 1691 }{ 1692 { 1693 name: "paused deployment", 1694 deploymentStatus: structs.DeploymentStatusPaused, 1695 stop: 0, 1696 }, 1697 { 1698 name: "failed deployment", 1699 deploymentStatus: structs.DeploymentStatusFailed, 1700 stop: 1, 1701 }, 1702 } 1703 1704 for _, c := range cases { 1705 t.Run(c.name, func(t *testing.T) { 1706 // Create a deployment that is paused/failed and has placed some canaries 1707 d := structs.NewDeployment(job) 1708 d.Status = c.deploymentStatus 1709 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 1710 Promoted: false, 1711 DesiredCanaries: 2, 1712 DesiredTotal: 10, 1713 PlacedAllocs: 1, 1714 } 1715 1716 // Create 10 allocations for the original job 1717 var allocs []*structs.Allocation 1718 for i := 0; i < 10; i++ { 1719 alloc := mock.Alloc() 1720 alloc.Job = job 1721 alloc.JobID = job.ID 1722 alloc.NodeID = uuid.Generate() 1723 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1724 alloc.TaskGroup = job.TaskGroups[0].Name 1725 allocs = append(allocs, alloc) 1726 } 1727 1728 // Create one canary 1729 canary := mock.Alloc() 1730 canary.Job = job 1731 canary.JobID = job.ID 1732 canary.NodeID = uuid.Generate() 1733 canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, 0) 1734 canary.TaskGroup = job.TaskGroups[0].Name 1735 canary.DeploymentID = d.ID 1736 allocs = append(allocs, canary) 1737 d.TaskGroups[canary.TaskGroup].PlacedCanaries = []string{canary.ID} 1738 1739 mockUpdateFn := allocUpdateFnMock(map[string]allocUpdateType{canary.ID: allocUpdateFnIgnore}, 
allocUpdateFnDestructive) 1740 reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil) 1741 r := reconciler.Compute() 1742 1743 // Assert the correct results 1744 assertResults(t, r, &resultExpectation{ 1745 createDeployment: nil, 1746 deploymentUpdates: nil, 1747 place: 0, 1748 inplace: 0, 1749 stop: int(c.stop), 1750 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1751 job.TaskGroups[0].Name: { 1752 Ignore: 11 - c.stop, 1753 Stop: c.stop, 1754 }, 1755 }, 1756 }) 1757 }) 1758 } 1759 } 1760 1761 // Tests the reconciler doesn't place any more allocs when the deployment is 1762 // paused or failed 1763 func TestReconciler_PausedOrFailedDeployment_NoMorePlacements(t *testing.T) { 1764 job := mock.Job() 1765 job.TaskGroups[0].Update = noCanaryUpdate 1766 job.TaskGroups[0].Count = 15 1767 1768 cases := []struct { 1769 name string 1770 deploymentStatus string 1771 }{ 1772 { 1773 name: "paused deployment", 1774 deploymentStatus: structs.DeploymentStatusPaused, 1775 }, 1776 { 1777 name: "failed deployment", 1778 deploymentStatus: structs.DeploymentStatusFailed, 1779 }, 1780 } 1781 1782 for _, c := range cases { 1783 t.Run(c.name, func(t *testing.T) { 1784 // Create a deployment that is paused and has placed some canaries 1785 d := structs.NewDeployment(job) 1786 d.Status = c.deploymentStatus 1787 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 1788 Promoted: false, 1789 DesiredTotal: 15, 1790 PlacedAllocs: 10, 1791 } 1792 1793 // Create 10 allocations for the new job 1794 var allocs []*structs.Allocation 1795 for i := 0; i < 10; i++ { 1796 alloc := mock.Alloc() 1797 alloc.Job = job 1798 alloc.JobID = job.ID 1799 alloc.NodeID = uuid.Generate() 1800 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1801 alloc.TaskGroup = job.TaskGroups[0].Name 1802 allocs = append(allocs, alloc) 1803 } 1804 1805 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil) 1806 r := reconciler.Compute() 1807 1808 // Assert the correct results 1809 assertResults(t, r, &resultExpectation{ 1810 createDeployment: nil, 1811 deploymentUpdates: nil, 1812 place: 0, 1813 inplace: 0, 1814 stop: 0, 1815 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1816 job.TaskGroups[0].Name: { 1817 Ignore: 10, 1818 }, 1819 }, 1820 }) 1821 }) 1822 } 1823 } 1824 1825 // Tests the reconciler doesn't do any more destructive updates when the 1826 // deployment is paused or failed 1827 func TestReconciler_PausedOrFailedDeployment_NoMoreDestructiveUpdates(t *testing.T) { 1828 job := mock.Job() 1829 job.TaskGroups[0].Update = noCanaryUpdate 1830 1831 cases := []struct { 1832 name string 1833 deploymentStatus string 1834 }{ 1835 { 1836 name: "paused deployment", 1837 deploymentStatus: structs.DeploymentStatusPaused, 1838 }, 1839 { 1840 name: "failed deployment", 1841 deploymentStatus: structs.DeploymentStatusFailed, 1842 }, 1843 } 1844 1845 for _, c := range cases { 1846 t.Run(c.name, func(t *testing.T) { 1847 // Create a deployment that is paused and has placed some canaries 1848 d := structs.NewDeployment(job) 1849 d.Status = c.deploymentStatus 1850 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 1851 Promoted: false, 1852 DesiredTotal: 10, 1853 PlacedAllocs: 1, 1854 } 1855 1856 // Create 9 allocations for the original job 1857 var allocs []*structs.Allocation 1858 for i := 1; i < 10; i++ { 1859 alloc := mock.Alloc() 1860 alloc.Job = job 1861 alloc.JobID = job.ID 1862 alloc.NodeID = uuid.Generate() 
1863 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1864 alloc.TaskGroup = job.TaskGroups[0].Name 1865 allocs = append(allocs, alloc) 1866 } 1867 1868 // Create one for the new job 1869 newAlloc := mock.Alloc() 1870 newAlloc.Job = job 1871 newAlloc.JobID = job.ID 1872 newAlloc.NodeID = uuid.Generate() 1873 newAlloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, 0) 1874 newAlloc.TaskGroup = job.TaskGroups[0].Name 1875 newAlloc.DeploymentID = d.ID 1876 allocs = append(allocs, newAlloc) 1877 1878 mockUpdateFn := allocUpdateFnMock(map[string]allocUpdateType{newAlloc.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive) 1879 reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil) 1880 r := reconciler.Compute() 1881 1882 // Assert the correct results 1883 assertResults(t, r, &resultExpectation{ 1884 createDeployment: nil, 1885 deploymentUpdates: nil, 1886 place: 0, 1887 inplace: 0, 1888 stop: 0, 1889 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1890 job.TaskGroups[0].Name: { 1891 Ignore: 10, 1892 }, 1893 }, 1894 }) 1895 }) 1896 } 1897 } 1898 1899 // Tests the reconciler handles migrations correctly when a deployment is paused 1900 // or failed 1901 func TestReconciler_PausedOrFailedDeployment_Migrations(t *testing.T) { 1902 job := mock.Job() 1903 job.TaskGroups[0].Update = noCanaryUpdate 1904 1905 cases := []struct { 1906 name string 1907 deploymentStatus string 1908 place int 1909 stop int 1910 ignoreAnnotation uint64 1911 migrateAnnotation uint64 1912 stopAnnotation uint64 1913 }{ 1914 { 1915 name: "paused deployment", 1916 deploymentStatus: structs.DeploymentStatusPaused, 1917 place: 0, 1918 stop: 3, 1919 ignoreAnnotation: 5, 1920 stopAnnotation: 3, 1921 }, 1922 { 1923 name: "failed deployment", 1924 deploymentStatus: structs.DeploymentStatusFailed, 1925 place: 0, 1926 stop: 3, 1927 ignoreAnnotation: 5, 1928 migrateAnnotation: 0, 1929 stopAnnotation: 3, 1930 }, 1931 } 1932 1933 for _, c := range cases { 1934 t.Run(c.name, func(t *testing.T) { 1935 // Create a deployment that is paused and has placed some canaries 1936 d := structs.NewDeployment(job) 1937 d.Status = c.deploymentStatus 1938 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 1939 Promoted: false, 1940 DesiredTotal: 10, 1941 PlacedAllocs: 8, 1942 } 1943 1944 // Create 8 allocations in the deployment 1945 var allocs []*structs.Allocation 1946 for i := 0; i < 8; i++ { 1947 alloc := mock.Alloc() 1948 alloc.Job = job 1949 alloc.JobID = job.ID 1950 alloc.NodeID = uuid.Generate() 1951 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1952 alloc.TaskGroup = job.TaskGroups[0].Name 1953 alloc.DeploymentID = d.ID 1954 allocs = append(allocs, alloc) 1955 } 1956 1957 // Build a map of tainted nodes 1958 tainted := make(map[string]*structs.Node, 3) 1959 for i := 0; i < 3; i++ { 1960 n := mock.Node() 1961 n.ID = allocs[i].NodeID 1962 n.Drain = true 1963 tainted[n.ID] = n 1964 } 1965 1966 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, d, allocs, tainted) 1967 r := reconciler.Compute() 1968 1969 // Assert the correct results 1970 assertResults(t, r, &resultExpectation{ 1971 createDeployment: nil, 1972 deploymentUpdates: nil, 1973 place: c.place, 1974 inplace: 0, 1975 stop: c.stop, 1976 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1977 job.TaskGroups[0].Name: { 1978 Migrate: c.migrateAnnotation, 1979 Ignore: c.ignoreAnnotation, 1980 Stop: c.stopAnnotation, 1981 }, 1982 }, 1983 }) 
1984 }) 1985 } 1986 } 1987 1988 // Tests the reconciler handles migrating a canary correctly on a draining node 1989 func TestReconciler_DrainNode_Canary(t *testing.T) { 1990 job := mock.Job() 1991 job.TaskGroups[0].Update = canaryUpdate 1992 1993 // Create a deployment that is paused and has placed some canaries 1994 d := structs.NewDeployment(job) 1995 s := &structs.DeploymentState{ 1996 Promoted: false, 1997 DesiredTotal: 10, 1998 DesiredCanaries: 2, 1999 PlacedAllocs: 2, 2000 } 2001 d.TaskGroups[job.TaskGroups[0].Name] = s 2002 2003 // Create 10 allocations from the old job 2004 var allocs []*structs.Allocation 2005 for i := 0; i < 10; i++ { 2006 alloc := mock.Alloc() 2007 alloc.Job = job 2008 alloc.JobID = job.ID 2009 alloc.NodeID = uuid.Generate() 2010 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2011 alloc.TaskGroup = job.TaskGroups[0].Name 2012 allocs = append(allocs, alloc) 2013 } 2014 2015 // Create two canaries for the new job 2016 handled := make(map[string]allocUpdateType) 2017 for i := 0; i < 2; i++ { 2018 // Create one canary 2019 canary := mock.Alloc() 2020 canary.Job = job 2021 canary.JobID = job.ID 2022 canary.NodeID = uuid.Generate() 2023 canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2024 canary.TaskGroup = job.TaskGroups[0].Name 2025 canary.DeploymentID = d.ID 2026 s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) 2027 allocs = append(allocs, canary) 2028 handled[canary.ID] = allocUpdateFnIgnore 2029 } 2030 2031 // Build a map of tainted nodes that contains the last canary 2032 tainted := make(map[string]*structs.Node, 1) 2033 n := mock.Node() 2034 n.ID = allocs[11].NodeID 2035 n.Drain = true 2036 tainted[n.ID] = n 2037 2038 mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 2039 reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, tainted) 2040 r := reconciler.Compute() 2041 2042 // Assert the correct results 2043 assertResults(t, r, &resultExpectation{ 2044 createDeployment: nil, 2045 deploymentUpdates: nil, 2046 place: 1, 2047 inplace: 0, 2048 stop: 1, 2049 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2050 job.TaskGroups[0].Name: { 2051 Canary: 1, 2052 Ignore: 11, 2053 }, 2054 }, 2055 }) 2056 assertNamesHaveIndexes(t, intRange(1, 1), stopResultsToNames(r.stop)) 2057 assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place)) 2058 } 2059 2060 // Tests the reconciler handles migrating a canary correctly on a lost node 2061 func TestReconciler_LostNode_Canary(t *testing.T) { 2062 job := mock.Job() 2063 job.TaskGroups[0].Update = canaryUpdate 2064 2065 // Create a deployment that is paused and has placed some canaries 2066 d := structs.NewDeployment(job) 2067 s := &structs.DeploymentState{ 2068 Promoted: false, 2069 DesiredTotal: 10, 2070 DesiredCanaries: 2, 2071 PlacedAllocs: 2, 2072 } 2073 d.TaskGroups[job.TaskGroups[0].Name] = s 2074 2075 // Create 10 allocations from the old job 2076 var allocs []*structs.Allocation 2077 for i := 0; i < 10; i++ { 2078 alloc := mock.Alloc() 2079 alloc.Job = job 2080 alloc.JobID = job.ID 2081 alloc.NodeID = uuid.Generate() 2082 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2083 alloc.TaskGroup = job.TaskGroups[0].Name 2084 allocs = append(allocs, alloc) 2085 } 2086 2087 // Create two canaries for the new job 2088 handled := make(map[string]allocUpdateType) 2089 for i := 0; i < 2; i++ { 2090 // Create one canary 2091 canary := mock.Alloc() 2092 canary.Job = job 2093 
canary.JobID = job.ID 2094 canary.NodeID = uuid.Generate() 2095 canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2096 canary.TaskGroup = job.TaskGroups[0].Name 2097 s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) 2098 canary.DeploymentID = d.ID 2099 allocs = append(allocs, canary) 2100 handled[canary.ID] = allocUpdateFnIgnore 2101 } 2102 2103 // Build a map of tainted nodes that contains the last canary 2104 tainted := make(map[string]*structs.Node, 1) 2105 n := mock.Node() 2106 n.ID = allocs[11].NodeID 2107 n.Status = structs.NodeStatusDown 2108 tainted[n.ID] = n 2109 2110 mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 2111 reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, tainted) 2112 r := reconciler.Compute() 2113 2114 // Assert the correct results 2115 assertResults(t, r, &resultExpectation{ 2116 createDeployment: nil, 2117 deploymentUpdates: nil, 2118 place: 1, 2119 inplace: 0, 2120 stop: 1, 2121 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2122 job.TaskGroups[0].Name: { 2123 Canary: 1, 2124 Ignore: 11, 2125 }, 2126 }, 2127 }) 2128 2129 assertNamesHaveIndexes(t, intRange(1, 1), stopResultsToNames(r.stop)) 2130 assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place)) 2131 } 2132 2133 // Tests the reconciler handles stopping canaries from older deployments 2134 func TestReconciler_StopOldCanaries(t *testing.T) { 2135 job := mock.Job() 2136 job.TaskGroups[0].Update = canaryUpdate 2137 2138 // Create an old deployment that has placed some canaries 2139 d := structs.NewDeployment(job) 2140 s := &structs.DeploymentState{ 2141 Promoted: false, 2142 DesiredTotal: 10, 2143 DesiredCanaries: 2, 2144 PlacedAllocs: 2, 2145 } 2146 d.TaskGroups[job.TaskGroups[0].Name] = s 2147 2148 // Update the job 2149 job.Version += 10 2150 2151 // Create 10 allocations from the old job 2152 var allocs []*structs.Allocation 2153 for i := 0; i < 10; i++ { 2154 alloc := mock.Alloc() 2155 alloc.Job = job 2156 alloc.JobID = job.ID 2157 alloc.NodeID = uuid.Generate() 2158 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2159 alloc.TaskGroup = job.TaskGroups[0].Name 2160 allocs = append(allocs, alloc) 2161 } 2162 2163 // Create canaries 2164 for i := 0; i < 2; i++ { 2165 // Create one canary 2166 canary := mock.Alloc() 2167 canary.Job = job 2168 canary.JobID = job.ID 2169 canary.NodeID = uuid.Generate() 2170 canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2171 canary.TaskGroup = job.TaskGroups[0].Name 2172 s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) 2173 canary.DeploymentID = d.ID 2174 allocs = append(allocs, canary) 2175 } 2176 2177 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil) 2178 r := reconciler.Compute() 2179 2180 newD := structs.NewDeployment(job) 2181 newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion 2182 newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 2183 DesiredCanaries: 2, 2184 DesiredTotal: 10, 2185 } 2186 2187 // Assert the correct results 2188 assertResults(t, r, &resultExpectation{ 2189 createDeployment: newD, 2190 deploymentUpdates: []*structs.DeploymentStatusUpdate{ 2191 { 2192 DeploymentID: d.ID, 2193 Status: structs.DeploymentStatusCancelled, 2194 StatusDescription: structs.DeploymentStatusDescriptionNewerJob, 2195 }, 2196 }, 2197 place: 2, 2198 inplace: 0, 2199 stop: 2, 2200 desiredTGUpdates: 
map[string]*structs.DesiredUpdates{ 2201 job.TaskGroups[0].Name: { 2202 Canary: 2, 2203 Stop: 2, 2204 Ignore: 10, 2205 }, 2206 }, 2207 }) 2208 2209 assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) 2210 assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) 2211 } 2212 2213 // Tests the reconciler creates new canaries when the job changes 2214 func TestReconciler_NewCanaries(t *testing.T) { 2215 job := mock.Job() 2216 job.TaskGroups[0].Update = canaryUpdate 2217 2218 // Create 10 allocations from the old job 2219 var allocs []*structs.Allocation 2220 for i := 0; i < 10; i++ { 2221 alloc := mock.Alloc() 2222 alloc.Job = job 2223 alloc.JobID = job.ID 2224 alloc.NodeID = uuid.Generate() 2225 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2226 alloc.TaskGroup = job.TaskGroups[0].Name 2227 allocs = append(allocs, alloc) 2228 } 2229 2230 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil) 2231 r := reconciler.Compute() 2232 2233 newD := structs.NewDeployment(job) 2234 newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion 2235 newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 2236 DesiredCanaries: 2, 2237 DesiredTotal: 10, 2238 } 2239 2240 // Assert the correct results 2241 assertResults(t, r, &resultExpectation{ 2242 createDeployment: newD, 2243 deploymentUpdates: nil, 2244 place: 2, 2245 inplace: 0, 2246 stop: 0, 2247 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2248 job.TaskGroups[0].Name: { 2249 Canary: 2, 2250 Ignore: 10, 2251 }, 2252 }, 2253 }) 2254 2255 assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) 2256 } 2257 2258 // Tests the reconciler creates new canaries when the job changes for multiple 2259 // task groups 2260 func TestReconciler_NewCanaries_MultiTG(t *testing.T) { 2261 job := mock.Job() 2262 job.TaskGroups[0].Update = canaryUpdate 2263 job.TaskGroups = append(job.TaskGroups, job.TaskGroups[0].Copy()) 2264 job.TaskGroups[0].Name = "tg2" 2265 2266 // Create 10 allocations from the old job for each tg 2267 var allocs []*structs.Allocation 2268 for j := 0; j < 2; j++ { 2269 for i := 0; i < 10; i++ { 2270 alloc := mock.Alloc() 2271 alloc.Job = job 2272 alloc.JobID = job.ID 2273 alloc.NodeID = uuid.Generate() 2274 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[j].Name, uint(i)) 2275 alloc.TaskGroup = job.TaskGroups[j].Name 2276 allocs = append(allocs, alloc) 2277 } 2278 } 2279 2280 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil) 2281 r := reconciler.Compute() 2282 2283 newD := structs.NewDeployment(job) 2284 newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion 2285 state := &structs.DeploymentState{ 2286 DesiredCanaries: 2, 2287 DesiredTotal: 10, 2288 } 2289 newD.TaskGroups[job.TaskGroups[0].Name] = state 2290 newD.TaskGroups[job.TaskGroups[1].Name] = state.Copy() 2291 2292 // Assert the correct results 2293 assertResults(t, r, &resultExpectation{ 2294 createDeployment: newD, 2295 deploymentUpdates: nil, 2296 place: 4, 2297 inplace: 0, 2298 stop: 0, 2299 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2300 job.TaskGroups[0].Name: { 2301 Canary: 2, 2302 Ignore: 10, 2303 }, 2304 job.TaskGroups[1].Name: { 2305 Canary: 2, 2306 Ignore: 10, 2307 }, 2308 }, 2309 }) 2310 2311 assertNamesHaveIndexes(t, intRange(0, 1, 0, 1), placeResultsToNames(r.place)) 2312 } 2313 2314 // Tests the reconciler 
creates new canaries when the job changes and scales up 2315 func TestReconciler_NewCanaries_ScaleUp(t *testing.T) { 2316 // Scale the job up to 15 2317 job := mock.Job() 2318 job.TaskGroups[0].Update = canaryUpdate 2319 job.TaskGroups[0].Count = 15 2320 2321 // Create 10 allocations from the old job 2322 var allocs []*structs.Allocation 2323 for i := 0; i < 10; i++ { 2324 alloc := mock.Alloc() 2325 alloc.Job = job 2326 alloc.JobID = job.ID 2327 alloc.NodeID = uuid.Generate() 2328 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2329 alloc.TaskGroup = job.TaskGroups[0].Name 2330 allocs = append(allocs, alloc) 2331 } 2332 2333 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil) 2334 r := reconciler.Compute() 2335 2336 newD := structs.NewDeployment(job) 2337 newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion 2338 newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 2339 DesiredCanaries: 2, 2340 DesiredTotal: 15, 2341 } 2342 2343 // Assert the correct results 2344 assertResults(t, r, &resultExpectation{ 2345 createDeployment: newD, 2346 deploymentUpdates: nil, 2347 place: 2, 2348 inplace: 0, 2349 stop: 0, 2350 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2351 job.TaskGroups[0].Name: { 2352 Canary: 2, 2353 Ignore: 10, 2354 }, 2355 }, 2356 }) 2357 2358 assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) 2359 } 2360 2361 // Tests the reconciler creates new canaries when the job changes and scales 2362 // down 2363 func TestReconciler_NewCanaries_ScaleDown(t *testing.T) { 2364 // Scale the job down to 5 2365 job := mock.Job() 2366 job.TaskGroups[0].Update = canaryUpdate 2367 job.TaskGroups[0].Count = 5 2368 2369 // Create 10 allocations from the old job 2370 var allocs []*structs.Allocation 2371 for i := 0; i < 10; i++ { 2372 alloc := mock.Alloc() 2373 alloc.Job = job 2374 alloc.JobID = job.ID 2375 alloc.NodeID = uuid.Generate() 2376 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2377 alloc.TaskGroup = job.TaskGroups[0].Name 2378 allocs = append(allocs, alloc) 2379 } 2380 2381 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil) 2382 r := reconciler.Compute() 2383 2384 newD := structs.NewDeployment(job) 2385 newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion 2386 newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 2387 DesiredCanaries: 2, 2388 DesiredTotal: 5, 2389 } 2390 2391 // Assert the correct results 2392 assertResults(t, r, &resultExpectation{ 2393 createDeployment: newD, 2394 deploymentUpdates: nil, 2395 place: 2, 2396 inplace: 0, 2397 stop: 5, 2398 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2399 job.TaskGroups[0].Name: { 2400 Canary: 2, 2401 Stop: 5, 2402 Ignore: 5, 2403 }, 2404 }, 2405 }) 2406 2407 assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) 2408 assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop)) 2409 } 2410 2411 // Tests the reconciler handles filling the names of partially placed canaries 2412 func TestReconciler_NewCanaries_FillNames(t *testing.T) { 2413 job := mock.Job() 2414 job.TaskGroups[0].Update = &structs.UpdateStrategy{ 2415 Canary: 4, 2416 MaxParallel: 2, 2417 HealthCheck: structs.UpdateStrategyHealthCheck_Checks, 2418 MinHealthyTime: 10 * time.Second, 2419 HealthyDeadline: 10 * time.Minute, 2420 } 2421 2422 // Create an existing 
deployment that has placed some canaries 2423 d := structs.NewDeployment(job) 2424 s := &structs.DeploymentState{ 2425 Promoted: false, 2426 DesiredTotal: 10, 2427 DesiredCanaries: 4, 2428 PlacedAllocs: 2, 2429 } 2430 d.TaskGroups[job.TaskGroups[0].Name] = s 2431 2432 // Create 10 allocations from the old job 2433 var allocs []*structs.Allocation 2434 for i := 0; i < 10; i++ { 2435 alloc := mock.Alloc() 2436 alloc.Job = job 2437 alloc.JobID = job.ID 2438 alloc.NodeID = uuid.Generate() 2439 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2440 alloc.TaskGroup = job.TaskGroups[0].Name 2441 allocs = append(allocs, alloc) 2442 } 2443 2444 // Create canaries but pick names at the ends 2445 for i := 0; i < 4; i += 3 { 2446 // Create one canary 2447 canary := mock.Alloc() 2448 canary.Job = job 2449 canary.JobID = job.ID 2450 canary.NodeID = uuid.Generate() 2451 canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2452 canary.TaskGroup = job.TaskGroups[0].Name 2453 s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) 2454 canary.DeploymentID = d.ID 2455 allocs = append(allocs, canary) 2456 } 2457 2458 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil) 2459 r := reconciler.Compute() 2460 2461 // Assert the correct results 2462 assertResults(t, r, &resultExpectation{ 2463 createDeployment: nil, 2464 deploymentUpdates: nil, 2465 place: 2, 2466 inplace: 0, 2467 stop: 0, 2468 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2469 job.TaskGroups[0].Name: { 2470 Canary: 2, 2471 Ignore: 12, 2472 }, 2473 }, 2474 }) 2475 2476 assertNamesHaveIndexes(t, intRange(1, 2), placeResultsToNames(r.place)) 2477 } 2478 2479 // Tests the reconciler handles canary promotion by unblocking max_parallel 2480 func TestReconciler_PromoteCanaries_Unblock(t *testing.T) { 2481 job := mock.Job() 2482 job.TaskGroups[0].Update = canaryUpdate 2483 2484 // Create an existing deployment that has placed some canaries and mark them 2485 // promoted 2486 d := structs.NewDeployment(job) 2487 s := &structs.DeploymentState{ 2488 Promoted: true, 2489 DesiredTotal: 10, 2490 DesiredCanaries: 2, 2491 PlacedAllocs: 2, 2492 } 2493 d.TaskGroups[job.TaskGroups[0].Name] = s 2494 2495 // Create 10 allocations from the old job 2496 var allocs []*structs.Allocation 2497 for i := 0; i < 10; i++ { 2498 alloc := mock.Alloc() 2499 alloc.Job = job 2500 alloc.JobID = job.ID 2501 alloc.NodeID = uuid.Generate() 2502 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2503 alloc.TaskGroup = job.TaskGroups[0].Name 2504 allocs = append(allocs, alloc) 2505 } 2506 2507 // Create the canaries 2508 handled := make(map[string]allocUpdateType) 2509 for i := 0; i < 2; i++ { 2510 // Create one canary 2511 canary := mock.Alloc() 2512 canary.Job = job 2513 canary.JobID = job.ID 2514 canary.NodeID = uuid.Generate() 2515 canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2516 canary.TaskGroup = job.TaskGroups[0].Name 2517 s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) 2518 canary.DeploymentID = d.ID 2519 canary.DeploymentStatus = &structs.AllocDeploymentStatus{ 2520 Healthy: helper.BoolToPtr(true), 2521 } 2522 allocs = append(allocs, canary) 2523 handled[canary.ID] = allocUpdateFnIgnore 2524 } 2525 2526 mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 2527 reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil) 2528 r := reconciler.Compute() 2529 2530 // 
Assert the correct results 2531 assertResults(t, r, &resultExpectation{ 2532 createDeployment: nil, 2533 deploymentUpdates: nil, 2534 destructive: 2, 2535 stop: 2, 2536 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2537 job.TaskGroups[0].Name: { 2538 Stop: 2, 2539 DestructiveUpdate: 2, 2540 Ignore: 8, 2541 }, 2542 }, 2543 }) 2544 2545 assertNoCanariesStopped(t, d, r.stop) 2546 assertNamesHaveIndexes(t, intRange(2, 3), destructiveResultsToNames(r.destructiveUpdate)) 2547 assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) 2548 } 2549 2550 // Tests the reconciler handles canary promotion when the canary count equals 2551 // the total correctly 2552 func TestReconciler_PromoteCanaries_CanariesEqualCount(t *testing.T) { 2553 job := mock.Job() 2554 job.TaskGroups[0].Update = canaryUpdate 2555 job.TaskGroups[0].Count = 2 2556 2557 // Create an existing deployment that has placed some canaries and mark them 2558 // promoted 2559 d := structs.NewDeployment(job) 2560 s := &structs.DeploymentState{ 2561 Promoted: true, 2562 DesiredTotal: 2, 2563 DesiredCanaries: 2, 2564 PlacedAllocs: 2, 2565 } 2566 d.TaskGroups[job.TaskGroups[0].Name] = s 2567 2568 // Create 2 allocations from the old job 2569 var allocs []*structs.Allocation 2570 for i := 0; i < 2; i++ { 2571 alloc := mock.Alloc() 2572 alloc.Job = job 2573 alloc.JobID = job.ID 2574 alloc.NodeID = uuid.Generate() 2575 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2576 alloc.TaskGroup = job.TaskGroups[0].Name 2577 allocs = append(allocs, alloc) 2578 } 2579 2580 // Create the canaries 2581 handled := make(map[string]allocUpdateType) 2582 for i := 0; i < 2; i++ { 2583 // Create one canary 2584 canary := mock.Alloc() 2585 canary.Job = job 2586 canary.JobID = job.ID 2587 canary.NodeID = uuid.Generate() 2588 canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2589 canary.TaskGroup = job.TaskGroups[0].Name 2590 s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) 2591 canary.DeploymentID = d.ID 2592 canary.DeploymentStatus = &structs.AllocDeploymentStatus{ 2593 Healthy: helper.BoolToPtr(true), 2594 } 2595 allocs = append(allocs, canary) 2596 handled[canary.ID] = allocUpdateFnIgnore 2597 } 2598 2599 mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 2600 reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil) 2601 r := reconciler.Compute() 2602 2603 updates := []*structs.DeploymentStatusUpdate{ 2604 { 2605 DeploymentID: d.ID, 2606 Status: structs.DeploymentStatusSuccessful, 2607 StatusDescription: structs.DeploymentStatusDescriptionSuccessful, 2608 }, 2609 } 2610 2611 // Assert the correct results 2612 assertResults(t, r, &resultExpectation{ 2613 createDeployment: nil, 2614 deploymentUpdates: updates, 2615 place: 0, 2616 inplace: 0, 2617 stop: 2, 2618 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2619 job.TaskGroups[0].Name: { 2620 Stop: 2, 2621 Ignore: 2, 2622 }, 2623 }, 2624 }) 2625 2626 assertNoCanariesStopped(t, d, r.stop) 2627 assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) 2628 } 2629 2630 // Tests the reconciler checks the health of placed allocs to determine the 2631 // limit 2632 func TestReconciler_DeploymentLimit_HealthAccounting(t *testing.T) { 2633 job := mock.Job() 2634 job.TaskGroups[0].Update = noCanaryUpdate 2635 2636 cases := []struct { 2637 healthy int 2638 }{ 2639 { 2640 healthy: 0, 2641 }, 2642 { 2643 healthy: 1, 2644 }, 2645 { 2646 healthy: 2, 2647 }, 2648 { 2649 healthy: 3, 
2650 }, 2651 { 2652 healthy: 4, 2653 }, 2654 } 2655 2656 for _, c := range cases { 2657 t.Run(fmt.Sprintf("%d healthy", c.healthy), func(t *testing.T) { 2658 // Create an existing deployment that has placed some canaries and mark them 2659 // promoted 2660 d := structs.NewDeployment(job) 2661 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 2662 Promoted: true, 2663 DesiredTotal: 10, 2664 PlacedAllocs: 4, 2665 } 2666 2667 // Create 6 allocations from the old job 2668 var allocs []*structs.Allocation 2669 for i := 4; i < 10; i++ { 2670 alloc := mock.Alloc() 2671 alloc.Job = job 2672 alloc.JobID = job.ID 2673 alloc.NodeID = uuid.Generate() 2674 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2675 alloc.TaskGroup = job.TaskGroups[0].Name 2676 allocs = append(allocs, alloc) 2677 } 2678 2679 // Create the new allocs 2680 handled := make(map[string]allocUpdateType) 2681 for i := 0; i < 4; i++ { 2682 new := mock.Alloc() 2683 new.Job = job 2684 new.JobID = job.ID 2685 new.NodeID = uuid.Generate() 2686 new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2687 new.TaskGroup = job.TaskGroups[0].Name 2688 new.DeploymentID = d.ID 2689 if i < c.healthy { 2690 new.DeploymentStatus = &structs.AllocDeploymentStatus{ 2691 Healthy: helper.BoolToPtr(true), 2692 } 2693 } 2694 allocs = append(allocs, new) 2695 handled[new.ID] = allocUpdateFnIgnore 2696 } 2697 2698 mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 2699 reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil) 2700 r := reconciler.Compute() 2701 2702 // Assert the correct results 2703 assertResults(t, r, &resultExpectation{ 2704 createDeployment: nil, 2705 deploymentUpdates: nil, 2706 destructive: c.healthy, 2707 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2708 job.TaskGroups[0].Name: { 2709 DestructiveUpdate: uint64(c.healthy), 2710 Ignore: uint64(10 - c.healthy), 2711 }, 2712 }, 2713 }) 2714 2715 if c.healthy != 0 { 2716 assertNamesHaveIndexes(t, intRange(4, 3+c.healthy), destructiveResultsToNames(r.destructiveUpdate)) 2717 } 2718 }) 2719 } 2720 } 2721 2722 // Tests the reconciler handles an alloc on a tainted node during a rolling 2723 // update 2724 func TestReconciler_TaintedNode_RollingUpgrade(t *testing.T) { 2725 job := mock.Job() 2726 job.TaskGroups[0].Update = noCanaryUpdate 2727 2728 // Create an existing deployment that has some placed allocs 2729 d := structs.NewDeployment(job) 2730 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 2731 Promoted: true, 2732 DesiredTotal: 10, 2733 PlacedAllocs: 7, 2734 } 2735 2736 // Create 2 allocations from the old job 2737 var allocs []*structs.Allocation 2738 for i := 8; i < 10; i++ { 2739 alloc := mock.Alloc() 2740 alloc.Job = job 2741 alloc.JobID = job.ID 2742 alloc.NodeID = uuid.Generate() 2743 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2744 alloc.TaskGroup = job.TaskGroups[0].Name 2745 allocs = append(allocs, alloc) 2746 } 2747 2748 // Create the healthy replacements 2749 handled := make(map[string]allocUpdateType) 2750 for i := 0; i < 8; i++ { 2751 new := mock.Alloc() 2752 new.Job = job 2753 new.JobID = job.ID 2754 new.NodeID = uuid.Generate() 2755 new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2756 new.TaskGroup = job.TaskGroups[0].Name 2757 new.DeploymentID = d.ID 2758 new.DeploymentStatus = &structs.AllocDeploymentStatus{ 2759 Healthy: helper.BoolToPtr(true), 2760 } 2761 allocs = append(allocs, new) 
2762 handled[new.ID] = allocUpdateFnIgnore 2763 } 2764 2765 // Build a map of tainted nodes 2766 tainted := make(map[string]*structs.Node, 3) 2767 for i := 0; i < 3; i++ { 2768 n := mock.Node() 2769 n.ID = allocs[2+i].NodeID 2770 if i == 0 { 2771 n.Status = structs.NodeStatusDown 2772 } else { 2773 n.Drain = true 2774 } 2775 tainted[n.ID] = n 2776 } 2777 2778 mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 2779 reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, tainted) 2780 r := reconciler.Compute() 2781 2782 // Assert the correct results 2783 assertResults(t, r, &resultExpectation{ 2784 createDeployment: nil, 2785 deploymentUpdates: nil, 2786 place: 2, 2787 destructive: 2, 2788 stop: 2, 2789 followupEvalWait: 31 * time.Second, 2790 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2791 job.TaskGroups[0].Name: { 2792 Place: 1, // Place the lost 2793 Stop: 1, // Stop the lost 2794 Migrate: 1, // Migrate the tainted 2795 DestructiveUpdate: 2, 2796 Ignore: 6, 2797 }, 2798 }, 2799 }) 2800 2801 assertNamesHaveIndexes(t, intRange(8, 9), destructiveResultsToNames(r.destructiveUpdate)) 2802 assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) 2803 assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) 2804 } 2805 2806 // Tests the reconciler handles a failed deployment and only replaces lost 2807 // allocations 2808 func TestReconciler_FailedDeployment_PlacementLost(t *testing.T) { 2809 job := mock.Job() 2810 job.TaskGroups[0].Update = noCanaryUpdate 2811 2812 // Create an existing failed deployment that has some placed allocs 2813 d := structs.NewDeployment(job) 2814 d.Status = structs.DeploymentStatusFailed 2815 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 2816 Promoted: true, 2817 DesiredTotal: 10, 2818 PlacedAllocs: 4, 2819 } 2820 2821 // Create 6 allocations from the old job 2822 var allocs []*structs.Allocation 2823 for i := 4; i < 10; i++ { 2824 alloc := mock.Alloc() 2825 alloc.Job = job 2826 alloc.JobID = job.ID 2827 alloc.NodeID = uuid.Generate() 2828 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2829 alloc.TaskGroup = job.TaskGroups[0].Name 2830 allocs = append(allocs, alloc) 2831 } 2832 2833 // Create the healthy replacements 2834 handled := make(map[string]allocUpdateType) 2835 for i := 0; i < 4; i++ { 2836 new := mock.Alloc() 2837 new.Job = job 2838 new.JobID = job.ID 2839 new.NodeID = uuid.Generate() 2840 new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2841 new.TaskGroup = job.TaskGroups[0].Name 2842 new.DeploymentID = d.ID 2843 new.DeploymentStatus = &structs.AllocDeploymentStatus{ 2844 Healthy: helper.BoolToPtr(true), 2845 } 2846 allocs = append(allocs, new) 2847 handled[new.ID] = allocUpdateFnIgnore 2848 } 2849 2850 // Build a map of tainted nodes 2851 tainted := make(map[string]*structs.Node, 2) 2852 for i := 0; i < 2; i++ { 2853 n := mock.Node() 2854 n.ID = allocs[6+i].NodeID 2855 if i == 0 { 2856 n.Status = structs.NodeStatusDown 2857 } else { 2858 n.Drain = true 2859 } 2860 tainted[n.ID] = n 2861 } 2862 2863 mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 2864 reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, tainted) 2865 r := reconciler.Compute() 2866 2867 // Assert the correct results 2868 assertResults(t, r, &resultExpectation{ 2869 createDeployment: nil, 2870 deploymentUpdates: nil, 2871 place: 1, // Only replace the lost node 2872 inplace: 0, 
2873 stop: 2, 2874 followupEvalWait: 0, // Since the deployment is failed, there should be no followup 2875 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2876 job.TaskGroups[0].Name: { 2877 Place: 1, 2878 Stop: 2, 2879 Ignore: 8, 2880 }, 2881 }, 2882 }) 2883 2884 assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place)) 2885 assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) 2886 } 2887 2888 // Tests the reconciler handles a run after a deployment is complete 2889 // successfully. 2890 func TestReconciler_CompleteDeployment(t *testing.T) { 2891 job := mock.Job() 2892 job.TaskGroups[0].Update = canaryUpdate 2893 2894 d := structs.NewDeployment(job) 2895 d.Status = structs.DeploymentStatusSuccessful 2896 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 2897 Promoted: true, 2898 DesiredTotal: 10, 2899 DesiredCanaries: 2, 2900 PlacedAllocs: 10, 2901 HealthyAllocs: 10, 2902 } 2903 2904 // Create allocations from the old job 2905 var allocs []*structs.Allocation 2906 for i := 0; i < 10; i++ { 2907 alloc := mock.Alloc() 2908 alloc.Job = job 2909 alloc.JobID = job.ID 2910 alloc.NodeID = uuid.Generate() 2911 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2912 alloc.TaskGroup = job.TaskGroups[0].Name 2913 alloc.DeploymentID = d.ID 2914 alloc.DeploymentStatus = &structs.AllocDeploymentStatus{ 2915 Healthy: helper.BoolToPtr(true), 2916 } 2917 allocs = append(allocs, alloc) 2918 } 2919 2920 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil) 2921 r := reconciler.Compute() 2922 2923 // Assert the correct results 2924 assertResults(t, r, &resultExpectation{ 2925 createDeployment: nil, 2926 deploymentUpdates: nil, 2927 place: 0, 2928 inplace: 0, 2929 stop: 0, 2930 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2931 job.TaskGroups[0].Name: { 2932 Ignore: 10, 2933 }, 2934 }, 2935 }) 2936 } 2937 2938 // Test that a failed deployment cancels non-promoted canaries 2939 func TestReconciler_FailedDeployment_CancelCanaries(t *testing.T) { 2940 // Create a job with two task groups 2941 job := mock.Job() 2942 job.TaskGroups[0].Update = canaryUpdate 2943 job.TaskGroups = append(job.TaskGroups, job.TaskGroups[0].Copy()) 2944 job.TaskGroups[1].Name = "two" 2945 2946 // Create an existing failed deployment that has promoted one task group 2947 d := structs.NewDeployment(job) 2948 d.Status = structs.DeploymentStatusFailed 2949 s0 := &structs.DeploymentState{ 2950 Promoted: true, 2951 DesiredTotal: 10, 2952 DesiredCanaries: 2, 2953 PlacedAllocs: 4, 2954 } 2955 s1 := &structs.DeploymentState{ 2956 Promoted: false, 2957 DesiredTotal: 10, 2958 DesiredCanaries: 2, 2959 PlacedAllocs: 2, 2960 } 2961 d.TaskGroups[job.TaskGroups[0].Name] = s0 2962 d.TaskGroups[job.TaskGroups[1].Name] = s1 2963 2964 // Create 6 allocations from the old job 2965 var allocs []*structs.Allocation 2966 handled := make(map[string]allocUpdateType) 2967 for _, group := range []int{0, 1} { 2968 replacements := 4 2969 state := s0 2970 if group == 1 { 2971 replacements = 2 2972 state = s1 2973 } 2974 2975 // Create the healthy replacements 2976 for i := 0; i < replacements; i++ { 2977 new := mock.Alloc() 2978 new.Job = job 2979 new.JobID = job.ID 2980 new.NodeID = uuid.Generate() 2981 new.Name = structs.AllocName(job.ID, job.TaskGroups[group].Name, uint(i)) 2982 new.TaskGroup = job.TaskGroups[group].Name 2983 new.DeploymentID = d.ID 2984 new.DeploymentStatus = &structs.AllocDeploymentStatus{ 2985 Healthy: 
helper.BoolToPtr(true), 2986 } 2987 allocs = append(allocs, new) 2988 handled[new.ID] = allocUpdateFnIgnore 2989 2990 // Add the alloc to the canary list 2991 if i < 2 { 2992 state.PlacedCanaries = append(state.PlacedCanaries, new.ID) 2993 } 2994 } 2995 for i := replacements; i < 10; i++ { 2996 alloc := mock.Alloc() 2997 alloc.Job = job 2998 alloc.JobID = job.ID 2999 alloc.NodeID = uuid.Generate() 3000 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[group].Name, uint(i)) 3001 alloc.TaskGroup = job.TaskGroups[group].Name 3002 allocs = append(allocs, alloc) 3003 } 3004 } 3005 3006 mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 3007 reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil) 3008 r := reconciler.Compute() 3009 3010 // Assert the correct results 3011 assertResults(t, r, &resultExpectation{ 3012 createDeployment: nil, 3013 deploymentUpdates: nil, 3014 place: 0, 3015 inplace: 0, 3016 stop: 2, 3017 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3018 job.TaskGroups[0].Name: { 3019 Ignore: 10, 3020 }, 3021 job.TaskGroups[1].Name: { 3022 Stop: 2, 3023 Ignore: 8, 3024 }, 3025 }, 3026 }) 3027 3028 assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) 3029 } 3030 3031 // Test that a failed deployment and updated job works 3032 func TestReconciler_FailedDeployment_NewJob(t *testing.T) { 3033 job := mock.Job() 3034 job.TaskGroups[0].Update = noCanaryUpdate 3035 3036 // Create an existing failed deployment that has some placed allocs 3037 d := structs.NewDeployment(job) 3038 d.Status = structs.DeploymentStatusFailed 3039 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3040 Promoted: true, 3041 DesiredTotal: 10, 3042 PlacedAllocs: 4, 3043 } 3044 3045 // Create 6 allocations from the old job 3046 var allocs []*structs.Allocation 3047 for i := 4; i < 10; i++ { 3048 alloc := mock.Alloc() 3049 alloc.Job = job 3050 alloc.JobID = job.ID 3051 alloc.NodeID = uuid.Generate() 3052 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3053 alloc.TaskGroup = job.TaskGroups[0].Name 3054 allocs = append(allocs, alloc) 3055 } 3056 3057 // Create the healthy replacements 3058 for i := 0; i < 4; i++ { 3059 new := mock.Alloc() 3060 new.Job = job 3061 new.JobID = job.ID 3062 new.NodeID = uuid.Generate() 3063 new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3064 new.TaskGroup = job.TaskGroups[0].Name 3065 new.DeploymentID = d.ID 3066 new.DeploymentStatus = &structs.AllocDeploymentStatus{ 3067 Healthy: helper.BoolToPtr(true), 3068 } 3069 allocs = append(allocs, new) 3070 } 3071 3072 // Up the job version 3073 jobNew := job.Copy() 3074 jobNew.Version += 100 3075 3076 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, jobNew, d, allocs, nil) 3077 r := reconciler.Compute() 3078 3079 dnew := structs.NewDeployment(jobNew) 3080 dnew.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3081 DesiredTotal: 10, 3082 } 3083 3084 // Assert the correct results 3085 assertResults(t, r, &resultExpectation{ 3086 createDeployment: dnew, 3087 deploymentUpdates: nil, 3088 destructive: 4, 3089 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3090 job.TaskGroups[0].Name: { 3091 DestructiveUpdate: 4, 3092 Ignore: 6, 3093 }, 3094 }, 3095 }) 3096 3097 assertNamesHaveIndexes(t, intRange(0, 3), destructiveResultsToNames(r.destructiveUpdate)) 3098 } 3099 3100 // Tests the reconciler marks a deployment as complete 3101 func 
TestReconciler_MarkDeploymentComplete(t *testing.T) { 3102 job := mock.Job() 3103 job.TaskGroups[0].Update = noCanaryUpdate 3104 3105 d := structs.NewDeployment(job) 3106 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3107 Promoted: true, 3108 DesiredTotal: 10, 3109 PlacedAllocs: 10, 3110 HealthyAllocs: 10, 3111 } 3112 3113 // Create allocations from the old job 3114 var allocs []*structs.Allocation 3115 for i := 0; i < 10; i++ { 3116 alloc := mock.Alloc() 3117 alloc.Job = job 3118 alloc.JobID = job.ID 3119 alloc.NodeID = uuid.Generate() 3120 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3121 alloc.TaskGroup = job.TaskGroups[0].Name 3122 alloc.DeploymentID = d.ID 3123 alloc.DeploymentStatus = &structs.AllocDeploymentStatus{ 3124 Healthy: helper.BoolToPtr(true), 3125 } 3126 allocs = append(allocs, alloc) 3127 } 3128 3129 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil) 3130 r := reconciler.Compute() 3131 3132 updates := []*structs.DeploymentStatusUpdate{ 3133 { 3134 DeploymentID: d.ID, 3135 Status: structs.DeploymentStatusSuccessful, 3136 StatusDescription: structs.DeploymentStatusDescriptionSuccessful, 3137 }, 3138 } 3139 3140 // Assert the correct results 3141 assertResults(t, r, &resultExpectation{ 3142 createDeployment: nil, 3143 deploymentUpdates: updates, 3144 place: 0, 3145 inplace: 0, 3146 stop: 0, 3147 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3148 job.TaskGroups[0].Name: { 3149 Ignore: 10, 3150 }, 3151 }, 3152 }) 3153 } 3154 3155 // Tests the reconciler picks the maximum of the staggers when multiple task 3156 // groups are under going node drains. 3157 func TestReconciler_TaintedNode_MultiGroups(t *testing.T) { 3158 // Create a job with two task groups 3159 job := mock.Job() 3160 job.TaskGroups[0].Update = noCanaryUpdate 3161 job.TaskGroups = append(job.TaskGroups, job.TaskGroups[0].Copy()) 3162 job.TaskGroups[1].Name = "two" 3163 job.TaskGroups[1].Update.Stagger = 100 * time.Second 3164 3165 // Create the allocations 3166 var allocs []*structs.Allocation 3167 for j := 0; j < 2; j++ { 3168 for i := 0; i < 10; i++ { 3169 alloc := mock.Alloc() 3170 alloc.Job = job 3171 alloc.JobID = job.ID 3172 alloc.NodeID = uuid.Generate() 3173 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[j].Name, uint(i)) 3174 alloc.TaskGroup = job.TaskGroups[j].Name 3175 allocs = append(allocs, alloc) 3176 } 3177 } 3178 3179 // Build a map of tainted nodes 3180 tainted := make(map[string]*structs.Node, 15) 3181 for i := 0; i < 15; i++ { 3182 n := mock.Node() 3183 n.ID = allocs[i].NodeID 3184 n.Drain = true 3185 tainted[n.ID] = n 3186 } 3187 3188 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted) 3189 r := reconciler.Compute() 3190 3191 // Assert the correct results 3192 assertResults(t, r, &resultExpectation{ 3193 createDeployment: nil, 3194 deploymentUpdates: nil, 3195 place: 8, 3196 inplace: 0, 3197 stop: 8, 3198 followupEvalWait: 100 * time.Second, 3199 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3200 job.TaskGroups[0].Name: { 3201 Place: 0, 3202 Stop: 0, 3203 Migrate: 4, 3204 DestructiveUpdate: 0, 3205 Ignore: 6, 3206 }, 3207 job.TaskGroups[1].Name: { 3208 Place: 0, 3209 Stop: 0, 3210 Migrate: 4, 3211 DestructiveUpdate: 0, 3212 Ignore: 6, 3213 }, 3214 }, 3215 }) 3216 3217 assertNamesHaveIndexes(t, intRange(0, 3, 0, 3), placeResultsToNames(r.place)) 3218 assertNamesHaveIndexes(t, intRange(0, 3, 0, 3), 
stopResultsToNames(r.stop)) 3219 } 3220 3221 // Tests the reconciler handles changing a job such that a deployment is created 3222 // while doing a scale up, but as the second eval. 3223 func TestReconciler_JobChange_ScaleUp_SecondEval(t *testing.T) { 3224 // Scale the job up to 30 3225 job := mock.Job() 3226 job.TaskGroups[0].Update = noCanaryUpdate 3227 job.TaskGroups[0].Count = 30 3228 3229 // Create a deployment for the scaled-up job that has placed some allocs 3230 d := structs.NewDeployment(job) 3231 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3232 Promoted: false, 3233 DesiredTotal: 30, 3234 PlacedAllocs: 20, 3235 } 3236 3237 // Create 10 allocations from the old job 3238 var allocs []*structs.Allocation 3239 for i := 0; i < 10; i++ { 3240 alloc := mock.Alloc() 3241 alloc.Job = job 3242 alloc.JobID = job.ID 3243 alloc.NodeID = uuid.Generate() 3244 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3245 alloc.TaskGroup = job.TaskGroups[0].Name 3246 allocs = append(allocs, alloc) 3247 } 3248 3249 // Create 20 allocations from the new job 3250 handled := make(map[string]allocUpdateType) 3251 for i := 10; i < 30; i++ { 3252 alloc := mock.Alloc() 3253 alloc.Job = job 3254 alloc.JobID = job.ID 3255 alloc.DeploymentID = d.ID 3256 alloc.NodeID = uuid.Generate() 3257 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3258 alloc.TaskGroup = job.TaskGroups[0].Name 3259 allocs = append(allocs, alloc) 3260 handled[alloc.ID] = allocUpdateFnIgnore 3261 } 3262 3263 mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 3264 reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil) 3265 r := reconciler.Compute() 3266 3267 // Assert the correct results 3268 assertResults(t, r, &resultExpectation{ 3269 createDeployment: nil, 3270 deploymentUpdates: nil, 3271 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3272 job.TaskGroups[0].Name: { 3273 // All should be ignored because nothing has been marked as 3274 // healthy. 3275 Ignore: 30, 3276 }, 3277 }, 3278 }) 3279 } 3280 3281 // Tests the reconciler doesn't stop allocations when doing a rolling upgrade 3282 // where the count of the old job allocs is < desired count. 
3283 func TestReconciler_RollingUpgrade_MissingAllocs(t *testing.T) { 3284 job := mock.Job() 3285 job.TaskGroups[0].Update = noCanaryUpdate 3286 3287 // Create 7 allocations from the old job 3288 var allocs []*structs.Allocation 3289 for i := 0; i < 7; i++ { 3290 alloc := mock.Alloc() 3291 alloc.Job = job 3292 alloc.JobID = job.ID 3293 alloc.NodeID = uuid.Generate() 3294 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3295 alloc.TaskGroup = job.TaskGroups[0].Name 3296 allocs = append(allocs, alloc) 3297 } 3298 3299 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil) 3300 r := reconciler.Compute() 3301 3302 d := structs.NewDeployment(job) 3303 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3304 DesiredTotal: 10, 3305 } 3306 3307 // Assert the correct results 3308 assertResults(t, r, &resultExpectation{ 3309 createDeployment: d, 3310 deploymentUpdates: nil, 3311 place: 3, 3312 destructive: 1, 3313 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3314 job.TaskGroups[0].Name: { 3315 Place: 3, 3316 DestructiveUpdate: 1, 3317 Ignore: 6, 3318 }, 3319 }, 3320 }) 3321 3322 assertNamesHaveIndexes(t, intRange(7, 9), placeResultsToNames(r.place)) 3323 assertNamesHaveIndexes(t, intRange(0, 0), destructiveResultsToNames(r.destructiveUpdate)) 3324 } 3325 3326 // Tests that the reconciler handles rerunning a batch job in the case that the 3327 // allocations are from an older instance of the job. 3328 func TestReconciler_Batch_Rerun(t *testing.T) { 3329 job := mock.Job() 3330 job.Type = structs.JobTypeBatch 3331 job.TaskGroups[0].Update = nil 3332 3333 // Create 10 allocations from the old job and have them be complete 3334 var allocs []*structs.Allocation 3335 for i := 0; i < 10; i++ { 3336 alloc := mock.Alloc() 3337 alloc.Job = job 3338 alloc.JobID = job.ID 3339 alloc.NodeID = uuid.Generate() 3340 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3341 alloc.TaskGroup = job.TaskGroups[0].Name 3342 alloc.ClientStatus = structs.AllocClientStatusComplete 3343 alloc.DesiredStatus = structs.AllocDesiredStatusStop 3344 allocs = append(allocs, alloc) 3345 } 3346 3347 // Create a copy of the job that is "new" 3348 job2 := job.Copy() 3349 job2.CreateIndex++ 3350 3351 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, true, job2.ID, job2, nil, allocs, nil) 3352 r := reconciler.Compute() 3353 3354 // Assert the correct results 3355 assertResults(t, r, &resultExpectation{ 3356 createDeployment: nil, 3357 deploymentUpdates: nil, 3358 place: 10, 3359 destructive: 0, 3360 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3361 job.TaskGroups[0].Name: { 3362 Place: 10, 3363 DestructiveUpdate: 0, 3364 Ignore: 10, 3365 }, 3366 }, 3367 }) 3368 3369 assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place)) 3370 } 3371 3372 // Test that a failed deployment will not result in rescheduling failed allocations 3373 func TestReconciler_FailedDeployment_DontReschedule(t *testing.T) { 3374 job := mock.Job() 3375 job.TaskGroups[0].Update = noCanaryUpdate 3376 3377 // Create an existing failed deployment that has some placed allocs 3378 d := structs.NewDeployment(job) 3379 d.Status = structs.DeploymentStatusFailed 3380 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3381 Promoted: true, 3382 DesiredTotal: 5, 3383 PlacedAllocs: 4, 3384 } 3385 3386 // Create 4 allocations and mark two as failed 3387 var allocs []*structs.Allocation 3388 for i := 0; i < 4; i++ 
{ 3389 alloc := mock.Alloc() 3390 alloc.Job = job 3391 alloc.JobID = job.ID 3392 alloc.NodeID = uuid.Generate() 3393 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3394 alloc.TaskGroup = job.TaskGroups[0].Name 3395 allocs = append(allocs, alloc) 3396 } 3397 allocs[2].ClientStatus = structs.AllocClientStatusFailed 3398 allocs[3].ClientStatus = structs.AllocClientStatusFailed 3399 3400 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil) 3401 r := reconciler.Compute() 3402 3403 // Assert that no rescheduled placements were created 3404 assertResults(t, r, &resultExpectation{ 3405 place: 0, 3406 createDeployment: nil, 3407 deploymentUpdates: nil, 3408 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3409 job.TaskGroups[0].Name: { 3410 Ignore: 2, 3411 }, 3412 }, 3413 }) 3414 } 3415 3416 // Test that a running deployment with failed allocs will not result in rescheduling failed allocations 3417 func TestReconciler_DeploymentWithFailedAllocs_DontReschedule(t *testing.T) { 3418 job := mock.Job() 3419 job.TaskGroups[0].Update = noCanaryUpdate 3420 3421 // Mock deployment with failed allocs, but deployment watcher hasn't marked it as failed yet 3422 d := structs.NewDeployment(job) 3423 d.Status = structs.DeploymentStatusRunning 3424 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3425 Promoted: false, 3426 DesiredTotal: 5, 3427 PlacedAllocs: 4, 3428 } 3429 3430 // Create 4 allocations and mark two as failed 3431 var allocs []*structs.Allocation 3432 for i := 0; i < 4; i++ { 3433 alloc := mock.Alloc() 3434 alloc.Job = job 3435 alloc.JobID = job.ID 3436 alloc.NodeID = uuid.Generate() 3437 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3438 alloc.TaskGroup = job.TaskGroups[0].Name 3439 alloc.DeploymentID = d.ID 3440 allocs = append(allocs, alloc) 3441 } 3442 allocs[2].ClientStatus = structs.AllocClientStatusFailed 3443 allocs[3].ClientStatus = structs.AllocClientStatusFailed 3444 3445 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil) 3446 r := reconciler.Compute() 3447 3448 // Assert that no rescheduled placements were created 3449 assertResults(t, r, &resultExpectation{ 3450 place: 0, 3451 createDeployment: nil, 3452 deploymentUpdates: nil, 3453 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3454 job.TaskGroups[0].Name: { 3455 Ignore: 2, 3456 }, 3457 }, 3458 }) 3459 }
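
// The alloc-construction loop above is repeated nearly verbatim in every test
// in this file. As an illustrative sketch only (not part of the upstream
// reconcile_test.go), a hypothetical helper such as the one below could
// collapse that boilerplate; it assumes only the mock, structs, and uuid
// packages already imported at the top of the file.
func mockAllocsForGroup(job *structs.Job, tg string, start, count int) []*structs.Allocation {
	allocs := make([]*structs.Allocation, 0, count)
	for i := start; i < start+count; i++ {
		// Build one allocation named after the task group and index, placed
		// on a random node, exactly as the per-test loops above do.
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, tg, uint(i))
		alloc.TaskGroup = tg
		allocs = append(allocs, alloc)
	}
	return allocs
}

// Hypothetical usage, e.g. replacing the 10-alloc loops above:
//   allocs := mockAllocsForGroup(job, job.TaskGroups[0].Name, 0, 10)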