github.com/djenriquez/nomad-1@v0.8.1/scheduler/reconcile_test.go (about) 1 package scheduler 2 3 import ( 4 "fmt" 5 "log" 6 "os" 7 "reflect" 8 "regexp" 9 "strconv" 10 "testing" 11 "time" 12 13 "github.com/hashicorp/nomad/helper" 14 "github.com/hashicorp/nomad/helper/uuid" 15 "github.com/hashicorp/nomad/nomad/mock" 16 "github.com/hashicorp/nomad/nomad/structs" 17 "github.com/kr/pretty" 18 "github.com/stretchr/testify/assert" 19 "github.com/stretchr/testify/require" 20 ) 21 22 /* 23 Basic Tests: 24 √ Place when there is nothing in the cluster 25 √ Place remainder when there is some in the cluster 26 √ Scale down from n to n-m where n != m 27 √ Scale down from n to zero 28 √ Inplace upgrade test 29 √ Inplace upgrade and scale up test 30 √ Inplace upgrade and scale down test 31 √ Destructive upgrade 32 √ Destructive upgrade and scale up test 33 √ Destructive upgrade and scale down test 34 √ Handle lost nodes 35 √ Handle lost nodes and scale up 36 √ Handle lost nodes and scale down 37 √ Handle draining nodes 38 √ Handle draining nodes and scale up 39 √ Handle draining nodes and scale down 40 √ Handle task group being removed 41 √ Handle job being stopped both as .Stopped and nil 42 √ Place more that one group 43 √ Handle delayed rescheduling failed allocs for batch jobs 44 √ Handle delayed rescheduling failed allocs for service jobs 45 √ Handle eligible now rescheduling failed allocs for batch jobs 46 √ Handle eligible now rescheduling failed allocs for service jobs 47 √ Previously rescheduled allocs should not be rescheduled again 48 √ Aggregated evaluations for allocations that fail close together 49 50 Update stanza Tests: 51 √ Stopped job cancels any active deployment 52 √ Stopped job doesn't cancel terminal deployment 53 √ JobIndex change cancels any active deployment 54 √ JobIndex change doesn't cancels any terminal deployment 55 √ Destructive changes create deployment and get rolled out via max_parallelism 56 √ Don't create a deployment if there are no changes 57 √ Deployment created by all inplace updates 58 √ Paused or failed deployment doesn't create any more canaries 59 √ Paused or failed deployment doesn't do any placements unless replacing lost allocs 60 √ Paused or failed deployment doesn't do destructive updates 61 √ Paused does do migrations 62 √ Failed deployment doesn't do migrations 63 √ Canary that is on a draining node 64 √ Canary that is on a lost node 65 √ Stop old canaries 66 √ Create new canaries on job change 67 √ Create new canaries on job change while scaling up 68 √ Create new canaries on job change while scaling down 69 √ Fill canaries if partial placement 70 √ Promote canaries unblocks max_parallel 71 √ Promote canaries when canaries == count 72 √ Only place as many as are healthy in deployment 73 √ Limit calculation accounts for healthy allocs on migrating/lost nodes 74 √ Failed deployment should not place anything 75 √ Run after canaries have been promoted, new allocs have been rolled out and there is no deployment 76 √ Failed deployment cancels non-promoted task groups 77 √ Failed deployment and updated job works 78 √ Finished deployment gets marked as complete 79 √ Change job change while scaling up 80 √ Update the job when all allocations from the previous job haven't been placed yet. 81 √ Paused or failed deployment doesn't do any rescheduling of failed allocs 82 √ Running deployment with failed allocs doesn't do any rescheduling of failed allocs 83 */ 84 85 var ( 86 canaryUpdate = &structs.UpdateStrategy{ 87 Canary: 2, 88 MaxParallel: 2, 89 HealthCheck: structs.UpdateStrategyHealthCheck_Checks, 90 MinHealthyTime: 10 * time.Second, 91 HealthyDeadline: 10 * time.Minute, 92 Stagger: 31 * time.Second, 93 } 94 95 noCanaryUpdate = &structs.UpdateStrategy{ 96 MaxParallel: 4, 97 HealthCheck: structs.UpdateStrategyHealthCheck_Checks, 98 MinHealthyTime: 10 * time.Second, 99 HealthyDeadline: 10 * time.Minute, 100 Stagger: 31 * time.Second, 101 } 102 ) 103 104 func testLogger() *log.Logger { 105 return log.New(os.Stderr, "", log.LstdFlags) 106 } 107 108 func allocUpdateFnIgnore(*structs.Allocation, *structs.Job, *structs.TaskGroup) (bool, bool, *structs.Allocation) { 109 return true, false, nil 110 } 111 112 func allocUpdateFnDestructive(*structs.Allocation, *structs.Job, *structs.TaskGroup) (bool, bool, *structs.Allocation) { 113 return false, true, nil 114 } 115 116 func allocUpdateFnInplace(existing *structs.Allocation, _ *structs.Job, newTG *structs.TaskGroup) (bool, bool, *structs.Allocation) { 117 // Create a shallow copy 118 newAlloc := existing.CopySkipJob() 119 newAlloc.TaskResources = make(map[string]*structs.Resources) 120 121 // Use the new task resources but keep the network from the old 122 for _, task := range newTG.Tasks { 123 r := task.Resources.Copy() 124 r.Networks = existing.TaskResources[task.Name].Networks 125 newAlloc.TaskResources[task.Name] = r 126 } 127 128 return false, false, newAlloc 129 } 130 131 func allocUpdateFnMock(handled map[string]allocUpdateType, unhandled allocUpdateType) allocUpdateType { 132 return func(existing *structs.Allocation, newJob *structs.Job, newTG *structs.TaskGroup) (bool, bool, *structs.Allocation) { 133 if fn, ok := handled[existing.ID]; ok { 134 return fn(existing, newJob, newTG) 135 } 136 137 return unhandled(existing, newJob, newTG) 138 } 139 } 140 141 var ( 142 // AllocationIndexRegex is a regular expression to find the allocation index. 143 allocationIndexRegex = regexp.MustCompile(".+\\[(\\d+)\\]$") 144 ) 145 146 // allocNameToIndex returns the index of the allocation. 147 func allocNameToIndex(name string) uint { 148 matches := allocationIndexRegex.FindStringSubmatch(name) 149 if len(matches) != 2 { 150 return 0 151 } 152 153 index, err := strconv.Atoi(matches[1]) 154 if err != nil { 155 return 0 156 } 157 158 return uint(index) 159 } 160 161 func assertNamesHaveIndexes(t *testing.T, indexes []int, names []string) { 162 t.Helper() 163 m := make(map[uint]int) 164 for _, i := range indexes { 165 m[uint(i)] += 1 166 } 167 168 for _, n := range names { 169 index := allocNameToIndex(n) 170 val, contained := m[index] 171 if !contained { 172 t.Fatalf("Unexpected index %d from name %s\nAll names: %v", index, n, names) 173 } 174 175 val-- 176 if val < 0 { 177 t.Fatalf("Index %d repeated too many times\nAll names: %v", index, names) 178 } 179 m[index] = val 180 } 181 182 for k, remainder := range m { 183 if remainder != 0 { 184 t.Fatalf("Index %d has %d remaining uses expected\nAll names: %v", k, remainder, names) 185 } 186 } 187 } 188 189 func assertNoCanariesStopped(t *testing.T, d *structs.Deployment, stop []allocStopResult) { 190 t.Helper() 191 canaryIndex := make(map[string]struct{}) 192 for _, state := range d.TaskGroups { 193 for _, c := range state.PlacedCanaries { 194 canaryIndex[c] = struct{}{} 195 } 196 } 197 198 for _, s := range stop { 199 if _, ok := canaryIndex[s.alloc.ID]; ok { 200 t.Fatalf("Stopping canary alloc %q %q", s.alloc.ID, s.alloc.Name) 201 } 202 } 203 } 204 205 func assertPlaceResultsHavePreviousAllocs(t *testing.T, numPrevious int, place []allocPlaceResult) { 206 t.Helper() 207 names := make(map[string]struct{}, numPrevious) 208 209 found := 0 210 for _, p := range place { 211 if _, ok := names[p.name]; ok { 212 t.Fatalf("Name %q already placed", p.name) 213 } 214 names[p.name] = struct{}{} 215 216 if p.previousAlloc == nil { 217 continue 218 } 219 220 if act := p.previousAlloc.Name; p.name != act { 221 t.Fatalf("Name mismatch on previous alloc; got %q; want %q", act, p.name) 222 } 223 found++ 224 } 225 if numPrevious != found { 226 t.Fatalf("wanted %d; got %d placements with previous allocs", numPrevious, found) 227 } 228 } 229 230 func assertPlacementsAreRescheduled(t *testing.T, numRescheduled int, place []allocPlaceResult) { 231 t.Helper() 232 names := make(map[string]struct{}, numRescheduled) 233 234 found := 0 235 for _, p := range place { 236 if _, ok := names[p.name]; ok { 237 t.Fatalf("Name %q already placed", p.name) 238 } 239 names[p.name] = struct{}{} 240 241 if p.previousAlloc == nil { 242 continue 243 } 244 if p.reschedule { 245 found++ 246 } 247 248 } 249 if numRescheduled != found { 250 t.Fatalf("wanted %d; got %d placements that are rescheduled", numRescheduled, found) 251 } 252 } 253 254 func intRange(pairs ...int) []int { 255 if len(pairs)%2 != 0 { 256 return nil 257 } 258 259 var r []int 260 for i := 0; i < len(pairs); i += 2 { 261 for j := pairs[i]; j <= pairs[i+1]; j++ { 262 r = append(r, j) 263 } 264 } 265 return r 266 } 267 268 func placeResultsToNames(place []allocPlaceResult) []string { 269 names := make([]string, 0, len(place)) 270 for _, p := range place { 271 names = append(names, p.name) 272 } 273 return names 274 } 275 276 func destructiveResultsToNames(destructive []allocDestructiveResult) []string { 277 names := make([]string, 0, len(destructive)) 278 for _, d := range destructive { 279 names = append(names, d.placeName) 280 } 281 return names 282 } 283 284 func stopResultsToNames(stop []allocStopResult) []string { 285 names := make([]string, 0, len(stop)) 286 for _, s := range stop { 287 names = append(names, s.alloc.Name) 288 } 289 return names 290 } 291 292 func attributeUpdatesToNames(attributeUpdates map[string]*structs.Allocation) []string { 293 names := make([]string, 0, len(attributeUpdates)) 294 for _, a := range attributeUpdates { 295 names = append(names, a.Name) 296 } 297 return names 298 } 299 300 func allocsToNames(allocs []*structs.Allocation) []string { 301 names := make([]string, 0, len(allocs)) 302 for _, a := range allocs { 303 names = append(names, a.Name) 304 } 305 return names 306 } 307 308 type resultExpectation struct { 309 createDeployment *structs.Deployment 310 deploymentUpdates []*structs.DeploymentStatusUpdate 311 place int 312 destructive int 313 inplace int 314 attributeUpdates int 315 stop int 316 desiredTGUpdates map[string]*structs.DesiredUpdates 317 } 318 319 func assertResults(t *testing.T, r *reconcileResults, exp *resultExpectation) { 320 t.Helper() 321 assert := assert.New(t) 322 323 if exp.createDeployment != nil && r.deployment == nil { 324 t.Fatalf("Expect a created deployment got none") 325 } else if exp.createDeployment == nil && r.deployment != nil { 326 t.Fatalf("Expect no created deployment; got %#v", r.deployment) 327 } else if exp.createDeployment != nil && r.deployment != nil { 328 // Clear the deployment ID 329 r.deployment.ID, exp.createDeployment.ID = "", "" 330 if !reflect.DeepEqual(r.deployment, exp.createDeployment) { 331 t.Fatalf("Unexpected createdDeployment; got\n %#v\nwant\n%#v\nDiff: %v", 332 r.deployment, exp.createDeployment, pretty.Diff(r.deployment, exp.createDeployment)) 333 } 334 } 335 336 assert.EqualValues(exp.deploymentUpdates, r.deploymentUpdates, "Expected Deployment Updates") 337 assert.Len(r.place, exp.place, "Expected Placements") 338 assert.Len(r.destructiveUpdate, exp.destructive, "Expected Destructive") 339 assert.Len(r.inplaceUpdate, exp.inplace, "Expected Inplace Updates") 340 assert.Len(r.attributeUpdates, exp.attributeUpdates, "Expected Attribute Updates") 341 assert.Len(r.stop, exp.stop, "Expected Stops") 342 assert.EqualValues(exp.desiredTGUpdates, r.desiredTGUpdates, "Expected Desired TG Update Annotations") 343 } 344 345 // Tests the reconciler properly handles placements for a job that has no 346 // existing allocations 347 func TestReconciler_Place_NoExisting(t *testing.T) { 348 job := mock.Job() 349 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, nil, nil, "") 350 r := reconciler.Compute() 351 352 // Assert the correct results 353 assertResults(t, r, &resultExpectation{ 354 createDeployment: nil, 355 deploymentUpdates: nil, 356 place: 10, 357 inplace: 0, 358 stop: 0, 359 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 360 job.TaskGroups[0].Name: { 361 Place: 10, 362 }, 363 }, 364 }) 365 366 assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place)) 367 } 368 369 // Tests the reconciler properly handles placements for a job that has some 370 // existing allocations 371 func TestReconciler_Place_Existing(t *testing.T) { 372 job := mock.Job() 373 374 // Create 3 existing allocations 375 var allocs []*structs.Allocation 376 for i := 0; i < 5; i++ { 377 alloc := mock.Alloc() 378 alloc.Job = job 379 alloc.JobID = job.ID 380 alloc.NodeID = uuid.Generate() 381 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 382 allocs = append(allocs, alloc) 383 } 384 385 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "") 386 r := reconciler.Compute() 387 388 // Assert the correct results 389 assertResults(t, r, &resultExpectation{ 390 createDeployment: nil, 391 deploymentUpdates: nil, 392 place: 5, 393 inplace: 0, 394 stop: 0, 395 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 396 job.TaskGroups[0].Name: { 397 Place: 5, 398 Ignore: 5, 399 }, 400 }, 401 }) 402 403 assertNamesHaveIndexes(t, intRange(5, 9), placeResultsToNames(r.place)) 404 } 405 406 // Tests the reconciler properly handles stopping allocations for a job that has 407 // scaled down 408 func TestReconciler_ScaleDown_Partial(t *testing.T) { 409 // Has desired 10 410 job := mock.Job() 411 412 // Create 20 existing allocations 413 var allocs []*structs.Allocation 414 for i := 0; i < 20; i++ { 415 alloc := mock.Alloc() 416 alloc.Job = job 417 alloc.JobID = job.ID 418 alloc.NodeID = uuid.Generate() 419 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 420 allocs = append(allocs, alloc) 421 } 422 423 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "") 424 r := reconciler.Compute() 425 426 // Assert the correct results 427 assertResults(t, r, &resultExpectation{ 428 createDeployment: nil, 429 deploymentUpdates: nil, 430 place: 0, 431 inplace: 0, 432 stop: 10, 433 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 434 job.TaskGroups[0].Name: { 435 Ignore: 10, 436 Stop: 10, 437 }, 438 }, 439 }) 440 441 assertNamesHaveIndexes(t, intRange(10, 19), stopResultsToNames(r.stop)) 442 } 443 444 // Tests the reconciler properly handles stopping allocations for a job that has 445 // scaled down to zero desired 446 func TestReconciler_ScaleDown_Zero(t *testing.T) { 447 // Set desired 0 448 job := mock.Job() 449 job.TaskGroups[0].Count = 0 450 451 // Create 20 existing allocations 452 var allocs []*structs.Allocation 453 for i := 0; i < 20; i++ { 454 alloc := mock.Alloc() 455 alloc.Job = job 456 alloc.JobID = job.ID 457 alloc.NodeID = uuid.Generate() 458 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 459 allocs = append(allocs, alloc) 460 } 461 462 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "") 463 r := reconciler.Compute() 464 465 // Assert the correct results 466 assertResults(t, r, &resultExpectation{ 467 createDeployment: nil, 468 deploymentUpdates: nil, 469 place: 0, 470 inplace: 0, 471 stop: 20, 472 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 473 job.TaskGroups[0].Name: { 474 Stop: 20, 475 }, 476 }, 477 }) 478 479 assertNamesHaveIndexes(t, intRange(0, 19), stopResultsToNames(r.stop)) 480 } 481 482 // Tests the reconciler properly handles stopping allocations for a job that has 483 // scaled down to zero desired where allocs have duplicate names 484 func TestReconciler_ScaleDown_Zero_DuplicateNames(t *testing.T) { 485 // Set desired 0 486 job := mock.Job() 487 job.TaskGroups[0].Count = 0 488 489 // Create 20 existing allocations 490 var allocs []*structs.Allocation 491 var expectedStopped []int 492 for i := 0; i < 20; i++ { 493 alloc := mock.Alloc() 494 alloc.Job = job 495 alloc.JobID = job.ID 496 alloc.NodeID = uuid.Generate() 497 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%2)) 498 allocs = append(allocs, alloc) 499 expectedStopped = append(expectedStopped, i%2) 500 } 501 502 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "") 503 r := reconciler.Compute() 504 505 // Assert the correct results 506 assertResults(t, r, &resultExpectation{ 507 createDeployment: nil, 508 deploymentUpdates: nil, 509 place: 0, 510 inplace: 0, 511 stop: 20, 512 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 513 job.TaskGroups[0].Name: { 514 Stop: 20, 515 }, 516 }, 517 }) 518 519 assertNamesHaveIndexes(t, expectedStopped, stopResultsToNames(r.stop)) 520 } 521 522 // Tests the reconciler properly handles inplace upgrading allocations 523 func TestReconciler_Inplace(t *testing.T) { 524 job := mock.Job() 525 526 // Create 10 existing allocations 527 var allocs []*structs.Allocation 528 for i := 0; i < 10; i++ { 529 alloc := mock.Alloc() 530 alloc.Job = job 531 alloc.JobID = job.ID 532 alloc.NodeID = uuid.Generate() 533 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 534 allocs = append(allocs, alloc) 535 } 536 537 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "") 538 r := reconciler.Compute() 539 540 // Assert the correct results 541 assertResults(t, r, &resultExpectation{ 542 createDeployment: nil, 543 deploymentUpdates: nil, 544 place: 0, 545 inplace: 10, 546 stop: 0, 547 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 548 job.TaskGroups[0].Name: { 549 InPlaceUpdate: 10, 550 }, 551 }, 552 }) 553 554 assertNamesHaveIndexes(t, intRange(0, 9), allocsToNames(r.inplaceUpdate)) 555 } 556 557 // Tests the reconciler properly handles inplace upgrading allocations while 558 // scaling up 559 func TestReconciler_Inplace_ScaleUp(t *testing.T) { 560 // Set desired 15 561 job := mock.Job() 562 job.TaskGroups[0].Count = 15 563 564 // Create 10 existing allocations 565 var allocs []*structs.Allocation 566 for i := 0; i < 10; i++ { 567 alloc := mock.Alloc() 568 alloc.Job = job 569 alloc.JobID = job.ID 570 alloc.NodeID = uuid.Generate() 571 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 572 allocs = append(allocs, alloc) 573 } 574 575 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "") 576 r := reconciler.Compute() 577 578 // Assert the correct results 579 assertResults(t, r, &resultExpectation{ 580 createDeployment: nil, 581 deploymentUpdates: nil, 582 place: 5, 583 inplace: 10, 584 stop: 0, 585 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 586 job.TaskGroups[0].Name: { 587 Place: 5, 588 InPlaceUpdate: 10, 589 }, 590 }, 591 }) 592 593 assertNamesHaveIndexes(t, intRange(0, 9), allocsToNames(r.inplaceUpdate)) 594 assertNamesHaveIndexes(t, intRange(10, 14), placeResultsToNames(r.place)) 595 } 596 597 // Tests the reconciler properly handles inplace upgrading allocations while 598 // scaling down 599 func TestReconciler_Inplace_ScaleDown(t *testing.T) { 600 // Set desired 5 601 job := mock.Job() 602 job.TaskGroups[0].Count = 5 603 604 // Create 10 existing allocations 605 var allocs []*structs.Allocation 606 for i := 0; i < 10; i++ { 607 alloc := mock.Alloc() 608 alloc.Job = job 609 alloc.JobID = job.ID 610 alloc.NodeID = uuid.Generate() 611 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 612 allocs = append(allocs, alloc) 613 } 614 615 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "") 616 r := reconciler.Compute() 617 618 // Assert the correct results 619 assertResults(t, r, &resultExpectation{ 620 createDeployment: nil, 621 deploymentUpdates: nil, 622 place: 0, 623 inplace: 5, 624 stop: 5, 625 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 626 job.TaskGroups[0].Name: { 627 Stop: 5, 628 InPlaceUpdate: 5, 629 }, 630 }, 631 }) 632 633 assertNamesHaveIndexes(t, intRange(0, 4), allocsToNames(r.inplaceUpdate)) 634 assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop)) 635 } 636 637 // Tests the reconciler properly handles destructive upgrading allocations 638 func TestReconciler_Destructive(t *testing.T) { 639 job := mock.Job() 640 641 // Create 10 existing allocations 642 var allocs []*structs.Allocation 643 for i := 0; i < 10; i++ { 644 alloc := mock.Alloc() 645 alloc.Job = job 646 alloc.JobID = job.ID 647 alloc.NodeID = uuid.Generate() 648 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 649 allocs = append(allocs, alloc) 650 } 651 652 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "") 653 r := reconciler.Compute() 654 655 // Assert the correct results 656 assertResults(t, r, &resultExpectation{ 657 createDeployment: nil, 658 deploymentUpdates: nil, 659 destructive: 10, 660 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 661 job.TaskGroups[0].Name: { 662 DestructiveUpdate: 10, 663 }, 664 }, 665 }) 666 667 assertNamesHaveIndexes(t, intRange(0, 9), destructiveResultsToNames(r.destructiveUpdate)) 668 } 669 670 // Tests the reconciler properly handles destructive upgrading allocations while 671 // scaling up 672 func TestReconciler_Destructive_ScaleUp(t *testing.T) { 673 // Set desired 15 674 job := mock.Job() 675 job.TaskGroups[0].Count = 15 676 677 // Create 10 existing allocations 678 var allocs []*structs.Allocation 679 for i := 0; i < 10; i++ { 680 alloc := mock.Alloc() 681 alloc.Job = job 682 alloc.JobID = job.ID 683 alloc.NodeID = uuid.Generate() 684 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 685 allocs = append(allocs, alloc) 686 } 687 688 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "") 689 r := reconciler.Compute() 690 691 // Assert the correct results 692 assertResults(t, r, &resultExpectation{ 693 createDeployment: nil, 694 deploymentUpdates: nil, 695 place: 5, 696 destructive: 10, 697 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 698 job.TaskGroups[0].Name: { 699 Place: 5, 700 DestructiveUpdate: 10, 701 }, 702 }, 703 }) 704 705 assertNamesHaveIndexes(t, intRange(0, 9), destructiveResultsToNames(r.destructiveUpdate)) 706 assertNamesHaveIndexes(t, intRange(10, 14), placeResultsToNames(r.place)) 707 } 708 709 // Tests the reconciler properly handles destructive upgrading allocations while 710 // scaling down 711 func TestReconciler_Destructive_ScaleDown(t *testing.T) { 712 // Set desired 5 713 job := mock.Job() 714 job.TaskGroups[0].Count = 5 715 716 // Create 10 existing allocations 717 var allocs []*structs.Allocation 718 for i := 0; i < 10; i++ { 719 alloc := mock.Alloc() 720 alloc.Job = job 721 alloc.JobID = job.ID 722 alloc.NodeID = uuid.Generate() 723 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 724 allocs = append(allocs, alloc) 725 } 726 727 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "") 728 r := reconciler.Compute() 729 730 // Assert the correct results 731 assertResults(t, r, &resultExpectation{ 732 createDeployment: nil, 733 deploymentUpdates: nil, 734 destructive: 5, 735 stop: 5, 736 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 737 job.TaskGroups[0].Name: { 738 Stop: 5, 739 DestructiveUpdate: 5, 740 }, 741 }, 742 }) 743 744 assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop)) 745 assertNamesHaveIndexes(t, intRange(0, 4), destructiveResultsToNames(r.destructiveUpdate)) 746 } 747 748 // Tests the reconciler properly handles lost nodes with allocations 749 func TestReconciler_LostNode(t *testing.T) { 750 job := mock.Job() 751 752 // Create 10 existing allocations 753 var allocs []*structs.Allocation 754 for i := 0; i < 10; i++ { 755 alloc := mock.Alloc() 756 alloc.Job = job 757 alloc.JobID = job.ID 758 alloc.NodeID = uuid.Generate() 759 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 760 allocs = append(allocs, alloc) 761 } 762 763 // Build a map of tainted nodes 764 tainted := make(map[string]*structs.Node, 2) 765 for i := 0; i < 2; i++ { 766 n := mock.Node() 767 n.ID = allocs[i].NodeID 768 n.Status = structs.NodeStatusDown 769 tainted[n.ID] = n 770 } 771 772 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "") 773 r := reconciler.Compute() 774 775 // Assert the correct results 776 assertResults(t, r, &resultExpectation{ 777 createDeployment: nil, 778 deploymentUpdates: nil, 779 place: 2, 780 inplace: 0, 781 stop: 2, 782 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 783 job.TaskGroups[0].Name: { 784 Place: 2, 785 Stop: 2, 786 Ignore: 8, 787 }, 788 }, 789 }) 790 791 assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) 792 assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) 793 } 794 795 // Tests the reconciler properly handles lost nodes with allocations while 796 // scaling up 797 func TestReconciler_LostNode_ScaleUp(t *testing.T) { 798 // Set desired 15 799 job := mock.Job() 800 job.TaskGroups[0].Count = 15 801 802 // Create 10 existing allocations 803 var allocs []*structs.Allocation 804 for i := 0; i < 10; i++ { 805 alloc := mock.Alloc() 806 alloc.Job = job 807 alloc.JobID = job.ID 808 alloc.NodeID = uuid.Generate() 809 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 810 allocs = append(allocs, alloc) 811 } 812 813 // Build a map of tainted nodes 814 tainted := make(map[string]*structs.Node, 2) 815 for i := 0; i < 2; i++ { 816 n := mock.Node() 817 n.ID = allocs[i].NodeID 818 n.Status = structs.NodeStatusDown 819 tainted[n.ID] = n 820 } 821 822 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "") 823 r := reconciler.Compute() 824 825 // Assert the correct results 826 assertResults(t, r, &resultExpectation{ 827 createDeployment: nil, 828 deploymentUpdates: nil, 829 place: 7, 830 inplace: 0, 831 stop: 2, 832 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 833 job.TaskGroups[0].Name: { 834 Place: 7, 835 Stop: 2, 836 Ignore: 8, 837 }, 838 }, 839 }) 840 841 assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) 842 assertNamesHaveIndexes(t, intRange(0, 1, 10, 14), placeResultsToNames(r.place)) 843 } 844 845 // Tests the reconciler properly handles lost nodes with allocations while 846 // scaling down 847 func TestReconciler_LostNode_ScaleDown(t *testing.T) { 848 // Set desired 5 849 job := mock.Job() 850 job.TaskGroups[0].Count = 5 851 852 // Create 10 existing allocations 853 var allocs []*structs.Allocation 854 for i := 0; i < 10; i++ { 855 alloc := mock.Alloc() 856 alloc.Job = job 857 alloc.JobID = job.ID 858 alloc.NodeID = uuid.Generate() 859 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 860 allocs = append(allocs, alloc) 861 } 862 863 // Build a map of tainted nodes 864 tainted := make(map[string]*structs.Node, 2) 865 for i := 0; i < 2; i++ { 866 n := mock.Node() 867 n.ID = allocs[i].NodeID 868 n.Status = structs.NodeStatusDown 869 tainted[n.ID] = n 870 } 871 872 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "") 873 r := reconciler.Compute() 874 875 // Assert the correct results 876 assertResults(t, r, &resultExpectation{ 877 createDeployment: nil, 878 deploymentUpdates: nil, 879 place: 0, 880 inplace: 0, 881 stop: 5, 882 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 883 job.TaskGroups[0].Name: { 884 Stop: 5, 885 Ignore: 5, 886 }, 887 }, 888 }) 889 890 assertNamesHaveIndexes(t, intRange(0, 1, 7, 9), stopResultsToNames(r.stop)) 891 } 892 893 // Tests the reconciler properly handles draining nodes with allocations 894 func TestReconciler_DrainNode(t *testing.T) { 895 job := mock.Job() 896 897 // Create 10 existing allocations 898 var allocs []*structs.Allocation 899 for i := 0; i < 10; i++ { 900 alloc := mock.Alloc() 901 alloc.Job = job 902 alloc.JobID = job.ID 903 alloc.NodeID = uuid.Generate() 904 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 905 allocs = append(allocs, alloc) 906 } 907 908 // Build a map of tainted nodes 909 tainted := make(map[string]*structs.Node, 2) 910 for i := 0; i < 2; i++ { 911 n := mock.Node() 912 n.ID = allocs[i].NodeID 913 allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true) 914 n.Drain = true 915 tainted[n.ID] = n 916 } 917 918 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "") 919 r := reconciler.Compute() 920 921 // Assert the correct results 922 assertResults(t, r, &resultExpectation{ 923 createDeployment: nil, 924 deploymentUpdates: nil, 925 place: 2, 926 inplace: 0, 927 stop: 2, 928 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 929 job.TaskGroups[0].Name: { 930 Migrate: 2, 931 Ignore: 8, 932 }, 933 }, 934 }) 935 936 assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) 937 assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) 938 assertPlaceResultsHavePreviousAllocs(t, 2, r.place) 939 // These should not have the reschedule field set 940 assertPlacementsAreRescheduled(t, 0, r.place) 941 } 942 943 // Tests the reconciler properly handles draining nodes with allocations while 944 // scaling up 945 func TestReconciler_DrainNode_ScaleUp(t *testing.T) { 946 // Set desired 15 947 job := mock.Job() 948 job.TaskGroups[0].Count = 15 949 950 // Create 10 existing allocations 951 var allocs []*structs.Allocation 952 for i := 0; i < 10; i++ { 953 alloc := mock.Alloc() 954 alloc.Job = job 955 alloc.JobID = job.ID 956 alloc.NodeID = uuid.Generate() 957 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 958 allocs = append(allocs, alloc) 959 } 960 961 // Build a map of tainted nodes 962 tainted := make(map[string]*structs.Node, 2) 963 for i := 0; i < 2; i++ { 964 n := mock.Node() 965 n.ID = allocs[i].NodeID 966 allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true) 967 n.Drain = true 968 tainted[n.ID] = n 969 } 970 971 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "") 972 r := reconciler.Compute() 973 974 // Assert the correct results 975 assertResults(t, r, &resultExpectation{ 976 createDeployment: nil, 977 deploymentUpdates: nil, 978 place: 7, 979 inplace: 0, 980 stop: 2, 981 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 982 job.TaskGroups[0].Name: { 983 Place: 5, 984 Migrate: 2, 985 Ignore: 8, 986 }, 987 }, 988 }) 989 990 assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) 991 assertNamesHaveIndexes(t, intRange(0, 1, 10, 14), placeResultsToNames(r.place)) 992 assertPlaceResultsHavePreviousAllocs(t, 2, r.place) 993 // These should not have the reschedule field set 994 assertPlacementsAreRescheduled(t, 0, r.place) 995 } 996 997 // Tests the reconciler properly handles draining nodes with allocations while 998 // scaling down 999 func TestReconciler_DrainNode_ScaleDown(t *testing.T) { 1000 // Set desired 8 1001 job := mock.Job() 1002 job.TaskGroups[0].Count = 8 1003 1004 // Create 10 existing allocations 1005 var allocs []*structs.Allocation 1006 for i := 0; i < 10; i++ { 1007 alloc := mock.Alloc() 1008 alloc.Job = job 1009 alloc.JobID = job.ID 1010 alloc.NodeID = uuid.Generate() 1011 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1012 allocs = append(allocs, alloc) 1013 } 1014 1015 // Build a map of tainted nodes 1016 tainted := make(map[string]*structs.Node, 3) 1017 for i := 0; i < 3; i++ { 1018 n := mock.Node() 1019 n.ID = allocs[i].NodeID 1020 allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true) 1021 n.Drain = true 1022 tainted[n.ID] = n 1023 } 1024 1025 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "") 1026 r := reconciler.Compute() 1027 1028 // Assert the correct results 1029 assertResults(t, r, &resultExpectation{ 1030 createDeployment: nil, 1031 deploymentUpdates: nil, 1032 place: 1, 1033 inplace: 0, 1034 stop: 3, 1035 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1036 job.TaskGroups[0].Name: { 1037 Migrate: 1, 1038 Stop: 2, 1039 Ignore: 7, 1040 }, 1041 }, 1042 }) 1043 1044 assertNamesHaveIndexes(t, intRange(0, 2), stopResultsToNames(r.stop)) 1045 assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place)) 1046 assertPlaceResultsHavePreviousAllocs(t, 1, r.place) 1047 // These should not have the reschedule field set 1048 assertPlacementsAreRescheduled(t, 0, r.place) 1049 } 1050 1051 // Tests the reconciler properly handles a task group being removed 1052 func TestReconciler_RemovedTG(t *testing.T) { 1053 job := mock.Job() 1054 1055 // Create 10 allocations for a tg that no longer exists 1056 var allocs []*structs.Allocation 1057 for i := 0; i < 10; i++ { 1058 alloc := mock.Alloc() 1059 alloc.Job = job 1060 alloc.JobID = job.ID 1061 alloc.NodeID = uuid.Generate() 1062 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1063 allocs = append(allocs, alloc) 1064 } 1065 1066 oldName := job.TaskGroups[0].Name 1067 newName := "different" 1068 job.TaskGroups[0].Name = newName 1069 1070 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "") 1071 r := reconciler.Compute() 1072 1073 // Assert the correct results 1074 assertResults(t, r, &resultExpectation{ 1075 createDeployment: nil, 1076 deploymentUpdates: nil, 1077 place: 10, 1078 inplace: 0, 1079 stop: 10, 1080 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1081 oldName: { 1082 Stop: 10, 1083 }, 1084 newName: { 1085 Place: 10, 1086 }, 1087 }, 1088 }) 1089 1090 assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop)) 1091 assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place)) 1092 } 1093 1094 // Tests the reconciler properly handles a job in stopped states 1095 func TestReconciler_JobStopped(t *testing.T) { 1096 job := mock.Job() 1097 job.Stop = true 1098 1099 cases := []struct { 1100 name string 1101 job *structs.Job 1102 jobID, taskGroup string 1103 }{ 1104 { 1105 name: "stopped job", 1106 job: job, 1107 jobID: job.ID, 1108 taskGroup: job.TaskGroups[0].Name, 1109 }, 1110 { 1111 name: "nil job", 1112 job: nil, 1113 jobID: "foo", 1114 taskGroup: "bar", 1115 }, 1116 } 1117 1118 for _, c := range cases { 1119 t.Run(c.name, func(t *testing.T) { 1120 // Create 10 allocations 1121 var allocs []*structs.Allocation 1122 for i := 0; i < 10; i++ { 1123 alloc := mock.Alloc() 1124 alloc.Job = c.job 1125 alloc.JobID = c.jobID 1126 alloc.NodeID = uuid.Generate() 1127 alloc.Name = structs.AllocName(c.jobID, c.taskGroup, uint(i)) 1128 alloc.TaskGroup = c.taskGroup 1129 allocs = append(allocs, alloc) 1130 } 1131 1132 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, c.jobID, c.job, nil, allocs, nil, "") 1133 r := reconciler.Compute() 1134 1135 // Assert the correct results 1136 assertResults(t, r, &resultExpectation{ 1137 createDeployment: nil, 1138 deploymentUpdates: nil, 1139 place: 0, 1140 inplace: 0, 1141 stop: 10, 1142 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1143 c.taskGroup: { 1144 Stop: 10, 1145 }, 1146 }, 1147 }) 1148 1149 assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop)) 1150 }) 1151 } 1152 } 1153 1154 // Tests the reconciler properly handles jobs with multiple task groups 1155 func TestReconciler_MultiTG(t *testing.T) { 1156 job := mock.Job() 1157 tg2 := job.TaskGroups[0].Copy() 1158 tg2.Name = "foo" 1159 job.TaskGroups = append(job.TaskGroups, tg2) 1160 1161 // Create 2 existing allocations for the first tg 1162 var allocs []*structs.Allocation 1163 for i := 0; i < 2; i++ { 1164 alloc := mock.Alloc() 1165 alloc.Job = job 1166 alloc.JobID = job.ID 1167 alloc.NodeID = uuid.Generate() 1168 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1169 allocs = append(allocs, alloc) 1170 } 1171 1172 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "") 1173 r := reconciler.Compute() 1174 1175 // Assert the correct results 1176 assertResults(t, r, &resultExpectation{ 1177 createDeployment: nil, 1178 deploymentUpdates: nil, 1179 place: 18, 1180 inplace: 0, 1181 stop: 0, 1182 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1183 job.TaskGroups[0].Name: { 1184 Place: 8, 1185 Ignore: 2, 1186 }, 1187 tg2.Name: { 1188 Place: 10, 1189 }, 1190 }, 1191 }) 1192 1193 assertNamesHaveIndexes(t, intRange(2, 9, 0, 9), placeResultsToNames(r.place)) 1194 } 1195 1196 // Tests delayed rescheduling of failed batch allocations 1197 func TestReconciler_RescheduleLater_Batch(t *testing.T) { 1198 require := require.New(t) 1199 1200 // Set desired 4 1201 job := mock.Job() 1202 job.TaskGroups[0].Count = 4 1203 now := time.Now() 1204 1205 // Set up reschedule policy 1206 delayDur := 15 * time.Second 1207 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour, Delay: delayDur, DelayFunction: "constant"} 1208 tgName := job.TaskGroups[0].Name 1209 1210 // Create 6 existing allocations - 2 running, 1 complete and 3 failed 1211 var allocs []*structs.Allocation 1212 for i := 0; i < 6; i++ { 1213 alloc := mock.Alloc() 1214 alloc.Job = job 1215 alloc.JobID = job.ID 1216 alloc.NodeID = uuid.Generate() 1217 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1218 allocs = append(allocs, alloc) 1219 alloc.ClientStatus = structs.AllocClientStatusRunning 1220 } 1221 1222 // Mark 3 as failed with restart tracking info 1223 allocs[0].ClientStatus = structs.AllocClientStatusFailed 1224 allocs[0].NextAllocation = allocs[1].ID 1225 allocs[1].ClientStatus = structs.AllocClientStatusFailed 1226 allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ 1227 {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), 1228 PrevAllocID: allocs[0].ID, 1229 PrevNodeID: uuid.Generate(), 1230 }, 1231 }} 1232 allocs[1].NextAllocation = allocs[2].ID 1233 allocs[2].ClientStatus = structs.AllocClientStatusFailed 1234 allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", 1235 StartedAt: now.Add(-1 * time.Hour), 1236 FinishedAt: now}} 1237 allocs[2].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ 1238 {RescheduleTime: time.Now().Add(-2 * time.Hour).UTC().UnixNano(), 1239 PrevAllocID: allocs[0].ID, 1240 PrevNodeID: uuid.Generate(), 1241 }, 1242 {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), 1243 PrevAllocID: allocs[1].ID, 1244 PrevNodeID: uuid.Generate(), 1245 }, 1246 }} 1247 1248 // Mark one as complete 1249 allocs[5].ClientStatus = structs.AllocClientStatusComplete 1250 1251 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil, uuid.Generate()) 1252 r := reconciler.Compute() 1253 1254 // Two reschedule attempts were already made, one more can be made at a future time 1255 // Verify that the follow up eval has the expected waitUntil time 1256 evals := r.desiredFollowupEvals[tgName] 1257 require.NotNil(evals) 1258 require.Equal(1, len(evals)) 1259 require.Equal(now.Add(delayDur), evals[0].WaitUntil) 1260 1261 // Alloc 5 should not be replaced because it is terminal 1262 assertResults(t, r, &resultExpectation{ 1263 createDeployment: nil, 1264 deploymentUpdates: nil, 1265 place: 0, 1266 inplace: 0, 1267 attributeUpdates: 1, 1268 stop: 0, 1269 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1270 job.TaskGroups[0].Name: { 1271 Place: 0, 1272 InPlaceUpdate: 0, 1273 Ignore: 4, 1274 }, 1275 }, 1276 }) 1277 assertNamesHaveIndexes(t, intRange(2, 2), attributeUpdatesToNames(r.attributeUpdates)) 1278 1279 // Verify that the followup evalID field is set correctly 1280 var annotated *structs.Allocation 1281 for _, a := range r.attributeUpdates { 1282 annotated = a 1283 } 1284 require.Equal(evals[0].ID, annotated.FollowupEvalID) 1285 } 1286 1287 // Tests delayed rescheduling of failed batch allocations and batching of allocs 1288 // with fail times that are close together 1289 func TestReconciler_RescheduleLaterWithBatchedEvals_Batch(t *testing.T) { 1290 require := require.New(t) 1291 1292 // Set desired 4 1293 job := mock.Job() 1294 job.TaskGroups[0].Count = 10 1295 now := time.Now() 1296 1297 // Set up reschedule policy 1298 delayDur := 15 * time.Second 1299 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour, Delay: delayDur, DelayFunction: "constant"} 1300 tgName := job.TaskGroups[0].Name 1301 1302 // Create 10 existing allocations 1303 var allocs []*structs.Allocation 1304 for i := 0; i < 10; i++ { 1305 alloc := mock.Alloc() 1306 alloc.Job = job 1307 alloc.JobID = job.ID 1308 alloc.NodeID = uuid.Generate() 1309 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1310 allocs = append(allocs, alloc) 1311 alloc.ClientStatus = structs.AllocClientStatusRunning 1312 } 1313 1314 // Mark 5 as failed with fail times very close together 1315 for i := 0; i < 5; i++ { 1316 allocs[i].ClientStatus = structs.AllocClientStatusFailed 1317 allocs[i].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", 1318 StartedAt: now.Add(-1 * time.Hour), 1319 FinishedAt: now.Add(time.Duration(50*i) * time.Millisecond)}} 1320 } 1321 1322 // Mark two more as failed several seconds later 1323 for i := 5; i < 7; i++ { 1324 allocs[i].ClientStatus = structs.AllocClientStatusFailed 1325 allocs[i].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", 1326 StartedAt: now.Add(-1 * time.Hour), 1327 FinishedAt: now.Add(10 * time.Second)}} 1328 } 1329 1330 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil, uuid.Generate()) 1331 r := reconciler.Compute() 1332 1333 // Verify that two follow up evals were created 1334 evals := r.desiredFollowupEvals[tgName] 1335 require.NotNil(evals) 1336 require.Equal(2, len(evals)) 1337 1338 // Verify expected WaitUntil values for both batched evals 1339 require.Equal(now.Add(delayDur), evals[0].WaitUntil) 1340 secondBatchDuration := delayDur + 10*time.Second 1341 require.Equal(now.Add(secondBatchDuration), evals[1].WaitUntil) 1342 1343 // Alloc 5 should not be replaced because it is terminal 1344 assertResults(t, r, &resultExpectation{ 1345 createDeployment: nil, 1346 deploymentUpdates: nil, 1347 place: 0, 1348 inplace: 0, 1349 attributeUpdates: 7, 1350 stop: 0, 1351 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1352 job.TaskGroups[0].Name: { 1353 Place: 0, 1354 InPlaceUpdate: 0, 1355 Ignore: 10, 1356 }, 1357 }, 1358 }) 1359 assertNamesHaveIndexes(t, intRange(0, 6), attributeUpdatesToNames(r.attributeUpdates)) 1360 1361 // Verify that the followup evalID field is set correctly 1362 for _, alloc := range r.attributeUpdates { 1363 if allocNameToIndex(alloc.Name) < 5 { 1364 require.Equal(evals[0].ID, alloc.FollowupEvalID) 1365 } else if allocNameToIndex(alloc.Name) < 7 { 1366 require.Equal(evals[1].ID, alloc.FollowupEvalID) 1367 } else { 1368 t.Fatalf("Unexpected alloc name in Inplace results %v", alloc.Name) 1369 } 1370 } 1371 } 1372 1373 // Tests rescheduling failed batch allocations 1374 func TestReconciler_RescheduleNow_Batch(t *testing.T) { 1375 require := require.New(t) 1376 // Set desired 4 1377 job := mock.Job() 1378 job.TaskGroups[0].Count = 4 1379 now := time.Now() 1380 // Set up reschedule policy 1381 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour, Delay: 5 * time.Second, DelayFunction: "constant"} 1382 tgName := job.TaskGroups[0].Name 1383 // Create 6 existing allocations - 2 running, 1 complete and 3 failed 1384 var allocs []*structs.Allocation 1385 for i := 0; i < 6; i++ { 1386 alloc := mock.Alloc() 1387 alloc.Job = job 1388 alloc.JobID = job.ID 1389 alloc.NodeID = uuid.Generate() 1390 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1391 allocs = append(allocs, alloc) 1392 alloc.ClientStatus = structs.AllocClientStatusRunning 1393 } 1394 // Mark 3 as failed with restart tracking info 1395 allocs[0].ClientStatus = structs.AllocClientStatusFailed 1396 allocs[0].NextAllocation = allocs[1].ID 1397 allocs[1].ClientStatus = structs.AllocClientStatusFailed 1398 allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ 1399 {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), 1400 PrevAllocID: allocs[0].ID, 1401 PrevNodeID: uuid.Generate(), 1402 }, 1403 }} 1404 allocs[1].NextAllocation = allocs[2].ID 1405 allocs[2].ClientStatus = structs.AllocClientStatusFailed 1406 allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", 1407 StartedAt: now.Add(-1 * time.Hour), 1408 FinishedAt: now.Add(-5 * time.Second)}} 1409 allocs[2].FollowupEvalID = uuid.Generate() 1410 allocs[2].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ 1411 {RescheduleTime: time.Now().Add(-2 * time.Hour).UTC().UnixNano(), 1412 PrevAllocID: allocs[0].ID, 1413 PrevNodeID: uuid.Generate(), 1414 }, 1415 {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), 1416 PrevAllocID: allocs[1].ID, 1417 PrevNodeID: uuid.Generate(), 1418 }, 1419 }} 1420 // Mark one as complete 1421 allocs[5].ClientStatus = structs.AllocClientStatusComplete 1422 1423 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil, "") 1424 reconciler.now = now 1425 r := reconciler.Compute() 1426 1427 // Verify that no follow up evals were created 1428 evals := r.desiredFollowupEvals[tgName] 1429 require.Nil(evals) 1430 1431 // Two reschedule attempts were made, one more can be made now 1432 // Alloc 5 should not be replaced because it is terminal 1433 assertResults(t, r, &resultExpectation{ 1434 createDeployment: nil, 1435 deploymentUpdates: nil, 1436 place: 1, 1437 inplace: 0, 1438 stop: 0, 1439 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1440 job.TaskGroups[0].Name: { 1441 Place: 1, 1442 Ignore: 3, 1443 }, 1444 }, 1445 }) 1446 1447 assertNamesHaveIndexes(t, intRange(2, 2), placeResultsToNames(r.place)) 1448 assertPlaceResultsHavePreviousAllocs(t, 1, r.place) 1449 assertPlacementsAreRescheduled(t, 1, r.place) 1450 1451 } 1452 1453 // Tests rescheduling failed service allocations with desired state stop 1454 func TestReconciler_RescheduleLater_Service(t *testing.T) { 1455 require := require.New(t) 1456 1457 // Set desired 5 1458 job := mock.Job() 1459 job.TaskGroups[0].Count = 5 1460 tgName := job.TaskGroups[0].Name 1461 now := time.Now() 1462 1463 // Set up reschedule policy 1464 delayDur := 15 * time.Second 1465 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 1, Interval: 24 * time.Hour, Delay: delayDur, MaxDelay: 1 * time.Hour} 1466 1467 // Create 5 existing allocations 1468 var allocs []*structs.Allocation 1469 for i := 0; i < 5; i++ { 1470 alloc := mock.Alloc() 1471 alloc.Job = job 1472 alloc.JobID = job.ID 1473 alloc.NodeID = uuid.Generate() 1474 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1475 allocs = append(allocs, alloc) 1476 alloc.ClientStatus = structs.AllocClientStatusRunning 1477 } 1478 1479 // Mark two as failed 1480 allocs[0].ClientStatus = structs.AllocClientStatusFailed 1481 1482 // Mark one of them as already rescheduled once 1483 allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ 1484 {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), 1485 PrevAllocID: uuid.Generate(), 1486 PrevNodeID: uuid.Generate(), 1487 }, 1488 }} 1489 allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", 1490 StartedAt: now.Add(-1 * time.Hour), 1491 FinishedAt: now}} 1492 allocs[1].ClientStatus = structs.AllocClientStatusFailed 1493 1494 // Mark one as desired state stop 1495 allocs[4].DesiredStatus = structs.AllocDesiredStatusStop 1496 1497 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, uuid.Generate()) 1498 r := reconciler.Compute() 1499 1500 // Should place a new placement and create a follow up eval for the delayed reschedule 1501 // Verify that the follow up eval has the expected waitUntil time 1502 evals := r.desiredFollowupEvals[tgName] 1503 require.NotNil(evals) 1504 require.Equal(1, len(evals)) 1505 require.Equal(now.Add(delayDur), evals[0].WaitUntil) 1506 1507 assertResults(t, r, &resultExpectation{ 1508 createDeployment: nil, 1509 deploymentUpdates: nil, 1510 place: 1, 1511 inplace: 0, 1512 attributeUpdates: 1, 1513 stop: 0, 1514 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1515 job.TaskGroups[0].Name: { 1516 Place: 1, 1517 InPlaceUpdate: 0, 1518 Ignore: 4, 1519 }, 1520 }, 1521 }) 1522 1523 assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place)) 1524 assertNamesHaveIndexes(t, intRange(1, 1), attributeUpdatesToNames(r.attributeUpdates)) 1525 1526 // Verify that the followup evalID field is set correctly 1527 var annotated *structs.Allocation 1528 for _, a := range r.attributeUpdates { 1529 annotated = a 1530 } 1531 require.Equal(evals[0].ID, annotated.FollowupEvalID) 1532 } 1533 1534 // Tests service allocations with client status complete 1535 func TestReconciler_Service_ClientStatusComplete(t *testing.T) { 1536 // Set desired 5 1537 job := mock.Job() 1538 job.TaskGroups[0].Count = 5 1539 1540 // Set up reschedule policy 1541 delayDur := 15 * time.Second 1542 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ 1543 Attempts: 1, 1544 Interval: 24 * time.Hour, 1545 Delay: delayDur, 1546 MaxDelay: 1 * time.Hour, 1547 } 1548 1549 // Create 5 existing allocations 1550 var allocs []*structs.Allocation 1551 for i := 0; i < 5; i++ { 1552 alloc := mock.Alloc() 1553 alloc.Job = job 1554 alloc.JobID = job.ID 1555 alloc.NodeID = uuid.Generate() 1556 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1557 allocs = append(allocs, alloc) 1558 alloc.ClientStatus = structs.AllocClientStatusRunning 1559 alloc.DesiredStatus = structs.AllocDesiredStatusRun 1560 } 1561 1562 // Mark one as client status complete 1563 allocs[4].ClientStatus = structs.AllocClientStatusComplete 1564 1565 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "") 1566 r := reconciler.Compute() 1567 1568 // Should place a new placement for the alloc that was marked complete 1569 assertResults(t, r, &resultExpectation{ 1570 createDeployment: nil, 1571 deploymentUpdates: nil, 1572 place: 1, 1573 inplace: 0, 1574 stop: 0, 1575 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1576 job.TaskGroups[0].Name: { 1577 Place: 1, 1578 InPlaceUpdate: 0, 1579 Ignore: 4, 1580 }, 1581 }, 1582 }) 1583 1584 assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place)) 1585 1586 } 1587 1588 // Tests service job placement with desired stop and client status complete 1589 func TestReconciler_Service_DesiredStop_ClientStatusComplete(t *testing.T) { 1590 // Set desired 5 1591 job := mock.Job() 1592 job.TaskGroups[0].Count = 5 1593 1594 // Set up reschedule policy 1595 delayDur := 15 * time.Second 1596 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ 1597 Attempts: 1, 1598 Interval: 24 * time.Hour, 1599 Delay: delayDur, 1600 MaxDelay: 1 * time.Hour, 1601 } 1602 1603 // Create 5 existing allocations 1604 var allocs []*structs.Allocation 1605 for i := 0; i < 5; i++ { 1606 alloc := mock.Alloc() 1607 alloc.Job = job 1608 alloc.JobID = job.ID 1609 alloc.NodeID = uuid.Generate() 1610 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1611 allocs = append(allocs, alloc) 1612 alloc.ClientStatus = structs.AllocClientStatusRunning 1613 alloc.DesiredStatus = structs.AllocDesiredStatusRun 1614 } 1615 1616 // Mark one as failed but with desired status stop 1617 // Should not trigger rescheduling logic but should trigger a placement 1618 allocs[4].ClientStatus = structs.AllocClientStatusFailed 1619 allocs[4].DesiredStatus = structs.AllocDesiredStatusStop 1620 1621 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "") 1622 r := reconciler.Compute() 1623 1624 // Should place a new placement for the alloc that was marked stopped 1625 assertResults(t, r, &resultExpectation{ 1626 createDeployment: nil, 1627 deploymentUpdates: nil, 1628 place: 1, 1629 inplace: 0, 1630 stop: 0, 1631 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1632 job.TaskGroups[0].Name: { 1633 Place: 1, 1634 InPlaceUpdate: 0, 1635 Ignore: 4, 1636 }, 1637 }, 1638 }) 1639 1640 assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place)) 1641 1642 // Should not have any follow up evals created 1643 require := require.New(t) 1644 require.Equal(0, len(r.desiredFollowupEvals)) 1645 } 1646 1647 // Tests rescheduling failed service allocations with desired state stop 1648 func TestReconciler_RescheduleNow_Service(t *testing.T) { 1649 require := require.New(t) 1650 1651 // Set desired 5 1652 job := mock.Job() 1653 job.TaskGroups[0].Count = 5 1654 tgName := job.TaskGroups[0].Name 1655 now := time.Now() 1656 1657 // Set up reschedule policy and update stanza 1658 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ 1659 Attempts: 1, 1660 Interval: 24 * time.Hour, 1661 Delay: 5 * time.Second, 1662 DelayFunction: "", 1663 MaxDelay: 1 * time.Hour, 1664 Unlimited: false, 1665 } 1666 job.TaskGroups[0].Update = noCanaryUpdate 1667 1668 // Create 5 existing allocations 1669 var allocs []*structs.Allocation 1670 for i := 0; i < 5; i++ { 1671 alloc := mock.Alloc() 1672 alloc.Job = job 1673 alloc.JobID = job.ID 1674 alloc.NodeID = uuid.Generate() 1675 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1676 allocs = append(allocs, alloc) 1677 alloc.ClientStatus = structs.AllocClientStatusRunning 1678 } 1679 1680 // Mark two as failed 1681 allocs[0].ClientStatus = structs.AllocClientStatusFailed 1682 1683 // Mark one of them as already rescheduled once 1684 allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ 1685 {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), 1686 PrevAllocID: uuid.Generate(), 1687 PrevNodeID: uuid.Generate(), 1688 }, 1689 }} 1690 allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", 1691 StartedAt: now.Add(-1 * time.Hour), 1692 FinishedAt: now.Add(-10 * time.Second)}} 1693 allocs[1].ClientStatus = structs.AllocClientStatusFailed 1694 1695 // Mark one as desired state stop 1696 allocs[4].DesiredStatus = structs.AllocDesiredStatusStop 1697 1698 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "") 1699 r := reconciler.Compute() 1700 1701 // Verify that no follow up evals were created 1702 evals := r.desiredFollowupEvals[tgName] 1703 require.Nil(evals) 1704 1705 // Verify that one rescheduled alloc and one replacement for terminal alloc were placed 1706 assertResults(t, r, &resultExpectation{ 1707 createDeployment: nil, 1708 deploymentUpdates: nil, 1709 place: 2, 1710 inplace: 0, 1711 stop: 0, 1712 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1713 job.TaskGroups[0].Name: { 1714 Place: 2, 1715 Ignore: 3, 1716 }, 1717 }, 1718 }) 1719 1720 // Rescheduled allocs should have previous allocs 1721 assertNamesHaveIndexes(t, intRange(1, 1, 4, 4), placeResultsToNames(r.place)) 1722 assertPlaceResultsHavePreviousAllocs(t, 1, r.place) 1723 assertPlacementsAreRescheduled(t, 1, r.place) 1724 } 1725 1726 // Tests rescheduling failed service allocations when there's clock drift (upto a second) 1727 func TestReconciler_RescheduleNow_WithinAllowedTimeWindow(t *testing.T) { 1728 require := require.New(t) 1729 1730 // Set desired 5 1731 job := mock.Job() 1732 job.TaskGroups[0].Count = 5 1733 tgName := job.TaskGroups[0].Name 1734 now := time.Now() 1735 1736 // Set up reschedule policy and update stanza 1737 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ 1738 Attempts: 1, 1739 Interval: 24 * time.Hour, 1740 Delay: 5 * time.Second, 1741 DelayFunction: "", 1742 MaxDelay: 1 * time.Hour, 1743 Unlimited: false, 1744 } 1745 job.TaskGroups[0].Update = noCanaryUpdate 1746 1747 // Create 5 existing allocations 1748 var allocs []*structs.Allocation 1749 for i := 0; i < 5; i++ { 1750 alloc := mock.Alloc() 1751 alloc.Job = job 1752 alloc.JobID = job.ID 1753 alloc.NodeID = uuid.Generate() 1754 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1755 allocs = append(allocs, alloc) 1756 alloc.ClientStatus = structs.AllocClientStatusRunning 1757 } 1758 1759 // Mark one as failed 1760 allocs[0].ClientStatus = structs.AllocClientStatusFailed 1761 1762 // Mark one of them as already rescheduled once 1763 allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ 1764 {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), 1765 PrevAllocID: uuid.Generate(), 1766 PrevNodeID: uuid.Generate(), 1767 }, 1768 }} 1769 // Set fail time to 4 seconds ago which falls within the reschedule window 1770 allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", 1771 StartedAt: now.Add(-1 * time.Hour), 1772 FinishedAt: now.Add(-4 * time.Second)}} 1773 allocs[1].ClientStatus = structs.AllocClientStatusFailed 1774 1775 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "") 1776 reconciler.now = now 1777 r := reconciler.Compute() 1778 1779 // Verify that no follow up evals were created 1780 evals := r.desiredFollowupEvals[tgName] 1781 require.Nil(evals) 1782 1783 // Verify that one rescheduled alloc was placed 1784 assertResults(t, r, &resultExpectation{ 1785 createDeployment: nil, 1786 deploymentUpdates: nil, 1787 place: 1, 1788 inplace: 0, 1789 stop: 0, 1790 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1791 job.TaskGroups[0].Name: { 1792 Place: 1, 1793 Ignore: 4, 1794 }, 1795 }, 1796 }) 1797 1798 // Rescheduled allocs should have previous allocs 1799 assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place)) 1800 assertPlaceResultsHavePreviousAllocs(t, 1, r.place) 1801 assertPlacementsAreRescheduled(t, 1, r.place) 1802 } 1803 1804 // Tests rescheduling failed service allocations when the eval ID matches and there's a large clock drift 1805 func TestReconciler_RescheduleNow_EvalIDMatch(t *testing.T) { 1806 require := require.New(t) 1807 1808 // Set desired 5 1809 job := mock.Job() 1810 job.TaskGroups[0].Count = 5 1811 tgName := job.TaskGroups[0].Name 1812 now := time.Now() 1813 1814 // Set up reschedule policy and update stanza 1815 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ 1816 Attempts: 1, 1817 Interval: 24 * time.Hour, 1818 Delay: 5 * time.Second, 1819 DelayFunction: "", 1820 MaxDelay: 1 * time.Hour, 1821 Unlimited: false, 1822 } 1823 job.TaskGroups[0].Update = noCanaryUpdate 1824 1825 // Create 5 existing allocations 1826 var allocs []*structs.Allocation 1827 for i := 0; i < 5; i++ { 1828 alloc := mock.Alloc() 1829 alloc.Job = job 1830 alloc.JobID = job.ID 1831 alloc.NodeID = uuid.Generate() 1832 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1833 allocs = append(allocs, alloc) 1834 alloc.ClientStatus = structs.AllocClientStatusRunning 1835 } 1836 1837 // Mark one as failed 1838 allocs[0].ClientStatus = structs.AllocClientStatusFailed 1839 1840 // Mark one of them as already rescheduled once 1841 allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ 1842 {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), 1843 PrevAllocID: uuid.Generate(), 1844 PrevNodeID: uuid.Generate(), 1845 }, 1846 }} 1847 // Set fail time to 5 seconds ago and eval ID 1848 evalID := uuid.Generate() 1849 allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", 1850 StartedAt: now.Add(-1 * time.Hour), 1851 FinishedAt: now.Add(-5 * time.Second)}} 1852 allocs[1].ClientStatus = structs.AllocClientStatusFailed 1853 allocs[1].FollowupEvalID = evalID 1854 1855 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, evalID) 1856 reconciler.now = now.Add(-30 * time.Second) 1857 r := reconciler.Compute() 1858 1859 // Verify that no follow up evals were created 1860 evals := r.desiredFollowupEvals[tgName] 1861 require.Nil(evals) 1862 1863 // Verify that one rescheduled alloc was placed 1864 assertResults(t, r, &resultExpectation{ 1865 createDeployment: nil, 1866 deploymentUpdates: nil, 1867 place: 1, 1868 inplace: 0, 1869 stop: 0, 1870 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1871 job.TaskGroups[0].Name: { 1872 Place: 1, 1873 Ignore: 4, 1874 }, 1875 }, 1876 }) 1877 1878 // Rescheduled allocs should have previous allocs 1879 assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place)) 1880 assertPlaceResultsHavePreviousAllocs(t, 1, r.place) 1881 assertPlacementsAreRescheduled(t, 1, r.place) 1882 } 1883 1884 // Tests failed service allocations that were already rescheduled won't be rescheduled again 1885 func TestReconciler_DontReschedule_PreviouslyRescheduled(t *testing.T) { 1886 // Set desired 5 1887 job := mock.Job() 1888 job.TaskGroups[0].Count = 5 1889 1890 // Set up reschedule policy 1891 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 5, Interval: 24 * time.Hour} 1892 1893 // Create 7 existing allocations 1894 var allocs []*structs.Allocation 1895 for i := 0; i < 7; i++ { 1896 alloc := mock.Alloc() 1897 alloc.Job = job 1898 alloc.JobID = job.ID 1899 alloc.NodeID = uuid.Generate() 1900 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1901 allocs = append(allocs, alloc) 1902 alloc.ClientStatus = structs.AllocClientStatusRunning 1903 } 1904 // Mark two as failed and rescheduled 1905 allocs[0].ClientStatus = structs.AllocClientStatusFailed 1906 allocs[0].ID = allocs[1].ID 1907 allocs[1].ClientStatus = structs.AllocClientStatusFailed 1908 allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ 1909 {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), 1910 PrevAllocID: uuid.Generate(), 1911 PrevNodeID: uuid.Generate(), 1912 }, 1913 }} 1914 allocs[1].NextAllocation = allocs[2].ID 1915 1916 // Mark one as desired state stop 1917 allocs[4].DesiredStatus = structs.AllocDesiredStatusStop 1918 1919 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "") 1920 r := reconciler.Compute() 1921 1922 // Should place 1 - one is a new placement to make up the desired count of 5 1923 // failing allocs are not rescheduled 1924 assertResults(t, r, &resultExpectation{ 1925 createDeployment: nil, 1926 deploymentUpdates: nil, 1927 place: 1, 1928 inplace: 0, 1929 stop: 0, 1930 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1931 job.TaskGroups[0].Name: { 1932 Place: 1, 1933 Ignore: 4, 1934 }, 1935 }, 1936 }) 1937 1938 // name index 0 is used for the replacement because its 1939 assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place)) 1940 } 1941 1942 // Tests the reconciler cancels an old deployment when the job is being stopped 1943 func TestReconciler_CancelDeployment_JobStop(t *testing.T) { 1944 job := mock.Job() 1945 job.Stop = true 1946 1947 running := structs.NewDeployment(job) 1948 failed := structs.NewDeployment(job) 1949 failed.Status = structs.DeploymentStatusFailed 1950 1951 cases := []struct { 1952 name string 1953 job *structs.Job 1954 jobID, taskGroup string 1955 deployment *structs.Deployment 1956 cancel bool 1957 }{ 1958 { 1959 name: "stopped job, running deployment", 1960 job: job, 1961 jobID: job.ID, 1962 taskGroup: job.TaskGroups[0].Name, 1963 deployment: running, 1964 cancel: true, 1965 }, 1966 { 1967 name: "nil job, running deployment", 1968 job: nil, 1969 jobID: "foo", 1970 taskGroup: "bar", 1971 deployment: running, 1972 cancel: true, 1973 }, 1974 { 1975 name: "stopped job, failed deployment", 1976 job: job, 1977 jobID: job.ID, 1978 taskGroup: job.TaskGroups[0].Name, 1979 deployment: failed, 1980 cancel: false, 1981 }, 1982 { 1983 name: "nil job, failed deployment", 1984 job: nil, 1985 jobID: "foo", 1986 taskGroup: "bar", 1987 deployment: failed, 1988 cancel: false, 1989 }, 1990 } 1991 1992 for _, c := range cases { 1993 t.Run(c.name, func(t *testing.T) { 1994 // Create 10 allocations 1995 var allocs []*structs.Allocation 1996 for i := 0; i < 10; i++ { 1997 alloc := mock.Alloc() 1998 alloc.Job = c.job 1999 alloc.JobID = c.jobID 2000 alloc.NodeID = uuid.Generate() 2001 alloc.Name = structs.AllocName(c.jobID, c.taskGroup, uint(i)) 2002 alloc.TaskGroup = c.taskGroup 2003 allocs = append(allocs, alloc) 2004 } 2005 2006 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, c.jobID, c.job, c.deployment, allocs, nil, "") 2007 r := reconciler.Compute() 2008 2009 var updates []*structs.DeploymentStatusUpdate 2010 if c.cancel { 2011 updates = []*structs.DeploymentStatusUpdate{ 2012 { 2013 DeploymentID: c.deployment.ID, 2014 Status: structs.DeploymentStatusCancelled, 2015 StatusDescription: structs.DeploymentStatusDescriptionStoppedJob, 2016 }, 2017 } 2018 } 2019 2020 // Assert the correct results 2021 assertResults(t, r, &resultExpectation{ 2022 createDeployment: nil, 2023 deploymentUpdates: updates, 2024 place: 0, 2025 inplace: 0, 2026 stop: 10, 2027 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2028 c.taskGroup: { 2029 Stop: 10, 2030 }, 2031 }, 2032 }) 2033 2034 assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop)) 2035 }) 2036 } 2037 } 2038 2039 // Tests the reconciler cancels an old deployment when the job is updated 2040 func TestReconciler_CancelDeployment_JobUpdate(t *testing.T) { 2041 // Create a base job 2042 job := mock.Job() 2043 2044 // Create two deployments 2045 running := structs.NewDeployment(job) 2046 failed := structs.NewDeployment(job) 2047 failed.Status = structs.DeploymentStatusFailed 2048 2049 // Make the job newer than the deployment 2050 job.Version += 10 2051 2052 cases := []struct { 2053 name string 2054 deployment *structs.Deployment 2055 cancel bool 2056 }{ 2057 { 2058 name: "running deployment", 2059 deployment: running, 2060 cancel: true, 2061 }, 2062 { 2063 name: "failed deployment", 2064 deployment: failed, 2065 cancel: false, 2066 }, 2067 } 2068 2069 for _, c := range cases { 2070 t.Run(c.name, func(t *testing.T) { 2071 // Create 10 allocations 2072 var allocs []*structs.Allocation 2073 for i := 0; i < 10; i++ { 2074 alloc := mock.Alloc() 2075 alloc.Job = job 2076 alloc.JobID = job.ID 2077 alloc.NodeID = uuid.Generate() 2078 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2079 alloc.TaskGroup = job.TaskGroups[0].Name 2080 allocs = append(allocs, alloc) 2081 } 2082 2083 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, c.deployment, allocs, nil, "") 2084 r := reconciler.Compute() 2085 2086 var updates []*structs.DeploymentStatusUpdate 2087 if c.cancel { 2088 updates = []*structs.DeploymentStatusUpdate{ 2089 { 2090 DeploymentID: c.deployment.ID, 2091 Status: structs.DeploymentStatusCancelled, 2092 StatusDescription: structs.DeploymentStatusDescriptionNewerJob, 2093 }, 2094 } 2095 } 2096 2097 // Assert the correct results 2098 assertResults(t, r, &resultExpectation{ 2099 createDeployment: nil, 2100 deploymentUpdates: updates, 2101 place: 0, 2102 inplace: 0, 2103 stop: 0, 2104 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2105 job.TaskGroups[0].Name: { 2106 Ignore: 10, 2107 }, 2108 }, 2109 }) 2110 }) 2111 } 2112 } 2113 2114 // Tests the reconciler creates a deployment and does a rolling upgrade with 2115 // destructive changes 2116 func TestReconciler_CreateDeployment_RollingUpgrade_Destructive(t *testing.T) { 2117 job := mock.Job() 2118 job.TaskGroups[0].Update = noCanaryUpdate 2119 2120 // Create 10 allocations from the old job 2121 var allocs []*structs.Allocation 2122 for i := 0; i < 10; i++ { 2123 alloc := mock.Alloc() 2124 alloc.Job = job 2125 alloc.JobID = job.ID 2126 alloc.NodeID = uuid.Generate() 2127 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2128 alloc.TaskGroup = job.TaskGroups[0].Name 2129 allocs = append(allocs, alloc) 2130 } 2131 2132 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "") 2133 r := reconciler.Compute() 2134 2135 d := structs.NewDeployment(job) 2136 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 2137 DesiredTotal: 10, 2138 } 2139 2140 // Assert the correct results 2141 assertResults(t, r, &resultExpectation{ 2142 createDeployment: d, 2143 deploymentUpdates: nil, 2144 destructive: 4, 2145 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2146 job.TaskGroups[0].Name: { 2147 DestructiveUpdate: 4, 2148 Ignore: 6, 2149 }, 2150 }, 2151 }) 2152 2153 assertNamesHaveIndexes(t, intRange(0, 3), destructiveResultsToNames(r.destructiveUpdate)) 2154 } 2155 2156 // Tests the reconciler creates a deployment for inplace updates 2157 func TestReconciler_CreateDeployment_RollingUpgrade_Inplace(t *testing.T) { 2158 jobOld := mock.Job() 2159 job := jobOld.Copy() 2160 job.Version++ 2161 job.TaskGroups[0].Update = noCanaryUpdate 2162 2163 // Create 10 allocations from the old job 2164 var allocs []*structs.Allocation 2165 for i := 0; i < 10; i++ { 2166 alloc := mock.Alloc() 2167 alloc.Job = jobOld 2168 alloc.JobID = job.ID 2169 alloc.NodeID = uuid.Generate() 2170 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2171 alloc.TaskGroup = job.TaskGroups[0].Name 2172 allocs = append(allocs, alloc) 2173 } 2174 2175 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "") 2176 r := reconciler.Compute() 2177 2178 d := structs.NewDeployment(job) 2179 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 2180 DesiredTotal: 10, 2181 } 2182 2183 // Assert the correct results 2184 assertResults(t, r, &resultExpectation{ 2185 createDeployment: d, 2186 deploymentUpdates: nil, 2187 place: 0, 2188 inplace: 10, 2189 stop: 0, 2190 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2191 job.TaskGroups[0].Name: { 2192 InPlaceUpdate: 10, 2193 }, 2194 }, 2195 }) 2196 } 2197 2198 // Tests the reconciler doesn't creates a deployment if there are no changes 2199 func TestReconciler_DontCreateDeployment_NoChanges(t *testing.T) { 2200 job := mock.Job() 2201 job.TaskGroups[0].Update = noCanaryUpdate 2202 2203 // Create 10 allocations from the job 2204 var allocs []*structs.Allocation 2205 for i := 0; i < 10; i++ { 2206 alloc := mock.Alloc() 2207 alloc.Job = job 2208 alloc.JobID = job.ID 2209 alloc.NodeID = uuid.Generate() 2210 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2211 alloc.TaskGroup = job.TaskGroups[0].Name 2212 allocs = append(allocs, alloc) 2213 } 2214 2215 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "") 2216 r := reconciler.Compute() 2217 2218 // Assert the correct results 2219 assertResults(t, r, &resultExpectation{ 2220 createDeployment: nil, 2221 deploymentUpdates: nil, 2222 place: 0, 2223 inplace: 0, 2224 stop: 0, 2225 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2226 job.TaskGroups[0].Name: { 2227 DestructiveUpdate: 0, 2228 Ignore: 10, 2229 }, 2230 }, 2231 }) 2232 } 2233 2234 // Tests the reconciler doesn't place any more canaries when the deployment is 2235 // paused or failed 2236 func TestReconciler_PausedOrFailedDeployment_NoMoreCanaries(t *testing.T) { 2237 job := mock.Job() 2238 job.TaskGroups[0].Update = canaryUpdate 2239 2240 cases := []struct { 2241 name string 2242 deploymentStatus string 2243 stop uint64 2244 }{ 2245 { 2246 name: "paused deployment", 2247 deploymentStatus: structs.DeploymentStatusPaused, 2248 stop: 0, 2249 }, 2250 { 2251 name: "failed deployment", 2252 deploymentStatus: structs.DeploymentStatusFailed, 2253 stop: 1, 2254 }, 2255 } 2256 2257 for _, c := range cases { 2258 t.Run(c.name, func(t *testing.T) { 2259 // Create a deployment that is paused/failed and has placed some canaries 2260 d := structs.NewDeployment(job) 2261 d.Status = c.deploymentStatus 2262 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 2263 Promoted: false, 2264 DesiredCanaries: 2, 2265 DesiredTotal: 10, 2266 PlacedAllocs: 1, 2267 } 2268 2269 // Create 10 allocations for the original job 2270 var allocs []*structs.Allocation 2271 for i := 0; i < 10; i++ { 2272 alloc := mock.Alloc() 2273 alloc.Job = job 2274 alloc.JobID = job.ID 2275 alloc.NodeID = uuid.Generate() 2276 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2277 alloc.TaskGroup = job.TaskGroups[0].Name 2278 allocs = append(allocs, alloc) 2279 } 2280 2281 // Create one canary 2282 canary := mock.Alloc() 2283 canary.Job = job 2284 canary.JobID = job.ID 2285 canary.NodeID = uuid.Generate() 2286 canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, 0) 2287 canary.TaskGroup = job.TaskGroups[0].Name 2288 canary.DeploymentID = d.ID 2289 allocs = append(allocs, canary) 2290 d.TaskGroups[canary.TaskGroup].PlacedCanaries = []string{canary.ID} 2291 2292 mockUpdateFn := allocUpdateFnMock(map[string]allocUpdateType{canary.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive) 2293 reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil, "") 2294 r := reconciler.Compute() 2295 2296 // Assert the correct results 2297 assertResults(t, r, &resultExpectation{ 2298 createDeployment: nil, 2299 deploymentUpdates: nil, 2300 place: 0, 2301 inplace: 0, 2302 stop: int(c.stop), 2303 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2304 job.TaskGroups[0].Name: { 2305 Ignore: 11 - c.stop, 2306 Stop: c.stop, 2307 }, 2308 }, 2309 }) 2310 }) 2311 } 2312 } 2313 2314 // Tests the reconciler doesn't place any more allocs when the deployment is 2315 // paused or failed 2316 func TestReconciler_PausedOrFailedDeployment_NoMorePlacements(t *testing.T) { 2317 job := mock.Job() 2318 job.TaskGroups[0].Update = noCanaryUpdate 2319 job.TaskGroups[0].Count = 15 2320 2321 cases := []struct { 2322 name string 2323 deploymentStatus string 2324 }{ 2325 { 2326 name: "paused deployment", 2327 deploymentStatus: structs.DeploymentStatusPaused, 2328 }, 2329 { 2330 name: "failed deployment", 2331 deploymentStatus: structs.DeploymentStatusFailed, 2332 }, 2333 } 2334 2335 for _, c := range cases { 2336 t.Run(c.name, func(t *testing.T) { 2337 // Create a deployment that is paused and has placed some canaries 2338 d := structs.NewDeployment(job) 2339 d.Status = c.deploymentStatus 2340 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 2341 Promoted: false, 2342 DesiredTotal: 15, 2343 PlacedAllocs: 10, 2344 } 2345 2346 // Create 10 allocations for the new job 2347 var allocs []*structs.Allocation 2348 for i := 0; i < 10; i++ { 2349 alloc := mock.Alloc() 2350 alloc.Job = job 2351 alloc.JobID = job.ID 2352 alloc.NodeID = uuid.Generate() 2353 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2354 alloc.TaskGroup = job.TaskGroups[0].Name 2355 allocs = append(allocs, alloc) 2356 } 2357 2358 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "") 2359 r := reconciler.Compute() 2360 2361 // Assert the correct results 2362 assertResults(t, r, &resultExpectation{ 2363 createDeployment: nil, 2364 deploymentUpdates: nil, 2365 place: 0, 2366 inplace: 0, 2367 stop: 0, 2368 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2369 job.TaskGroups[0].Name: { 2370 Ignore: 10, 2371 }, 2372 }, 2373 }) 2374 }) 2375 } 2376 } 2377 2378 // Tests the reconciler doesn't do any more destructive updates when the 2379 // deployment is paused or failed 2380 func TestReconciler_PausedOrFailedDeployment_NoMoreDestructiveUpdates(t *testing.T) { 2381 job := mock.Job() 2382 job.TaskGroups[0].Update = noCanaryUpdate 2383 2384 cases := []struct { 2385 name string 2386 deploymentStatus string 2387 }{ 2388 { 2389 name: "paused deployment", 2390 deploymentStatus: structs.DeploymentStatusPaused, 2391 }, 2392 { 2393 name: "failed deployment", 2394 deploymentStatus: structs.DeploymentStatusFailed, 2395 }, 2396 } 2397 2398 for _, c := range cases { 2399 t.Run(c.name, func(t *testing.T) { 2400 // Create a deployment that is paused and has placed some canaries 2401 d := structs.NewDeployment(job) 2402 d.Status = c.deploymentStatus 2403 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 2404 Promoted: false, 2405 DesiredTotal: 10, 2406 PlacedAllocs: 1, 2407 } 2408 2409 // Create 9 allocations for the original job 2410 var allocs []*structs.Allocation 2411 for i := 1; i < 10; i++ { 2412 alloc := mock.Alloc() 2413 alloc.Job = job 2414 alloc.JobID = job.ID 2415 alloc.NodeID = uuid.Generate() 2416 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2417 alloc.TaskGroup = job.TaskGroups[0].Name 2418 allocs = append(allocs, alloc) 2419 } 2420 2421 // Create one for the new job 2422 newAlloc := mock.Alloc() 2423 newAlloc.Job = job 2424 newAlloc.JobID = job.ID 2425 newAlloc.NodeID = uuid.Generate() 2426 newAlloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, 0) 2427 newAlloc.TaskGroup = job.TaskGroups[0].Name 2428 newAlloc.DeploymentID = d.ID 2429 allocs = append(allocs, newAlloc) 2430 2431 mockUpdateFn := allocUpdateFnMock(map[string]allocUpdateType{newAlloc.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive) 2432 reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil, "") 2433 r := reconciler.Compute() 2434 2435 // Assert the correct results 2436 assertResults(t, r, &resultExpectation{ 2437 createDeployment: nil, 2438 deploymentUpdates: nil, 2439 place: 0, 2440 inplace: 0, 2441 stop: 0, 2442 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2443 job.TaskGroups[0].Name: { 2444 Ignore: 10, 2445 }, 2446 }, 2447 }) 2448 }) 2449 } 2450 } 2451 2452 // Tests the reconciler handles migrations correctly when a deployment is paused 2453 // or failed 2454 func TestReconciler_PausedOrFailedDeployment_Migrations(t *testing.T) { 2455 job := mock.Job() 2456 job.TaskGroups[0].Update = noCanaryUpdate 2457 2458 cases := []struct { 2459 name string 2460 deploymentStatus string 2461 place int 2462 stop int 2463 ignoreAnnotation uint64 2464 migrateAnnotation uint64 2465 stopAnnotation uint64 2466 }{ 2467 { 2468 name: "paused deployment", 2469 deploymentStatus: structs.DeploymentStatusPaused, 2470 place: 0, 2471 stop: 3, 2472 ignoreAnnotation: 5, 2473 stopAnnotation: 3, 2474 }, 2475 { 2476 name: "failed deployment", 2477 deploymentStatus: structs.DeploymentStatusFailed, 2478 place: 0, 2479 stop: 3, 2480 ignoreAnnotation: 5, 2481 migrateAnnotation: 0, 2482 stopAnnotation: 3, 2483 }, 2484 } 2485 2486 for _, c := range cases { 2487 t.Run(c.name, func(t *testing.T) { 2488 // Create a deployment that is paused and has placed some canaries 2489 d := structs.NewDeployment(job) 2490 d.Status = c.deploymentStatus 2491 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 2492 Promoted: false, 2493 DesiredTotal: 10, 2494 PlacedAllocs: 8, 2495 } 2496 2497 // Create 8 allocations in the deployment 2498 var allocs []*structs.Allocation 2499 for i := 0; i < 8; i++ { 2500 alloc := mock.Alloc() 2501 alloc.Job = job 2502 alloc.JobID = job.ID 2503 alloc.NodeID = uuid.Generate() 2504 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2505 alloc.TaskGroup = job.TaskGroups[0].Name 2506 alloc.DeploymentID = d.ID 2507 allocs = append(allocs, alloc) 2508 } 2509 2510 // Build a map of tainted nodes 2511 tainted := make(map[string]*structs.Node, 3) 2512 for i := 0; i < 3; i++ { 2513 n := mock.Node() 2514 n.ID = allocs[i].NodeID 2515 allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true) 2516 n.Drain = true 2517 tainted[n.ID] = n 2518 } 2519 2520 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, d, allocs, tainted, "") 2521 r := reconciler.Compute() 2522 2523 // Assert the correct results 2524 assertResults(t, r, &resultExpectation{ 2525 createDeployment: nil, 2526 deploymentUpdates: nil, 2527 place: c.place, 2528 inplace: 0, 2529 stop: c.stop, 2530 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2531 job.TaskGroups[0].Name: { 2532 Migrate: c.migrateAnnotation, 2533 Ignore: c.ignoreAnnotation, 2534 Stop: c.stopAnnotation, 2535 }, 2536 }, 2537 }) 2538 }) 2539 } 2540 } 2541 2542 // Tests the reconciler handles migrating a canary correctly on a draining node 2543 func TestReconciler_DrainNode_Canary(t *testing.T) { 2544 job := mock.Job() 2545 job.TaskGroups[0].Update = canaryUpdate 2546 2547 // Create a deployment that is paused and has placed some canaries 2548 d := structs.NewDeployment(job) 2549 s := &structs.DeploymentState{ 2550 Promoted: false, 2551 DesiredTotal: 10, 2552 DesiredCanaries: 2, 2553 PlacedAllocs: 2, 2554 } 2555 d.TaskGroups[job.TaskGroups[0].Name] = s 2556 2557 // Create 10 allocations from the old job 2558 var allocs []*structs.Allocation 2559 for i := 0; i < 10; i++ { 2560 alloc := mock.Alloc() 2561 alloc.Job = job 2562 alloc.JobID = job.ID 2563 alloc.NodeID = uuid.Generate() 2564 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2565 alloc.TaskGroup = job.TaskGroups[0].Name 2566 allocs = append(allocs, alloc) 2567 } 2568 2569 // Create two canaries for the new job 2570 handled := make(map[string]allocUpdateType) 2571 for i := 0; i < 2; i++ { 2572 // Create one canary 2573 canary := mock.Alloc() 2574 canary.Job = job 2575 canary.JobID = job.ID 2576 canary.NodeID = uuid.Generate() 2577 canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2578 canary.TaskGroup = job.TaskGroups[0].Name 2579 canary.DeploymentID = d.ID 2580 s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) 2581 allocs = append(allocs, canary) 2582 handled[canary.ID] = allocUpdateFnIgnore 2583 } 2584 2585 // Build a map of tainted nodes that contains the last canary 2586 tainted := make(map[string]*structs.Node, 1) 2587 n := mock.Node() 2588 n.ID = allocs[11].NodeID 2589 allocs[11].DesiredTransition.Migrate = helper.BoolToPtr(true) 2590 n.Drain = true 2591 tainted[n.ID] = n 2592 2593 mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 2594 reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "") 2595 r := reconciler.Compute() 2596 2597 // Assert the correct results 2598 assertResults(t, r, &resultExpectation{ 2599 createDeployment: nil, 2600 deploymentUpdates: nil, 2601 place: 1, 2602 inplace: 0, 2603 stop: 1, 2604 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2605 job.TaskGroups[0].Name: { 2606 Canary: 1, 2607 Ignore: 11, 2608 }, 2609 }, 2610 }) 2611 assertNamesHaveIndexes(t, intRange(1, 1), stopResultsToNames(r.stop)) 2612 assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place)) 2613 } 2614 2615 // Tests the reconciler handles migrating a canary correctly on a lost node 2616 func TestReconciler_LostNode_Canary(t *testing.T) { 2617 job := mock.Job() 2618 job.TaskGroups[0].Update = canaryUpdate 2619 2620 // Create a deployment that is paused and has placed some canaries 2621 d := structs.NewDeployment(job) 2622 s := &structs.DeploymentState{ 2623 Promoted: false, 2624 DesiredTotal: 10, 2625 DesiredCanaries: 2, 2626 PlacedAllocs: 2, 2627 } 2628 d.TaskGroups[job.TaskGroups[0].Name] = s 2629 2630 // Create 10 allocations from the old job 2631 var allocs []*structs.Allocation 2632 for i := 0; i < 10; i++ { 2633 alloc := mock.Alloc() 2634 alloc.Job = job 2635 alloc.JobID = job.ID 2636 alloc.NodeID = uuid.Generate() 2637 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2638 alloc.TaskGroup = job.TaskGroups[0].Name 2639 allocs = append(allocs, alloc) 2640 } 2641 2642 // Create two canaries for the new job 2643 handled := make(map[string]allocUpdateType) 2644 for i := 0; i < 2; i++ { 2645 // Create one canary 2646 canary := mock.Alloc() 2647 canary.Job = job 2648 canary.JobID = job.ID 2649 canary.NodeID = uuid.Generate() 2650 canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2651 canary.TaskGroup = job.TaskGroups[0].Name 2652 s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) 2653 canary.DeploymentID = d.ID 2654 allocs = append(allocs, canary) 2655 handled[canary.ID] = allocUpdateFnIgnore 2656 } 2657 2658 // Build a map of tainted nodes that contains the last canary 2659 tainted := make(map[string]*structs.Node, 1) 2660 n := mock.Node() 2661 n.ID = allocs[11].NodeID 2662 n.Status = structs.NodeStatusDown 2663 tainted[n.ID] = n 2664 2665 mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 2666 reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "") 2667 r := reconciler.Compute() 2668 2669 // Assert the correct results 2670 assertResults(t, r, &resultExpectation{ 2671 createDeployment: nil, 2672 deploymentUpdates: nil, 2673 place: 1, 2674 inplace: 0, 2675 stop: 1, 2676 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2677 job.TaskGroups[0].Name: { 2678 Canary: 1, 2679 Ignore: 11, 2680 }, 2681 }, 2682 }) 2683 2684 assertNamesHaveIndexes(t, intRange(1, 1), stopResultsToNames(r.stop)) 2685 assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place)) 2686 } 2687 2688 // Tests the reconciler handles stopping canaries from older deployments 2689 func TestReconciler_StopOldCanaries(t *testing.T) { 2690 job := mock.Job() 2691 job.TaskGroups[0].Update = canaryUpdate 2692 2693 // Create an old deployment that has placed some canaries 2694 d := structs.NewDeployment(job) 2695 s := &structs.DeploymentState{ 2696 Promoted: false, 2697 DesiredTotal: 10, 2698 DesiredCanaries: 2, 2699 PlacedAllocs: 2, 2700 } 2701 d.TaskGroups[job.TaskGroups[0].Name] = s 2702 2703 // Update the job 2704 job.Version += 10 2705 2706 // Create 10 allocations from the old job 2707 var allocs []*structs.Allocation 2708 for i := 0; i < 10; i++ { 2709 alloc := mock.Alloc() 2710 alloc.Job = job 2711 alloc.JobID = job.ID 2712 alloc.NodeID = uuid.Generate() 2713 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2714 alloc.TaskGroup = job.TaskGroups[0].Name 2715 allocs = append(allocs, alloc) 2716 } 2717 2718 // Create canaries 2719 for i := 0; i < 2; i++ { 2720 // Create one canary 2721 canary := mock.Alloc() 2722 canary.Job = job 2723 canary.JobID = job.ID 2724 canary.NodeID = uuid.Generate() 2725 canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2726 canary.TaskGroup = job.TaskGroups[0].Name 2727 s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) 2728 canary.DeploymentID = d.ID 2729 allocs = append(allocs, canary) 2730 } 2731 2732 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "") 2733 r := reconciler.Compute() 2734 2735 newD := structs.NewDeployment(job) 2736 newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion 2737 newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 2738 DesiredCanaries: 2, 2739 DesiredTotal: 10, 2740 } 2741 2742 // Assert the correct results 2743 assertResults(t, r, &resultExpectation{ 2744 createDeployment: newD, 2745 deploymentUpdates: []*structs.DeploymentStatusUpdate{ 2746 { 2747 DeploymentID: d.ID, 2748 Status: structs.DeploymentStatusCancelled, 2749 StatusDescription: structs.DeploymentStatusDescriptionNewerJob, 2750 }, 2751 }, 2752 place: 2, 2753 inplace: 0, 2754 stop: 2, 2755 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2756 job.TaskGroups[0].Name: { 2757 Canary: 2, 2758 Stop: 2, 2759 Ignore: 10, 2760 }, 2761 }, 2762 }) 2763 2764 assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) 2765 assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) 2766 } 2767 2768 // Tests the reconciler creates new canaries when the job changes 2769 func TestReconciler_NewCanaries(t *testing.T) { 2770 job := mock.Job() 2771 job.TaskGroups[0].Update = canaryUpdate 2772 2773 // Create 10 allocations from the old job 2774 var allocs []*structs.Allocation 2775 for i := 0; i < 10; i++ { 2776 alloc := mock.Alloc() 2777 alloc.Job = job 2778 alloc.JobID = job.ID 2779 alloc.NodeID = uuid.Generate() 2780 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2781 alloc.TaskGroup = job.TaskGroups[0].Name 2782 allocs = append(allocs, alloc) 2783 } 2784 2785 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "") 2786 r := reconciler.Compute() 2787 2788 newD := structs.NewDeployment(job) 2789 newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion 2790 newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 2791 DesiredCanaries: 2, 2792 DesiredTotal: 10, 2793 } 2794 2795 // Assert the correct results 2796 assertResults(t, r, &resultExpectation{ 2797 createDeployment: newD, 2798 deploymentUpdates: nil, 2799 place: 2, 2800 inplace: 0, 2801 stop: 0, 2802 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2803 job.TaskGroups[0].Name: { 2804 Canary: 2, 2805 Ignore: 10, 2806 }, 2807 }, 2808 }) 2809 2810 assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) 2811 } 2812 2813 // Tests the reconciler creates new canaries when the job changes for multiple 2814 // task groups 2815 func TestReconciler_NewCanaries_MultiTG(t *testing.T) { 2816 job := mock.Job() 2817 job.TaskGroups[0].Update = canaryUpdate 2818 job.TaskGroups = append(job.TaskGroups, job.TaskGroups[0].Copy()) 2819 job.TaskGroups[0].Name = "tg2" 2820 2821 // Create 10 allocations from the old job for each tg 2822 var allocs []*structs.Allocation 2823 for j := 0; j < 2; j++ { 2824 for i := 0; i < 10; i++ { 2825 alloc := mock.Alloc() 2826 alloc.Job = job 2827 alloc.JobID = job.ID 2828 alloc.NodeID = uuid.Generate() 2829 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[j].Name, uint(i)) 2830 alloc.TaskGroup = job.TaskGroups[j].Name 2831 allocs = append(allocs, alloc) 2832 } 2833 } 2834 2835 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "") 2836 r := reconciler.Compute() 2837 2838 newD := structs.NewDeployment(job) 2839 newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion 2840 state := &structs.DeploymentState{ 2841 DesiredCanaries: 2, 2842 DesiredTotal: 10, 2843 } 2844 newD.TaskGroups[job.TaskGroups[0].Name] = state 2845 newD.TaskGroups[job.TaskGroups[1].Name] = state.Copy() 2846 2847 // Assert the correct results 2848 assertResults(t, r, &resultExpectation{ 2849 createDeployment: newD, 2850 deploymentUpdates: nil, 2851 place: 4, 2852 inplace: 0, 2853 stop: 0, 2854 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2855 job.TaskGroups[0].Name: { 2856 Canary: 2, 2857 Ignore: 10, 2858 }, 2859 job.TaskGroups[1].Name: { 2860 Canary: 2, 2861 Ignore: 10, 2862 }, 2863 }, 2864 }) 2865 2866 assertNamesHaveIndexes(t, intRange(0, 1, 0, 1), placeResultsToNames(r.place)) 2867 } 2868 2869 // Tests the reconciler creates new canaries when the job changes and scales up 2870 func TestReconciler_NewCanaries_ScaleUp(t *testing.T) { 2871 // Scale the job up to 15 2872 job := mock.Job() 2873 job.TaskGroups[0].Update = canaryUpdate 2874 job.TaskGroups[0].Count = 15 2875 2876 // Create 10 allocations from the old job 2877 var allocs []*structs.Allocation 2878 for i := 0; i < 10; i++ { 2879 alloc := mock.Alloc() 2880 alloc.Job = job 2881 alloc.JobID = job.ID 2882 alloc.NodeID = uuid.Generate() 2883 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2884 alloc.TaskGroup = job.TaskGroups[0].Name 2885 allocs = append(allocs, alloc) 2886 } 2887 2888 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "") 2889 r := reconciler.Compute() 2890 2891 newD := structs.NewDeployment(job) 2892 newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion 2893 newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 2894 DesiredCanaries: 2, 2895 DesiredTotal: 15, 2896 } 2897 2898 // Assert the correct results 2899 assertResults(t, r, &resultExpectation{ 2900 createDeployment: newD, 2901 deploymentUpdates: nil, 2902 place: 2, 2903 inplace: 0, 2904 stop: 0, 2905 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2906 job.TaskGroups[0].Name: { 2907 Canary: 2, 2908 Ignore: 10, 2909 }, 2910 }, 2911 }) 2912 2913 assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) 2914 } 2915 2916 // Tests the reconciler creates new canaries when the job changes and scales 2917 // down 2918 func TestReconciler_NewCanaries_ScaleDown(t *testing.T) { 2919 // Scale the job down to 5 2920 job := mock.Job() 2921 job.TaskGroups[0].Update = canaryUpdate 2922 job.TaskGroups[0].Count = 5 2923 2924 // Create 10 allocations from the old job 2925 var allocs []*structs.Allocation 2926 for i := 0; i < 10; i++ { 2927 alloc := mock.Alloc() 2928 alloc.Job = job 2929 alloc.JobID = job.ID 2930 alloc.NodeID = uuid.Generate() 2931 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2932 alloc.TaskGroup = job.TaskGroups[0].Name 2933 allocs = append(allocs, alloc) 2934 } 2935 2936 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "") 2937 r := reconciler.Compute() 2938 2939 newD := structs.NewDeployment(job) 2940 newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion 2941 newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 2942 DesiredCanaries: 2, 2943 DesiredTotal: 5, 2944 } 2945 2946 // Assert the correct results 2947 assertResults(t, r, &resultExpectation{ 2948 createDeployment: newD, 2949 deploymentUpdates: nil, 2950 place: 2, 2951 inplace: 0, 2952 stop: 5, 2953 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2954 job.TaskGroups[0].Name: { 2955 Canary: 2, 2956 Stop: 5, 2957 Ignore: 5, 2958 }, 2959 }, 2960 }) 2961 2962 assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) 2963 assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop)) 2964 } 2965 2966 // Tests the reconciler handles filling the names of partially placed canaries 2967 func TestReconciler_NewCanaries_FillNames(t *testing.T) { 2968 job := mock.Job() 2969 job.TaskGroups[0].Update = &structs.UpdateStrategy{ 2970 Canary: 4, 2971 MaxParallel: 2, 2972 HealthCheck: structs.UpdateStrategyHealthCheck_Checks, 2973 MinHealthyTime: 10 * time.Second, 2974 HealthyDeadline: 10 * time.Minute, 2975 } 2976 2977 // Create an existing deployment that has placed some canaries 2978 d := structs.NewDeployment(job) 2979 s := &structs.DeploymentState{ 2980 Promoted: false, 2981 DesiredTotal: 10, 2982 DesiredCanaries: 4, 2983 PlacedAllocs: 2, 2984 } 2985 d.TaskGroups[job.TaskGroups[0].Name] = s 2986 2987 // Create 10 allocations from the old job 2988 var allocs []*structs.Allocation 2989 for i := 0; i < 10; i++ { 2990 alloc := mock.Alloc() 2991 alloc.Job = job 2992 alloc.JobID = job.ID 2993 alloc.NodeID = uuid.Generate() 2994 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2995 alloc.TaskGroup = job.TaskGroups[0].Name 2996 allocs = append(allocs, alloc) 2997 } 2998 2999 // Create canaries but pick names at the ends 3000 for i := 0; i < 4; i += 3 { 3001 // Create one canary 3002 canary := mock.Alloc() 3003 canary.Job = job 3004 canary.JobID = job.ID 3005 canary.NodeID = uuid.Generate() 3006 canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3007 canary.TaskGroup = job.TaskGroups[0].Name 3008 s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) 3009 canary.DeploymentID = d.ID 3010 allocs = append(allocs, canary) 3011 } 3012 3013 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "") 3014 r := reconciler.Compute() 3015 3016 // Assert the correct results 3017 assertResults(t, r, &resultExpectation{ 3018 createDeployment: nil, 3019 deploymentUpdates: nil, 3020 place: 2, 3021 inplace: 0, 3022 stop: 0, 3023 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3024 job.TaskGroups[0].Name: { 3025 Canary: 2, 3026 Ignore: 12, 3027 }, 3028 }, 3029 }) 3030 3031 assertNamesHaveIndexes(t, intRange(1, 2), placeResultsToNames(r.place)) 3032 } 3033 3034 // Tests the reconciler handles canary promotion by unblocking max_parallel 3035 func TestReconciler_PromoteCanaries_Unblock(t *testing.T) { 3036 job := mock.Job() 3037 job.TaskGroups[0].Update = canaryUpdate 3038 3039 // Create an existing deployment that has placed some canaries and mark them 3040 // promoted 3041 d := structs.NewDeployment(job) 3042 s := &structs.DeploymentState{ 3043 Promoted: true, 3044 DesiredTotal: 10, 3045 DesiredCanaries: 2, 3046 PlacedAllocs: 2, 3047 } 3048 d.TaskGroups[job.TaskGroups[0].Name] = s 3049 3050 // Create 10 allocations from the old job 3051 var allocs []*structs.Allocation 3052 for i := 0; i < 10; i++ { 3053 alloc := mock.Alloc() 3054 alloc.Job = job 3055 alloc.JobID = job.ID 3056 alloc.NodeID = uuid.Generate() 3057 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3058 alloc.TaskGroup = job.TaskGroups[0].Name 3059 allocs = append(allocs, alloc) 3060 } 3061 3062 // Create the canaries 3063 handled := make(map[string]allocUpdateType) 3064 for i := 0; i < 2; i++ { 3065 // Create one canary 3066 canary := mock.Alloc() 3067 canary.Job = job 3068 canary.JobID = job.ID 3069 canary.NodeID = uuid.Generate() 3070 canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3071 canary.TaskGroup = job.TaskGroups[0].Name 3072 s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) 3073 canary.DeploymentID = d.ID 3074 canary.DeploymentStatus = &structs.AllocDeploymentStatus{ 3075 Healthy: helper.BoolToPtr(true), 3076 } 3077 allocs = append(allocs, canary) 3078 handled[canary.ID] = allocUpdateFnIgnore 3079 } 3080 3081 mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 3082 reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil, "") 3083 r := reconciler.Compute() 3084 3085 // Assert the correct results 3086 assertResults(t, r, &resultExpectation{ 3087 createDeployment: nil, 3088 deploymentUpdates: nil, 3089 destructive: 2, 3090 stop: 2, 3091 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3092 job.TaskGroups[0].Name: { 3093 Stop: 2, 3094 DestructiveUpdate: 2, 3095 Ignore: 8, 3096 }, 3097 }, 3098 }) 3099 3100 assertNoCanariesStopped(t, d, r.stop) 3101 assertNamesHaveIndexes(t, intRange(2, 3), destructiveResultsToNames(r.destructiveUpdate)) 3102 assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) 3103 } 3104 3105 // Tests the reconciler handles canary promotion when the canary count equals 3106 // the total correctly 3107 func TestReconciler_PromoteCanaries_CanariesEqualCount(t *testing.T) { 3108 job := mock.Job() 3109 job.TaskGroups[0].Update = canaryUpdate 3110 job.TaskGroups[0].Count = 2 3111 3112 // Create an existing deployment that has placed some canaries and mark them 3113 // promoted 3114 d := structs.NewDeployment(job) 3115 s := &structs.DeploymentState{ 3116 Promoted: true, 3117 DesiredTotal: 2, 3118 DesiredCanaries: 2, 3119 PlacedAllocs: 2, 3120 } 3121 d.TaskGroups[job.TaskGroups[0].Name] = s 3122 3123 // Create 2 allocations from the old job 3124 var allocs []*structs.Allocation 3125 for i := 0; i < 2; i++ { 3126 alloc := mock.Alloc() 3127 alloc.Job = job 3128 alloc.JobID = job.ID 3129 alloc.NodeID = uuid.Generate() 3130 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3131 alloc.TaskGroup = job.TaskGroups[0].Name 3132 allocs = append(allocs, alloc) 3133 } 3134 3135 // Create the canaries 3136 handled := make(map[string]allocUpdateType) 3137 for i := 0; i < 2; i++ { 3138 // Create one canary 3139 canary := mock.Alloc() 3140 canary.Job = job 3141 canary.JobID = job.ID 3142 canary.NodeID = uuid.Generate() 3143 canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3144 canary.TaskGroup = job.TaskGroups[0].Name 3145 s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) 3146 canary.DeploymentID = d.ID 3147 canary.DeploymentStatus = &structs.AllocDeploymentStatus{ 3148 Healthy: helper.BoolToPtr(true), 3149 } 3150 allocs = append(allocs, canary) 3151 handled[canary.ID] = allocUpdateFnIgnore 3152 } 3153 3154 mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 3155 reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil, "") 3156 r := reconciler.Compute() 3157 3158 updates := []*structs.DeploymentStatusUpdate{ 3159 { 3160 DeploymentID: d.ID, 3161 Status: structs.DeploymentStatusSuccessful, 3162 StatusDescription: structs.DeploymentStatusDescriptionSuccessful, 3163 }, 3164 } 3165 3166 // Assert the correct results 3167 assertResults(t, r, &resultExpectation{ 3168 createDeployment: nil, 3169 deploymentUpdates: updates, 3170 place: 0, 3171 inplace: 0, 3172 stop: 2, 3173 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3174 job.TaskGroups[0].Name: { 3175 Stop: 2, 3176 Ignore: 2, 3177 }, 3178 }, 3179 }) 3180 3181 assertNoCanariesStopped(t, d, r.stop) 3182 assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) 3183 } 3184 3185 // Tests the reconciler checks the health of placed allocs to determine the 3186 // limit 3187 func TestReconciler_DeploymentLimit_HealthAccounting(t *testing.T) { 3188 job := mock.Job() 3189 job.TaskGroups[0].Update = noCanaryUpdate 3190 3191 cases := []struct { 3192 healthy int 3193 }{ 3194 { 3195 healthy: 0, 3196 }, 3197 { 3198 healthy: 1, 3199 }, 3200 { 3201 healthy: 2, 3202 }, 3203 { 3204 healthy: 3, 3205 }, 3206 { 3207 healthy: 4, 3208 }, 3209 } 3210 3211 for _, c := range cases { 3212 t.Run(fmt.Sprintf("%d healthy", c.healthy), func(t *testing.T) { 3213 // Create an existing deployment that has placed some canaries and mark them 3214 // promoted 3215 d := structs.NewDeployment(job) 3216 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3217 Promoted: true, 3218 DesiredTotal: 10, 3219 PlacedAllocs: 4, 3220 } 3221 3222 // Create 6 allocations from the old job 3223 var allocs []*structs.Allocation 3224 for i := 4; i < 10; i++ { 3225 alloc := mock.Alloc() 3226 alloc.Job = job 3227 alloc.JobID = job.ID 3228 alloc.NodeID = uuid.Generate() 3229 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3230 alloc.TaskGroup = job.TaskGroups[0].Name 3231 allocs = append(allocs, alloc) 3232 } 3233 3234 // Create the new allocs 3235 handled := make(map[string]allocUpdateType) 3236 for i := 0; i < 4; i++ { 3237 new := mock.Alloc() 3238 new.Job = job 3239 new.JobID = job.ID 3240 new.NodeID = uuid.Generate() 3241 new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3242 new.TaskGroup = job.TaskGroups[0].Name 3243 new.DeploymentID = d.ID 3244 if i < c.healthy { 3245 new.DeploymentStatus = &structs.AllocDeploymentStatus{ 3246 Healthy: helper.BoolToPtr(true), 3247 } 3248 } 3249 allocs = append(allocs, new) 3250 handled[new.ID] = allocUpdateFnIgnore 3251 } 3252 3253 mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 3254 reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil, "") 3255 r := reconciler.Compute() 3256 3257 // Assert the correct results 3258 assertResults(t, r, &resultExpectation{ 3259 createDeployment: nil, 3260 deploymentUpdates: nil, 3261 destructive: c.healthy, 3262 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3263 job.TaskGroups[0].Name: { 3264 DestructiveUpdate: uint64(c.healthy), 3265 Ignore: uint64(10 - c.healthy), 3266 }, 3267 }, 3268 }) 3269 3270 if c.healthy != 0 { 3271 assertNamesHaveIndexes(t, intRange(4, 3+c.healthy), destructiveResultsToNames(r.destructiveUpdate)) 3272 } 3273 }) 3274 } 3275 } 3276 3277 // Tests the reconciler handles an alloc on a tainted node during a rolling 3278 // update 3279 func TestReconciler_TaintedNode_RollingUpgrade(t *testing.T) { 3280 job := mock.Job() 3281 job.TaskGroups[0].Update = noCanaryUpdate 3282 3283 // Create an existing deployment that has some placed allocs 3284 d := structs.NewDeployment(job) 3285 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3286 Promoted: true, 3287 DesiredTotal: 10, 3288 PlacedAllocs: 7, 3289 } 3290 3291 // Create 2 allocations from the old job 3292 var allocs []*structs.Allocation 3293 for i := 8; i < 10; i++ { 3294 alloc := mock.Alloc() 3295 alloc.Job = job 3296 alloc.JobID = job.ID 3297 alloc.NodeID = uuid.Generate() 3298 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3299 alloc.TaskGroup = job.TaskGroups[0].Name 3300 allocs = append(allocs, alloc) 3301 } 3302 3303 // Create the healthy replacements 3304 handled := make(map[string]allocUpdateType) 3305 for i := 0; i < 8; i++ { 3306 new := mock.Alloc() 3307 new.Job = job 3308 new.JobID = job.ID 3309 new.NodeID = uuid.Generate() 3310 new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3311 new.TaskGroup = job.TaskGroups[0].Name 3312 new.DeploymentID = d.ID 3313 new.DeploymentStatus = &structs.AllocDeploymentStatus{ 3314 Healthy: helper.BoolToPtr(true), 3315 } 3316 allocs = append(allocs, new) 3317 handled[new.ID] = allocUpdateFnIgnore 3318 } 3319 3320 // Build a map of tainted nodes 3321 tainted := make(map[string]*structs.Node, 3) 3322 for i := 0; i < 3; i++ { 3323 n := mock.Node() 3324 n.ID = allocs[2+i].NodeID 3325 if i == 0 { 3326 n.Status = structs.NodeStatusDown 3327 } else { 3328 n.Drain = true 3329 allocs[2+i].DesiredTransition.Migrate = helper.BoolToPtr(true) 3330 } 3331 tainted[n.ID] = n 3332 } 3333 3334 mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 3335 reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "") 3336 r := reconciler.Compute() 3337 3338 // Assert the correct results 3339 assertResults(t, r, &resultExpectation{ 3340 createDeployment: nil, 3341 deploymentUpdates: nil, 3342 place: 3, 3343 destructive: 2, 3344 stop: 3, 3345 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3346 job.TaskGroups[0].Name: { 3347 Place: 1, // Place the lost 3348 Stop: 1, // Stop the lost 3349 Migrate: 2, // Migrate the tainted 3350 DestructiveUpdate: 2, 3351 Ignore: 5, 3352 }, 3353 }, 3354 }) 3355 3356 assertNamesHaveIndexes(t, intRange(8, 9), destructiveResultsToNames(r.destructiveUpdate)) 3357 assertNamesHaveIndexes(t, intRange(0, 2), placeResultsToNames(r.place)) 3358 assertNamesHaveIndexes(t, intRange(0, 2), stopResultsToNames(r.stop)) 3359 } 3360 3361 // Tests the reconciler handles a failed deployment and only replaces lost 3362 // deployments 3363 func TestReconciler_FailedDeployment_PlacementLost(t *testing.T) { 3364 job := mock.Job() 3365 job.TaskGroups[0].Update = noCanaryUpdate 3366 3367 // Create an existing failed deployment that has some placed allocs 3368 d := structs.NewDeployment(job) 3369 d.Status = structs.DeploymentStatusFailed 3370 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3371 Promoted: true, 3372 DesiredTotal: 10, 3373 PlacedAllocs: 4, 3374 } 3375 3376 // Create 6 allocations from the old job 3377 var allocs []*structs.Allocation 3378 for i := 4; i < 10; i++ { 3379 alloc := mock.Alloc() 3380 alloc.Job = job 3381 alloc.JobID = job.ID 3382 alloc.NodeID = uuid.Generate() 3383 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3384 alloc.TaskGroup = job.TaskGroups[0].Name 3385 allocs = append(allocs, alloc) 3386 } 3387 3388 // Create the healthy replacements 3389 handled := make(map[string]allocUpdateType) 3390 for i := 0; i < 4; i++ { 3391 new := mock.Alloc() 3392 new.Job = job 3393 new.JobID = job.ID 3394 new.NodeID = uuid.Generate() 3395 new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3396 new.TaskGroup = job.TaskGroups[0].Name 3397 new.DeploymentID = d.ID 3398 new.DeploymentStatus = &structs.AllocDeploymentStatus{ 3399 Healthy: helper.BoolToPtr(true), 3400 } 3401 allocs = append(allocs, new) 3402 handled[new.ID] = allocUpdateFnIgnore 3403 } 3404 3405 // Build a map of tainted nodes 3406 tainted := make(map[string]*structs.Node, 2) 3407 for i := 0; i < 2; i++ { 3408 n := mock.Node() 3409 n.ID = allocs[6+i].NodeID 3410 if i == 0 { 3411 n.Status = structs.NodeStatusDown 3412 } else { 3413 n.Drain = true 3414 allocs[6+i].DesiredTransition.Migrate = helper.BoolToPtr(true) 3415 } 3416 tainted[n.ID] = n 3417 } 3418 3419 mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 3420 reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "") 3421 r := reconciler.Compute() 3422 3423 // Assert the correct results 3424 assertResults(t, r, &resultExpectation{ 3425 createDeployment: nil, 3426 deploymentUpdates: nil, 3427 place: 1, // Only replace the lost node 3428 inplace: 0, 3429 stop: 2, 3430 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3431 job.TaskGroups[0].Name: { 3432 Place: 1, 3433 Stop: 2, 3434 Ignore: 8, 3435 }, 3436 }, 3437 }) 3438 3439 assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place)) 3440 assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) 3441 } 3442 3443 // Tests the reconciler handles a run after a deployment is complete 3444 // successfully. 3445 func TestReconciler_CompleteDeployment(t *testing.T) { 3446 job := mock.Job() 3447 job.TaskGroups[0].Update = canaryUpdate 3448 3449 d := structs.NewDeployment(job) 3450 d.Status = structs.DeploymentStatusSuccessful 3451 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3452 Promoted: true, 3453 DesiredTotal: 10, 3454 DesiredCanaries: 2, 3455 PlacedAllocs: 10, 3456 HealthyAllocs: 10, 3457 } 3458 3459 // Create allocations from the old job 3460 var allocs []*structs.Allocation 3461 for i := 0; i < 10; i++ { 3462 alloc := mock.Alloc() 3463 alloc.Job = job 3464 alloc.JobID = job.ID 3465 alloc.NodeID = uuid.Generate() 3466 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3467 alloc.TaskGroup = job.TaskGroups[0].Name 3468 alloc.DeploymentID = d.ID 3469 alloc.DeploymentStatus = &structs.AllocDeploymentStatus{ 3470 Healthy: helper.BoolToPtr(true), 3471 } 3472 allocs = append(allocs, alloc) 3473 } 3474 3475 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "") 3476 r := reconciler.Compute() 3477 3478 // Assert the correct results 3479 assertResults(t, r, &resultExpectation{ 3480 createDeployment: nil, 3481 deploymentUpdates: nil, 3482 place: 0, 3483 inplace: 0, 3484 stop: 0, 3485 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3486 job.TaskGroups[0].Name: { 3487 Ignore: 10, 3488 }, 3489 }, 3490 }) 3491 } 3492 3493 // Test that a failed deployment cancels non-promoted canaries 3494 func TestReconciler_FailedDeployment_CancelCanaries(t *testing.T) { 3495 // Create a job with two task groups 3496 job := mock.Job() 3497 job.TaskGroups[0].Update = canaryUpdate 3498 job.TaskGroups = append(job.TaskGroups, job.TaskGroups[0].Copy()) 3499 job.TaskGroups[1].Name = "two" 3500 3501 // Create an existing failed deployment that has promoted one task group 3502 d := structs.NewDeployment(job) 3503 d.Status = structs.DeploymentStatusFailed 3504 s0 := &structs.DeploymentState{ 3505 Promoted: true, 3506 DesiredTotal: 10, 3507 DesiredCanaries: 2, 3508 PlacedAllocs: 4, 3509 } 3510 s1 := &structs.DeploymentState{ 3511 Promoted: false, 3512 DesiredTotal: 10, 3513 DesiredCanaries: 2, 3514 PlacedAllocs: 2, 3515 } 3516 d.TaskGroups[job.TaskGroups[0].Name] = s0 3517 d.TaskGroups[job.TaskGroups[1].Name] = s1 3518 3519 // Create 6 allocations from the old job 3520 var allocs []*structs.Allocation 3521 handled := make(map[string]allocUpdateType) 3522 for _, group := range []int{0, 1} { 3523 replacements := 4 3524 state := s0 3525 if group == 1 { 3526 replacements = 2 3527 state = s1 3528 } 3529 3530 // Create the healthy replacements 3531 for i := 0; i < replacements; i++ { 3532 new := mock.Alloc() 3533 new.Job = job 3534 new.JobID = job.ID 3535 new.NodeID = uuid.Generate() 3536 new.Name = structs.AllocName(job.ID, job.TaskGroups[group].Name, uint(i)) 3537 new.TaskGroup = job.TaskGroups[group].Name 3538 new.DeploymentID = d.ID 3539 new.DeploymentStatus = &structs.AllocDeploymentStatus{ 3540 Healthy: helper.BoolToPtr(true), 3541 } 3542 allocs = append(allocs, new) 3543 handled[new.ID] = allocUpdateFnIgnore 3544 3545 // Add the alloc to the canary list 3546 if i < 2 { 3547 state.PlacedCanaries = append(state.PlacedCanaries, new.ID) 3548 } 3549 } 3550 for i := replacements; i < 10; i++ { 3551 alloc := mock.Alloc() 3552 alloc.Job = job 3553 alloc.JobID = job.ID 3554 alloc.NodeID = uuid.Generate() 3555 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[group].Name, uint(i)) 3556 alloc.TaskGroup = job.TaskGroups[group].Name 3557 allocs = append(allocs, alloc) 3558 } 3559 } 3560 3561 mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 3562 reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil, "") 3563 r := reconciler.Compute() 3564 3565 // Assert the correct results 3566 assertResults(t, r, &resultExpectation{ 3567 createDeployment: nil, 3568 deploymentUpdates: nil, 3569 place: 0, 3570 inplace: 0, 3571 stop: 2, 3572 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3573 job.TaskGroups[0].Name: { 3574 Ignore: 10, 3575 }, 3576 job.TaskGroups[1].Name: { 3577 Stop: 2, 3578 Ignore: 8, 3579 }, 3580 }, 3581 }) 3582 3583 assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) 3584 } 3585 3586 // Test that a failed deployment and updated job works 3587 func TestReconciler_FailedDeployment_NewJob(t *testing.T) { 3588 job := mock.Job() 3589 job.TaskGroups[0].Update = noCanaryUpdate 3590 3591 // Create an existing failed deployment that has some placed allocs 3592 d := structs.NewDeployment(job) 3593 d.Status = structs.DeploymentStatusFailed 3594 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3595 Promoted: true, 3596 DesiredTotal: 10, 3597 PlacedAllocs: 4, 3598 } 3599 3600 // Create 6 allocations from the old job 3601 var allocs []*structs.Allocation 3602 for i := 4; i < 10; i++ { 3603 alloc := mock.Alloc() 3604 alloc.Job = job 3605 alloc.JobID = job.ID 3606 alloc.NodeID = uuid.Generate() 3607 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3608 alloc.TaskGroup = job.TaskGroups[0].Name 3609 allocs = append(allocs, alloc) 3610 } 3611 3612 // Create the healthy replacements 3613 for i := 0; i < 4; i++ { 3614 new := mock.Alloc() 3615 new.Job = job 3616 new.JobID = job.ID 3617 new.NodeID = uuid.Generate() 3618 new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3619 new.TaskGroup = job.TaskGroups[0].Name 3620 new.DeploymentID = d.ID 3621 new.DeploymentStatus = &structs.AllocDeploymentStatus{ 3622 Healthy: helper.BoolToPtr(true), 3623 } 3624 allocs = append(allocs, new) 3625 } 3626 3627 // Up the job version 3628 jobNew := job.Copy() 3629 jobNew.Version += 100 3630 3631 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, jobNew, d, allocs, nil, "") 3632 r := reconciler.Compute() 3633 3634 dnew := structs.NewDeployment(jobNew) 3635 dnew.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3636 DesiredTotal: 10, 3637 } 3638 3639 // Assert the correct results 3640 assertResults(t, r, &resultExpectation{ 3641 createDeployment: dnew, 3642 deploymentUpdates: nil, 3643 destructive: 4, 3644 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3645 job.TaskGroups[0].Name: { 3646 DestructiveUpdate: 4, 3647 Ignore: 6, 3648 }, 3649 }, 3650 }) 3651 3652 assertNamesHaveIndexes(t, intRange(0, 3), destructiveResultsToNames(r.destructiveUpdate)) 3653 } 3654 3655 // Tests the reconciler marks a deployment as complete 3656 func TestReconciler_MarkDeploymentComplete(t *testing.T) { 3657 job := mock.Job() 3658 job.TaskGroups[0].Update = noCanaryUpdate 3659 3660 d := structs.NewDeployment(job) 3661 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3662 Promoted: true, 3663 DesiredTotal: 10, 3664 PlacedAllocs: 10, 3665 HealthyAllocs: 10, 3666 } 3667 3668 // Create allocations from the old job 3669 var allocs []*structs.Allocation 3670 for i := 0; i < 10; i++ { 3671 alloc := mock.Alloc() 3672 alloc.Job = job 3673 alloc.JobID = job.ID 3674 alloc.NodeID = uuid.Generate() 3675 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3676 alloc.TaskGroup = job.TaskGroups[0].Name 3677 alloc.DeploymentID = d.ID 3678 alloc.DeploymentStatus = &structs.AllocDeploymentStatus{ 3679 Healthy: helper.BoolToPtr(true), 3680 } 3681 allocs = append(allocs, alloc) 3682 } 3683 3684 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "") 3685 r := reconciler.Compute() 3686 3687 updates := []*structs.DeploymentStatusUpdate{ 3688 { 3689 DeploymentID: d.ID, 3690 Status: structs.DeploymentStatusSuccessful, 3691 StatusDescription: structs.DeploymentStatusDescriptionSuccessful, 3692 }, 3693 } 3694 3695 // Assert the correct results 3696 assertResults(t, r, &resultExpectation{ 3697 createDeployment: nil, 3698 deploymentUpdates: updates, 3699 place: 0, 3700 inplace: 0, 3701 stop: 0, 3702 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3703 job.TaskGroups[0].Name: { 3704 Ignore: 10, 3705 }, 3706 }, 3707 }) 3708 } 3709 3710 // Tests the reconciler handles changing a job such that a deployment is created 3711 // while doing a scale up but as the second eval. 3712 func TestReconciler_JobChange_ScaleUp_SecondEval(t *testing.T) { 3713 // Scale the job up to 15 3714 job := mock.Job() 3715 job.TaskGroups[0].Update = noCanaryUpdate 3716 job.TaskGroups[0].Count = 30 3717 3718 // Create a deployment that is paused and has placed some canaries 3719 d := structs.NewDeployment(job) 3720 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3721 Promoted: false, 3722 DesiredTotal: 30, 3723 PlacedAllocs: 20, 3724 } 3725 3726 // Create 10 allocations from the old job 3727 var allocs []*structs.Allocation 3728 for i := 0; i < 10; i++ { 3729 alloc := mock.Alloc() 3730 alloc.Job = job 3731 alloc.JobID = job.ID 3732 alloc.NodeID = uuid.Generate() 3733 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3734 alloc.TaskGroup = job.TaskGroups[0].Name 3735 allocs = append(allocs, alloc) 3736 } 3737 3738 // Create 20 from new job 3739 handled := make(map[string]allocUpdateType) 3740 for i := 10; i < 30; i++ { 3741 alloc := mock.Alloc() 3742 alloc.Job = job 3743 alloc.JobID = job.ID 3744 alloc.DeploymentID = d.ID 3745 alloc.NodeID = uuid.Generate() 3746 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3747 alloc.TaskGroup = job.TaskGroups[0].Name 3748 allocs = append(allocs, alloc) 3749 handled[alloc.ID] = allocUpdateFnIgnore 3750 } 3751 3752 mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 3753 reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil, "") 3754 r := reconciler.Compute() 3755 3756 // Assert the correct results 3757 assertResults(t, r, &resultExpectation{ 3758 createDeployment: nil, 3759 deploymentUpdates: nil, 3760 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3761 job.TaskGroups[0].Name: { 3762 // All should be ignored because nothing has been marked as 3763 // healthy. 3764 Ignore: 30, 3765 }, 3766 }, 3767 }) 3768 } 3769 3770 // Tests the reconciler doesn't stop allocations when doing a rolling upgrade 3771 // where the count of the old job allocs is < desired count. 3772 func TestReconciler_RollingUpgrade_MissingAllocs(t *testing.T) { 3773 job := mock.Job() 3774 job.TaskGroups[0].Update = noCanaryUpdate 3775 3776 // Create 7 allocations from the old job 3777 var allocs []*structs.Allocation 3778 for i := 0; i < 7; i++ { 3779 alloc := mock.Alloc() 3780 alloc.Job = job 3781 alloc.JobID = job.ID 3782 alloc.NodeID = uuid.Generate() 3783 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3784 alloc.TaskGroup = job.TaskGroups[0].Name 3785 allocs = append(allocs, alloc) 3786 } 3787 3788 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "") 3789 r := reconciler.Compute() 3790 3791 d := structs.NewDeployment(job) 3792 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3793 DesiredTotal: 10, 3794 } 3795 3796 // Assert the correct results 3797 assertResults(t, r, &resultExpectation{ 3798 createDeployment: d, 3799 deploymentUpdates: nil, 3800 place: 3, 3801 destructive: 1, 3802 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3803 job.TaskGroups[0].Name: { 3804 Place: 3, 3805 DestructiveUpdate: 1, 3806 Ignore: 6, 3807 }, 3808 }, 3809 }) 3810 3811 assertNamesHaveIndexes(t, intRange(7, 9), placeResultsToNames(r.place)) 3812 assertNamesHaveIndexes(t, intRange(0, 0), destructiveResultsToNames(r.destructiveUpdate)) 3813 } 3814 3815 // Tests that the reconciler handles rerunning a batch job in the case that the 3816 // allocations are from an older instance of the job. 3817 func TestReconciler_Batch_Rerun(t *testing.T) { 3818 job := mock.Job() 3819 job.Type = structs.JobTypeBatch 3820 job.TaskGroups[0].Update = nil 3821 3822 // Create 10 allocations from the old job and have them be complete 3823 var allocs []*structs.Allocation 3824 for i := 0; i < 10; i++ { 3825 alloc := mock.Alloc() 3826 alloc.Job = job 3827 alloc.JobID = job.ID 3828 alloc.NodeID = uuid.Generate() 3829 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3830 alloc.TaskGroup = job.TaskGroups[0].Name 3831 alloc.ClientStatus = structs.AllocClientStatusComplete 3832 alloc.DesiredStatus = structs.AllocDesiredStatusStop 3833 allocs = append(allocs, alloc) 3834 } 3835 3836 // Create a copy of the job that is "new" 3837 job2 := job.Copy() 3838 job2.CreateIndex++ 3839 3840 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, true, job2.ID, job2, nil, allocs, nil, "") 3841 r := reconciler.Compute() 3842 3843 // Assert the correct results 3844 assertResults(t, r, &resultExpectation{ 3845 createDeployment: nil, 3846 deploymentUpdates: nil, 3847 place: 10, 3848 destructive: 0, 3849 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3850 job.TaskGroups[0].Name: { 3851 Place: 10, 3852 DestructiveUpdate: 0, 3853 Ignore: 10, 3854 }, 3855 }, 3856 }) 3857 3858 assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place)) 3859 } 3860 3861 // Test that a failed deployment will not result in rescheduling failed allocations 3862 func TestReconciler_FailedDeployment_DontReschedule(t *testing.T) { 3863 job := mock.Job() 3864 job.TaskGroups[0].Update = noCanaryUpdate 3865 3866 tgName := job.TaskGroups[0].Name 3867 now := time.Now() 3868 // Create an existing failed deployment that has some placed allocs 3869 d := structs.NewDeployment(job) 3870 d.Status = structs.DeploymentStatusFailed 3871 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3872 Promoted: true, 3873 DesiredTotal: 5, 3874 PlacedAllocs: 4, 3875 } 3876 3877 // Create 4 allocations and mark two as failed 3878 var allocs []*structs.Allocation 3879 for i := 0; i < 4; i++ { 3880 alloc := mock.Alloc() 3881 alloc.Job = job 3882 alloc.JobID = job.ID 3883 alloc.NodeID = uuid.Generate() 3884 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3885 alloc.TaskGroup = job.TaskGroups[0].Name 3886 allocs = append(allocs, alloc) 3887 } 3888 3889 //create some allocations that are reschedulable now 3890 allocs[2].ClientStatus = structs.AllocClientStatusFailed 3891 allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", 3892 StartedAt: now.Add(-1 * time.Hour), 3893 FinishedAt: now.Add(-10 * time.Second)}} 3894 3895 allocs[3].ClientStatus = structs.AllocClientStatusFailed 3896 allocs[3].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", 3897 StartedAt: now.Add(-1 * time.Hour), 3898 FinishedAt: now.Add(-10 * time.Second)}} 3899 3900 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "") 3901 r := reconciler.Compute() 3902 3903 // Assert that no rescheduled placements were created 3904 assertResults(t, r, &resultExpectation{ 3905 place: 0, 3906 createDeployment: nil, 3907 deploymentUpdates: nil, 3908 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3909 job.TaskGroups[0].Name: { 3910 Ignore: 2, 3911 }, 3912 }, 3913 }) 3914 } 3915 3916 // Test that a running deployment with failed allocs will not result in rescheduling failed allocations 3917 func TestReconciler_DeploymentWithFailedAllocs_DontReschedule(t *testing.T) { 3918 job := mock.Job() 3919 job.TaskGroups[0].Update = noCanaryUpdate 3920 tgName := job.TaskGroups[0].Name 3921 now := time.Now() 3922 3923 // Mock deployment with failed allocs, but deployment watcher hasn't marked it as failed yet 3924 d := structs.NewDeployment(job) 3925 d.Status = structs.DeploymentStatusRunning 3926 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3927 Promoted: false, 3928 DesiredTotal: 5, 3929 PlacedAllocs: 4, 3930 } 3931 3932 // Create 4 allocations and mark two as failed 3933 var allocs []*structs.Allocation 3934 for i := 0; i < 4; i++ { 3935 alloc := mock.Alloc() 3936 alloc.Job = job 3937 alloc.JobID = job.ID 3938 alloc.NodeID = uuid.Generate() 3939 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3940 alloc.TaskGroup = job.TaskGroups[0].Name 3941 alloc.DeploymentID = d.ID 3942 allocs = append(allocs, alloc) 3943 } 3944 3945 // Create allocs that are reschedulable now 3946 allocs[2].ClientStatus = structs.AllocClientStatusFailed 3947 allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", 3948 StartedAt: now.Add(-1 * time.Hour), 3949 FinishedAt: now.Add(-10 * time.Second)}} 3950 3951 allocs[3].ClientStatus = structs.AllocClientStatusFailed 3952 allocs[3].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", 3953 StartedAt: now.Add(-1 * time.Hour), 3954 FinishedAt: now.Add(-10 * time.Second)}} 3955 3956 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "") 3957 r := reconciler.Compute() 3958 3959 // Assert that no rescheduled placements were created 3960 assertResults(t, r, &resultExpectation{ 3961 place: 0, 3962 createDeployment: nil, 3963 deploymentUpdates: nil, 3964 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3965 job.TaskGroups[0].Name: { 3966 Ignore: 2, 3967 }, 3968 }, 3969 }) 3970 } 3971 3972 // Test that a failed deployment cancels non-promoted canaries 3973 func TestReconciler_FailedDeployment_AutoRevert_CancelCanaries(t *testing.T) { 3974 // Create a job 3975 job := mock.Job() 3976 job.TaskGroups[0].Count = 3 3977 job.TaskGroups[0].Update = &structs.UpdateStrategy{ 3978 Canary: 3, 3979 MaxParallel: 2, 3980 HealthCheck: structs.UpdateStrategyHealthCheck_Checks, 3981 MinHealthyTime: 10 * time.Second, 3982 HealthyDeadline: 10 * time.Minute, 3983 Stagger: 31 * time.Second, 3984 } 3985 3986 // Create v1 of the job 3987 jobv1 := job.Copy() 3988 jobv1.Version = 1 3989 jobv1.TaskGroups[0].Meta = map[string]string{"version": "1"} 3990 3991 // Create v2 of the job 3992 jobv2 := job.Copy() 3993 jobv2.Version = 2 3994 jobv2.TaskGroups[0].Meta = map[string]string{"version": "2"} 3995 3996 // Create an existing failed deployment that has promoted one task group 3997 d := structs.NewDeployment(jobv2) 3998 state := &structs.DeploymentState{ 3999 Promoted: false, 4000 DesiredTotal: 3, 4001 PlacedAllocs: 3, 4002 } 4003 d.TaskGroups[job.TaskGroups[0].Name] = state 4004 4005 // Create the original 4006 var allocs []*structs.Allocation 4007 for i := 0; i < 3; i++ { 4008 new := mock.Alloc() 4009 new.Job = jobv2 4010 new.JobID = job.ID 4011 new.NodeID = uuid.Generate() 4012 new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 4013 new.TaskGroup = job.TaskGroups[0].Name 4014 new.DeploymentID = d.ID 4015 new.DeploymentStatus = &structs.AllocDeploymentStatus{ 4016 Healthy: helper.BoolToPtr(true), 4017 } 4018 new.ClientStatus = structs.AllocClientStatusRunning 4019 allocs = append(allocs, new) 4020 4021 } 4022 for i := 0; i < 3; i++ { 4023 new := mock.Alloc() 4024 new.Job = jobv1 4025 new.JobID = jobv1.ID 4026 new.NodeID = uuid.Generate() 4027 new.Name = structs.AllocName(jobv1.ID, jobv1.TaskGroups[0].Name, uint(i)) 4028 new.TaskGroup = job.TaskGroups[0].Name 4029 new.DeploymentID = uuid.Generate() 4030 new.DeploymentStatus = &structs.AllocDeploymentStatus{ 4031 Healthy: helper.BoolToPtr(false), 4032 } 4033 new.DesiredStatus = structs.AllocDesiredStatusStop 4034 new.ClientStatus = structs.AllocClientStatusFailed 4035 allocs = append(allocs, new) 4036 } 4037 4038 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, jobv2, d, allocs, nil, "") 4039 r := reconciler.Compute() 4040 4041 updates := []*structs.DeploymentStatusUpdate{ 4042 { 4043 DeploymentID: d.ID, 4044 Status: structs.DeploymentStatusSuccessful, 4045 StatusDescription: structs.DeploymentStatusDescriptionSuccessful, 4046 }, 4047 } 4048 4049 // Assert the correct results 4050 assertResults(t, r, &resultExpectation{ 4051 createDeployment: nil, 4052 deploymentUpdates: updates, 4053 place: 0, 4054 inplace: 0, 4055 stop: 0, 4056 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 4057 job.TaskGroups[0].Name: { 4058 Stop: 0, 4059 InPlaceUpdate: 0, 4060 Ignore: 3, 4061 }, 4062 }, 4063 }) 4064 }