github.com/anuvu/nomad@v0.8.7-atom1/scheduler/reconcile_test.go

package scheduler

import (
	"fmt"
	"log"
	"os"
	"reflect"
	"regexp"
	"strconv"
	"testing"
	"time"

	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/kr/pretty"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

var (
	canaryUpdate = &structs.UpdateStrategy{
		Canary:          2,
		MaxParallel:     2,
		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
		MinHealthyTime:  10 * time.Second,
		HealthyDeadline: 10 * time.Minute,
		Stagger:         31 * time.Second,
	}

	noCanaryUpdate = &structs.UpdateStrategy{
		MaxParallel:     4,
		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
		MinHealthyTime:  10 * time.Second,
		HealthyDeadline: 10 * time.Minute,
		Stagger:         31 * time.Second,
	}
)

func testLogger() *log.Logger {
	return log.New(os.Stderr, "", log.LstdFlags)
}

func allocUpdateFnIgnore(*structs.Allocation, *structs.Job, *structs.TaskGroup) (bool, bool, *structs.Allocation) {
	return true, false, nil
}

func allocUpdateFnDestructive(*structs.Allocation, *structs.Job, *structs.TaskGroup) (bool, bool, *structs.Allocation) {
	return false, true, nil
}

func allocUpdateFnInplace(existing *structs.Allocation, _ *structs.Job, newTG *structs.TaskGroup) (bool, bool, *structs.Allocation) {
	// Create a shallow copy
	newAlloc := existing.CopySkipJob()
	newAlloc.TaskResources = make(map[string]*structs.Resources)

	// Use the new task resources but keep the network from the old
	for _, task := range newTG.Tasks {
		r := task.Resources.Copy()
		r.Networks = existing.TaskResources[task.Name].Networks
		newAlloc.TaskResources[task.Name] = r
	}

	return false, false, newAlloc
}

func allocUpdateFnMock(handled map[string]allocUpdateType, unhandled allocUpdateType) allocUpdateType {
	return func(existing *structs.Allocation, newJob *structs.Job, newTG *structs.TaskGroup) (bool, bool, *structs.Allocation) {
		if fn, ok := handled[existing.ID]; ok {
			return fn(existing, newJob, newTG)
		}

		return unhandled(existing, newJob, newTG)
	}
}

var (
	// allocationIndexRegex is a regular expression to find the allocation index.
	allocationIndexRegex = regexp.MustCompile(".+\\[(\\d+)\\]$")
)

// allocNameToIndex returns the index of the allocation.
func allocNameToIndex(name string) uint {
	matches := allocationIndexRegex.FindStringSubmatch(name)
	if len(matches) != 2 {
		return 0
	}

	index, err := strconv.Atoi(matches[1])
	if err != nil {
		return 0
	}

	return uint(index)
}
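// As an illustration of allocNameToIndex: a name of the form "job.group[3]"
// (the format produced by structs.AllocName) yields index 3, while a name
// without a parseable "[n]" suffix falls back to index 0.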
func assertNamesHaveIndexes(t *testing.T, indexes []int, names []string) {
	t.Helper()
	m := make(map[uint]int)
	for _, i := range indexes {
		m[uint(i)] += 1
	}

	for _, n := range names {
		index := allocNameToIndex(n)
		val, contained := m[index]
		if !contained {
			t.Fatalf("Unexpected index %d from name %s\nAll names: %v", index, n, names)
		}

		val--
		if val < 0 {
			t.Fatalf("Index %d repeated too many times\nAll names: %v", index, names)
		}
		m[index] = val
	}

	for k, remainder := range m {
		if remainder != 0 {
			t.Fatalf("Index %d has %d expected uses remaining\nAll names: %v", k, remainder, names)
		}
	}
}

func assertNoCanariesStopped(t *testing.T, d *structs.Deployment, stop []allocStopResult) {
	t.Helper()
	canaryIndex := make(map[string]struct{})
	for _, state := range d.TaskGroups {
		for _, c := range state.PlacedCanaries {
			canaryIndex[c] = struct{}{}
		}
	}

	for _, s := range stop {
		if _, ok := canaryIndex[s.alloc.ID]; ok {
			t.Fatalf("Stopping canary alloc %q %q", s.alloc.ID, s.alloc.Name)
		}
	}
}

func assertPlaceResultsHavePreviousAllocs(t *testing.T, numPrevious int, place []allocPlaceResult) {
	t.Helper()
	names := make(map[string]struct{}, numPrevious)

	found := 0
	for _, p := range place {
		if _, ok := names[p.name]; ok {
			t.Fatalf("Name %q already placed", p.name)
		}
		names[p.name] = struct{}{}

		if p.previousAlloc == nil {
			continue
		}

		if act := p.previousAlloc.Name; p.name != act {
			t.Fatalf("Name mismatch on previous alloc; got %q; want %q", act, p.name)
		}
		found++
	}
	if numPrevious != found {
		t.Fatalf("wanted %d; got %d placements with previous allocs", numPrevious, found)
	}
}

func assertPlacementsAreRescheduled(t *testing.T, numRescheduled int, place []allocPlaceResult) {
	t.Helper()
	names := make(map[string]struct{}, numRescheduled)

	found := 0
	for _, p := range place {
		if _, ok := names[p.name]; ok {
			t.Fatalf("Name %q already placed", p.name)
		}
		names[p.name] = struct{}{}

		if p.previousAlloc == nil {
			continue
		}
		if p.reschedule {
			found++
		}
	}
	if numRescheduled != found {
		t.Fatalf("wanted %d; got %d placements that are rescheduled", numRescheduled, found)
	}
}

func intRange(pairs ...int) []int {
	if len(pairs)%2 != 0 {
		return nil
	}

	var r []int
	for i := 0; i < len(pairs); i += 2 {
		for j := pairs[i]; j <= pairs[i+1]; j++ {
			r = append(r, j)
		}
	}
	return r
}
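// A usage sketch of intRange: each argument pair is an inclusive range, so
// intRange(0, 2, 5, 6) expands to []int{0, 1, 2, 5, 6}; an odd number of
// arguments returns nil.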
func placeResultsToNames(place []allocPlaceResult) []string {
	names := make([]string, 0, len(place))
	for _, p := range place {
		names = append(names, p.name)
	}
	return names
}

func destructiveResultsToNames(destructive []allocDestructiveResult) []string {
	names := make([]string, 0, len(destructive))
	for _, d := range destructive {
		names = append(names, d.placeName)
	}
	return names
}

func stopResultsToNames(stop []allocStopResult) []string {
	names := make([]string, 0, len(stop))
	for _, s := range stop {
		names = append(names, s.alloc.Name)
	}
	return names
}

func attributeUpdatesToNames(attributeUpdates map[string]*structs.Allocation) []string {
	names := make([]string, 0, len(attributeUpdates))
	for _, a := range attributeUpdates {
		names = append(names, a.Name)
	}
	return names
}

func allocsToNames(allocs []*structs.Allocation) []string {
	names := make([]string, 0, len(allocs))
	for _, a := range allocs {
		names = append(names, a.Name)
	}
	return names
}

type resultExpectation struct {
	createDeployment  *structs.Deployment
	deploymentUpdates []*structs.DeploymentStatusUpdate
	place             int
	destructive       int
	inplace           int
	attributeUpdates  int
	stop              int
	desiredTGUpdates  map[string]*structs.DesiredUpdates
}

func assertResults(t *testing.T, r *reconcileResults, exp *resultExpectation) {
	t.Helper()
	assert := assert.New(t)

	if exp.createDeployment != nil && r.deployment == nil {
		t.Errorf("Expected a created deployment; got none")
	} else if exp.createDeployment == nil && r.deployment != nil {
		t.Errorf("Expected no created deployment; got %#v", r.deployment)
	} else if exp.createDeployment != nil && r.deployment != nil {
		// Clear the deployment ID
		r.deployment.ID, exp.createDeployment.ID = "", ""
		if !reflect.DeepEqual(r.deployment, exp.createDeployment) {
			t.Errorf("Unexpected created deployment; got\n%#v\nwant\n%#v\nDiff: %v",
				r.deployment, exp.createDeployment, pretty.Diff(r.deployment, exp.createDeployment))
		}
	}

	assert.EqualValues(exp.deploymentUpdates, r.deploymentUpdates, "Expected Deployment Updates")
	assert.Len(r.place, exp.place, "Expected Placements")
	assert.Len(r.destructiveUpdate, exp.destructive, "Expected Destructive")
	assert.Len(r.inplaceUpdate, exp.inplace, "Expected Inplace Updates")
	assert.Len(r.attributeUpdates, exp.attributeUpdates, "Expected Attribute Updates")
	assert.Len(r.stop, exp.stop, "Expected Stops")
	assert.EqualValues(exp.desiredTGUpdates, r.desiredTGUpdates, "Expected Desired TG Update Annotations")
}

// Tests the reconciler properly handles placements for a job that has no
// existing allocations
func TestReconciler_Place_NoExisting(t *testing.T) {
	job := mock.Job()
	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, nil, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             10,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place))
}
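// Reading the NewAllocReconciler call above against the other call sites in
// this file, the arguments are, in order: a logger, the allocUpdateType
// function, whether the job is batch, the job ID, the job itself, the current
// deployment, the existing allocations, the tainted-node map, and the
// triggering eval ID (inferred from usage here rather than stated anywhere).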
// Tests the reconciler properly handles placements for a job that has some
// existing allocations
func TestReconciler_Place_Existing(t *testing.T) {
	job := mock.Job()

	// Create 5 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 5; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             5,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  5,
				Ignore: 5,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(5, 9), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles stopping allocations for a job that has
// scaled down
func TestReconciler_ScaleDown_Partial(t *testing.T) {
	// Has desired 10
	job := mock.Job()

	// Create 20 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 20; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              10,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Ignore: 10,
				Stop:   10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(10, 19), stopResultsToNames(r.stop))
}

// Tests the reconciler properly handles stopping allocations for a job that has
// scaled down to zero desired
func TestReconciler_ScaleDown_Zero(t *testing.T) {
	// Set desired 0
	job := mock.Job()
	job.TaskGroups[0].Count = 0

	// Create 20 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 20; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              20,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Stop: 20,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 19), stopResultsToNames(r.stop))
}

// Tests the reconciler properly handles stopping allocations for a job that has
// scaled down to zero desired where allocs have duplicate names
func TestReconciler_ScaleDown_Zero_DuplicateNames(t *testing.T) {
	// Set desired 0
	job := mock.Job()
	job.TaskGroups[0].Count = 0

	// Create 20 existing allocations
	var allocs []*structs.Allocation
	var expectedStopped []int
	for i := 0; i < 20; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%2))
		allocs = append(allocs, alloc)
		expectedStopped = append(expectedStopped, i%2)
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              20,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Stop: 20,
			},
		},
	})

	assertNamesHaveIndexes(t, expectedStopped, stopResultsToNames(r.stop))
}
// Tests the reconciler properly handles inplace upgrading allocations
func TestReconciler_Inplace(t *testing.T) {
	job := mock.Job()

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           10,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				InPlaceUpdate: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), allocsToNames(r.inplaceUpdate))
}
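// Note the contrast with the destructive tests below: an in-place update
// keeps the existing allocation (allocUpdateFnInplace copies the alloc and
// swaps in the new task resources), so nothing is stopped or placed, whereas
// a destructive update tears down the old alloc and schedules a replacement,
// reported through the destructiveUpdate results instead.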
// Tests the reconciler properly handles inplace upgrading allocations while
// scaling up
func TestReconciler_Inplace_ScaleUp(t *testing.T) {
	// Set desired 15
	job := mock.Job()
	job.TaskGroups[0].Count = 15

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             5,
		inplace:           10,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:         5,
				InPlaceUpdate: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), allocsToNames(r.inplaceUpdate))
	assertNamesHaveIndexes(t, intRange(10, 14), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles inplace upgrading allocations while
// scaling down
func TestReconciler_Inplace_ScaleDown(t *testing.T) {
	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           5,
		stop:              5,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Stop:          5,
				InPlaceUpdate: 5,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 4), allocsToNames(r.inplaceUpdate))
	assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop))
}

// Tests the reconciler properly handles destructive upgrading allocations
func TestReconciler_Destructive(t *testing.T) {
	job := mock.Job()

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		destructive:       10,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				DestructiveUpdate: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), destructiveResultsToNames(r.destructiveUpdate))
}

// Tests the reconciler properly handles destructive upgrading allocations while
// scaling up
func TestReconciler_Destructive_ScaleUp(t *testing.T) {
	// Set desired 15
	job := mock.Job()
	job.TaskGroups[0].Count = 15

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             5,
		destructive:       10,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:             5,
				DestructiveUpdate: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), destructiveResultsToNames(r.destructiveUpdate))
	assertNamesHaveIndexes(t, intRange(10, 14), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles destructive upgrading allocations while
// scaling down
func TestReconciler_Destructive_ScaleDown(t *testing.T) {
	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		destructive:       5,
		stop:              5,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Stop:              5,
				DestructiveUpdate: 5,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 4), destructiveResultsToNames(r.destructiveUpdate))
}
// Tests the reconciler properly handles lost nodes with allocations
func TestReconciler_LostNode(t *testing.T) {
	job := mock.Job()

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 2)
	for i := 0; i < 2; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		n.Status = structs.NodeStatusDown
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             2,
		inplace:           0,
		stop:              2,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  2,
				Stop:   2,
				Ignore: 8,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
}
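// The lost-node tests mark nodes with Status = NodeStatusDown, so their
// allocs are stopped and replaced (Stop + Place). The drain tests further
// down instead set Node.Drain = true plus DesiredTransition.Migrate on the
// allocs, which the reconciler annotates as Migrate rather than Stop.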
// Tests the reconciler properly handles lost nodes with allocations while
// scaling up
func TestReconciler_LostNode_ScaleUp(t *testing.T) {
	// Set desired 15
	job := mock.Job()
	job.TaskGroups[0].Count = 15

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 2)
	for i := 0; i < 2; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		n.Status = structs.NodeStatusDown
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             7,
		inplace:           0,
		stop:              2,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  7,
				Stop:   2,
				Ignore: 8,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 1, 10, 14), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles lost nodes with allocations while
// scaling down
func TestReconciler_LostNode_ScaleDown(t *testing.T) {
	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 2)
	for i := 0; i < 2; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		n.Status = structs.NodeStatusDown
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              5,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Stop:   5,
				Ignore: 5,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1, 7, 9), stopResultsToNames(r.stop))
}
// Tests the reconciler properly handles draining nodes with allocations
func TestReconciler_DrainNode(t *testing.T) {
	job := mock.Job()

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 2)
	for i := 0; i < 2; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true)
		n.Drain = true
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             2,
		inplace:           0,
		stop:              2,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Migrate: 2,
				Ignore:  8,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 2, r.place)
	// These should not have the reschedule field set
	assertPlacementsAreRescheduled(t, 0, r.place)
}

// Tests the reconciler properly handles draining nodes with allocations while
// scaling up
func TestReconciler_DrainNode_ScaleUp(t *testing.T) {
	// Set desired 15
	job := mock.Job()
	job.TaskGroups[0].Count = 15

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 2)
	for i := 0; i < 2; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true)
		n.Drain = true
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             7,
		inplace:           0,
		stop:              2,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:   5,
				Migrate: 2,
				Ignore:  8,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 1, 10, 14), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 2, r.place)
	// These should not have the reschedule field set
	assertPlacementsAreRescheduled(t, 0, r.place)
}

// Tests the reconciler properly handles draining nodes with allocations while
// scaling down
func TestReconciler_DrainNode_ScaleDown(t *testing.T) {
	// Set desired 8
	job := mock.Job()
	job.TaskGroups[0].Count = 8

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 3)
	for i := 0; i < 3; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true)
		n.Drain = true
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		inplace:           0,
		stop:              3,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Migrate: 1,
				Stop:    2,
				Ignore:  7,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 2), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
	// These should not have the reschedule field set
	assertPlacementsAreRescheduled(t, 0, r.place)
}
// Tests the reconciler properly handles a task group being removed
func TestReconciler_RemovedTG(t *testing.T) {
	job := mock.Job()

	// Create 10 allocations for a tg that no longer exists
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	oldName := job.TaskGroups[0].Name
	newName := "different"
	job.TaskGroups[0].Name = newName

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             10,
		inplace:           0,
		stop:              10,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			oldName: {
				Stop: 10,
			},
			newName: {
				Place: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles a job that is stopped or nil
func TestReconciler_JobStopped(t *testing.T) {
	job := mock.Job()
	job.Stop = true

	cases := []struct {
		name             string
		job              *structs.Job
		jobID, taskGroup string
	}{
		{
			name:      "stopped job",
			job:       job,
			jobID:     job.ID,
			taskGroup: job.TaskGroups[0].Name,
		},
		{
			name:      "nil job",
			job:       nil,
			jobID:     "foo",
			taskGroup: "bar",
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			// Create 10 allocations
			var allocs []*structs.Allocation
			for i := 0; i < 10; i++ {
				alloc := mock.Alloc()
				alloc.Job = c.job
				alloc.JobID = c.jobID
				alloc.NodeID = uuid.Generate()
				alloc.Name = structs.AllocName(c.jobID, c.taskGroup, uint(i))
				alloc.TaskGroup = c.taskGroup
				allocs = append(allocs, alloc)
			}

			reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, c.jobID, c.job, nil, allocs, nil, "")
			r := reconciler.Compute()

			// Assert the correct results
			assertResults(t, r, &resultExpectation{
				createDeployment:  nil,
				deploymentUpdates: nil,
				place:             0,
				inplace:           0,
				stop:              10,
				desiredTGUpdates: map[string]*structs.DesiredUpdates{
					c.taskGroup: {
						Stop: 10,
					},
				},
			})

			assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop))
		})
	}
}

// Tests the reconciler doesn't update allocs in terminal state
// when job is stopped or nil
func TestReconciler_JobStopped_TerminalAllocs(t *testing.T) {
	job := mock.Job()
	job.Stop = true

	cases := []struct {
		name             string
		job              *structs.Job
		jobID, taskGroup string
	}{
		{
			name:      "stopped job",
			job:       job,
			jobID:     job.ID,
			taskGroup: job.TaskGroups[0].Name,
		},
		{
			name:      "nil job",
			job:       nil,
			jobID:     "foo",
			taskGroup: "bar",
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			// Create 10 terminal allocations
			var allocs []*structs.Allocation
			for i := 0; i < 10; i++ {
				alloc := mock.Alloc()
				alloc.Job = c.job
				alloc.JobID = c.jobID
				alloc.NodeID = uuid.Generate()
				alloc.Name = structs.AllocName(c.jobID, c.taskGroup, uint(i))
				alloc.TaskGroup = c.taskGroup
				if i%2 == 0 {
					alloc.DesiredStatus = structs.AllocDesiredStatusStop
				} else {
					alloc.ClientStatus = structs.AllocClientStatusFailed
				}
				allocs = append(allocs, alloc)
			}

			reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, c.jobID, c.job, nil, allocs, nil, "")
			r := reconciler.Compute()
			require.Len(t, r.stop, 0)
			// Assert the correct results
			assertResults(t, r, &resultExpectation{
				createDeployment:  nil,
				deploymentUpdates: nil,
				place:             0,
				inplace:           0,
				stop:              0,
				desiredTGUpdates: map[string]*structs.DesiredUpdates{
					c.taskGroup: {},
				},
			})
		})
	}
}
// Tests the reconciler properly handles jobs with multiple task groups
func TestReconciler_MultiTG(t *testing.T) {
	job := mock.Job()
	tg2 := job.TaskGroups[0].Copy()
	tg2.Name = "foo"
	job.TaskGroups = append(job.TaskGroups, tg2)

	// Create 2 existing allocations for the first tg
	var allocs []*structs.Allocation
	for i := 0; i < 2; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             18,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  8,
				Ignore: 2,
			},
			tg2.Name: {
				Place: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(2, 9, 0, 9), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles jobs with multiple task groups with
// only one having an update stanza and a deployment already being created
func TestReconciler_MultiTG_SingleUpdateStanza(t *testing.T) {
	job := mock.Job()
	tg2 := job.TaskGroups[0].Copy()
	tg2.Name = "foo"
	job.TaskGroups = append(job.TaskGroups, tg2)
	job.TaskGroups[0].Update = noCanaryUpdate

	// Create all the allocs
	var allocs []*structs.Allocation
	for i := 0; i < 2; i++ {
		for j := 0; j < 10; j++ {
			alloc := mock.Alloc()
			alloc.Job = job
			alloc.JobID = job.ID
			alloc.NodeID = uuid.Generate()
			alloc.Name = structs.AllocName(job.ID, job.TaskGroups[i].Name, uint(j))
			alloc.TaskGroup = job.TaskGroups[i].Name
			allocs = append(allocs, alloc)
		}
	}

	d := structs.NewDeployment(job)
	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		DesiredTotal: 10,
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Ignore: 10,
			},
			tg2.Name: {
				Ignore: 10,
			},
		},
	})
}
// Tests delayed rescheduling of failed batch allocations
func TestReconciler_RescheduleLater_Batch(t *testing.T) {
	require := require.New(t)

	// Set desired 4
	job := mock.Job()
	job.TaskGroups[0].Count = 4
	now := time.Now()

	// Set up reschedule policy
	delayDur := 15 * time.Second
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour, Delay: delayDur, DelayFunction: "constant"}
	tgName := job.TaskGroups[0].Name

	// Create 6 existing allocations - 2 running, 1 complete and 3 failed
	var allocs []*structs.Allocation
	for i := 0; i < 6; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}

	// Mark 3 as failed with restart tracking info
	allocs[0].ClientStatus = structs.AllocClientStatusFailed
	allocs[0].NextAllocation = allocs[1].ID
	allocs[1].ClientStatus = structs.AllocClientStatusFailed
	allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[0].ID,
			PrevNodeID:  uuid.Generate(),
		},
	}}
	allocs[1].NextAllocation = allocs[2].ID
	allocs[2].ClientStatus = structs.AllocClientStatusFailed
	allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now}}
	allocs[2].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-2 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[0].ID,
			PrevNodeID:  uuid.Generate(),
		},
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[1].ID,
			PrevNodeID:  uuid.Generate(),
		},
	}}

	// Mark one as complete
	allocs[5].ClientStatus = structs.AllocClientStatusComplete

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil, uuid.Generate())
	r := reconciler.Compute()

	// Two reschedule attempts were already made, one more can be made at a future time
	// Verify that the follow up eval has the expected waitUntil time
	evals := r.desiredFollowupEvals[tgName]
	require.NotNil(evals)
	require.Equal(1, len(evals))
	require.Equal(now.Add(delayDur), evals[0].WaitUntil)

	// Alloc 5 should not be replaced because it is terminal
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		attributeUpdates:  1,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:         0,
				InPlaceUpdate: 0,
				Ignore:        4,
			},
		},
	})
	assertNamesHaveIndexes(t, intRange(2, 2), attributeUpdatesToNames(r.attributeUpdates))

	// Verify that the followup evalID field is set correctly
	var annotated *structs.Allocation
	for _, a := range r.attributeUpdates {
		annotated = a
	}
	require.Equal(evals[0].ID, annotated.FollowupEvalID)
}
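// The batching behavior exercised below builds on the follow-up eval above:
// failed allocs whose reschedule times land close together are expected to
// share a single follow-up eval, so allocs failing ~50ms apart get one
// WaitUntil while allocs failing 10 seconds later get a second, later eval.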
// Tests delayed rescheduling of failed batch allocations and batching of allocs
// with fail times that are close together
func TestReconciler_RescheduleLaterWithBatchedEvals_Batch(t *testing.T) {
	require := require.New(t)

	// Set desired 10
	job := mock.Job()
	job.TaskGroups[0].Count = 10
	now := time.Now()

	// Set up reschedule policy
	delayDur := 15 * time.Second
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour, Delay: delayDur, DelayFunction: "constant"}
	tgName := job.TaskGroups[0].Name

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}

	// Mark 5 as failed with fail times very close together
	for i := 0; i < 5; i++ {
		allocs[i].ClientStatus = structs.AllocClientStatusFailed
		allocs[i].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
			StartedAt:  now.Add(-1 * time.Hour),
			FinishedAt: now.Add(time.Duration(50*i) * time.Millisecond)}}
	}

	// Mark two more as failed several seconds later
	for i := 5; i < 7; i++ {
		allocs[i].ClientStatus = structs.AllocClientStatusFailed
		allocs[i].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
			StartedAt:  now.Add(-1 * time.Hour),
			FinishedAt: now.Add(10 * time.Second)}}
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil, uuid.Generate())
	r := reconciler.Compute()

	// Verify that two follow up evals were created
	evals := r.desiredFollowupEvals[tgName]
	require.NotNil(evals)
	require.Equal(2, len(evals))

	// Verify expected WaitUntil values for both batched evals
	require.Equal(now.Add(delayDur), evals[0].WaitUntil)
	secondBatchDuration := delayDur + 10*time.Second
	require.Equal(now.Add(secondBatchDuration), evals[1].WaitUntil)

	// The failed allocs are annotated with follow up evals but not replaced yet
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		attributeUpdates:  7,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:         0,
				InPlaceUpdate: 0,
				Ignore:        10,
			},
		},
	})
	assertNamesHaveIndexes(t, intRange(0, 6), attributeUpdatesToNames(r.attributeUpdates))

	// Verify that the followup evalID field is set correctly
	for _, alloc := range r.attributeUpdates {
		if allocNameToIndex(alloc.Name) < 5 {
			require.Equal(evals[0].ID, alloc.FollowupEvalID)
		} else if allocNameToIndex(alloc.Name) < 7 {
			require.Equal(evals[1].ID, alloc.FollowupEvalID)
		} else {
			t.Fatalf("Unexpected alloc name in Inplace results %v", alloc.Name)
		}
	}
}

// Tests rescheduling failed batch allocations
func TestReconciler_RescheduleNow_Batch(t *testing.T) {
	require := require.New(t)
	// Set desired 4
	job := mock.Job()
	job.TaskGroups[0].Count = 4
	now := time.Now()
	// Set up reschedule policy
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour, Delay: 5 * time.Second, DelayFunction: "constant"}
	tgName := job.TaskGroups[0].Name
	// Create 6 existing allocations - 2 running, 1 complete and 3 failed
	var allocs []*structs.Allocation
	for i := 0; i < 6; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}
	// Mark 3 as failed with restart tracking info
	allocs[0].ClientStatus = structs.AllocClientStatusFailed
	allocs[0].NextAllocation = allocs[1].ID
	allocs[1].ClientStatus = structs.AllocClientStatusFailed
	allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[0].ID,
			PrevNodeID:  uuid.Generate(),
		},
	}}
	allocs[1].NextAllocation = allocs[2].ID
	allocs[2].ClientStatus = structs.AllocClientStatusFailed
	allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now.Add(-5 * time.Second)}}
	allocs[2].FollowupEvalID = uuid.Generate()
	allocs[2].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-2 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[0].ID,
			PrevNodeID:  uuid.Generate(),
		},
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[1].ID,
			PrevNodeID:  uuid.Generate(),
		},
	}}
	// Mark one as complete
	allocs[5].ClientStatus = structs.AllocClientStatusComplete

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil, "")
	reconciler.now = now
	r := reconciler.Compute()

	// Verify that no follow up evals were created
	evals := r.desiredFollowupEvals[tgName]
	require.Nil(evals)

	// Two reschedule attempts were made, one more can be made now
	// Alloc 5 should not be replaced because it is terminal
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  1,
				Ignore: 3,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(2, 2), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
	assertPlacementsAreRescheduled(t, 1, r.place)
}
// Tests delayed rescheduling of failed service allocations
func TestReconciler_RescheduleLater_Service(t *testing.T) {
	require := require.New(t)

	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5
	tgName := job.TaskGroups[0].Name
	now := time.Now()

	// Set up reschedule policy
	delayDur := 15 * time.Second
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 1, Interval: 24 * time.Hour, Delay: delayDur, MaxDelay: 1 * time.Hour}

	// Create 5 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 5; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}

	// Mark two as failed
	allocs[0].ClientStatus = structs.AllocClientStatusFailed

	// Mark one of them as already rescheduled once
	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: uuid.Generate(),
			PrevNodeID:  uuid.Generate(),
		},
	}}
	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now}}
	allocs[1].ClientStatus = structs.AllocClientStatusFailed

	// Mark one as desired state stop
	allocs[4].DesiredStatus = structs.AllocDesiredStatusStop

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, uuid.Generate())
	r := reconciler.Compute()

	// Should place a new placement and create a follow up eval for the delayed reschedule
	// Verify that the follow up eval has the expected waitUntil time
	evals := r.desiredFollowupEvals[tgName]
	require.NotNil(evals)
	require.Equal(1, len(evals))
	require.Equal(now.Add(delayDur), evals[0].WaitUntil)

	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		inplace:           0,
		attributeUpdates:  1,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:         1,
				InPlaceUpdate: 0,
				Ignore:        4,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place))
	assertNamesHaveIndexes(t, intRange(1, 1), attributeUpdatesToNames(r.attributeUpdates))

	// Verify that the followup evalID field is set correctly
	var annotated *structs.Allocation
	for _, a := range r.attributeUpdates {
		annotated = a
	}
	require.Equal(evals[0].ID, annotated.FollowupEvalID)
}
// Tests service allocations with client status complete
func TestReconciler_Service_ClientStatusComplete(t *testing.T) {
	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5

	// Set up reschedule policy
	delayDur := 15 * time.Second
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
		Attempts: 1,
		Interval: 24 * time.Hour,
		Delay:    delayDur,
		MaxDelay: 1 * time.Hour,
	}

	// Create 5 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 5; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
		alloc.DesiredStatus = structs.AllocDesiredStatusRun
	}

	// Mark one as client status complete
	allocs[4].ClientStatus = structs.AllocClientStatusComplete

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Should place a new placement for the alloc that was marked complete
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:         1,
				InPlaceUpdate: 0,
				Ignore:        4,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place))
}

// Tests service job placement with desired stop and client status complete
func TestReconciler_Service_DesiredStop_ClientStatusComplete(t *testing.T) {
	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5

	// Set up reschedule policy
	delayDur := 15 * time.Second
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
		Attempts: 1,
		Interval: 24 * time.Hour,
		Delay:    delayDur,
		MaxDelay: 1 * time.Hour,
	}

	// Create 5 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 5; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
		alloc.DesiredStatus = structs.AllocDesiredStatusRun
	}

	// Mark one as failed but with desired status stop
	// Should not trigger rescheduling logic but should trigger a placement
	allocs[4].ClientStatus = structs.AllocClientStatusFailed
	allocs[4].DesiredStatus = structs.AllocDesiredStatusStop
	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Should place a new placement for the alloc that was marked stopped
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:         1,
				InPlaceUpdate: 0,
				Ignore:        4,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place))

	// Should not have any follow up evals created
	require := require.New(t)
	require.Equal(0, len(r.desiredFollowupEvals))
}

// Tests rescheduling failed service allocations with desired state stop
func TestReconciler_RescheduleNow_Service(t *testing.T) {
	require := require.New(t)

	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5
	tgName := job.TaskGroups[0].Name
	now := time.Now()

	// Set up reschedule policy and update stanza
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
		Attempts:      1,
		Interval:      24 * time.Hour,
		Delay:         5 * time.Second,
		DelayFunction: "",
		MaxDelay:      1 * time.Hour,
		Unlimited:     false,
	}
	job.TaskGroups[0].Update = noCanaryUpdate

	// Create 5 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 5; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}

	// Mark two as failed
	allocs[0].ClientStatus = structs.AllocClientStatusFailed

	// Mark one of them as already rescheduled once
	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: uuid.Generate(),
			PrevNodeID:  uuid.Generate(),
		},
	}}
	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now.Add(-10 * time.Second)}}
	allocs[1].ClientStatus = structs.AllocClientStatusFailed

	// Mark one as desired state stop
	allocs[4].DesiredStatus = structs.AllocDesiredStatusStop

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Verify that no follow up evals were created
	evals := r.desiredFollowupEvals[tgName]
	require.Nil(evals)

	// Verify that one rescheduled alloc and one replacement for terminal alloc were placed
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             2,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  2,
				Ignore: 3,
			},
		},
	})

	// Rescheduled allocs should have previous allocs
	assertNamesHaveIndexes(t, intRange(1, 1, 4, 4), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
	assertPlacementsAreRescheduled(t, 1, r.place)
}
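// The next two tests cover clock drift between servers and clients: a
// reschedule that is not quite due by wall-clock time (failed 4s ago with a
// 5s delay) is still placed immediately, and a stored FollowupEvalID matching
// the triggering eval forces placement even with the reconciler's clock
// pinned 30 seconds in the past.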
// Tests rescheduling failed service allocations when there's clock drift (up to a second)
func TestReconciler_RescheduleNow_WithinAllowedTimeWindow(t *testing.T) {
	require := require.New(t)

	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5
	tgName := job.TaskGroups[0].Name
	now := time.Now()

	// Set up reschedule policy and update stanza
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
		Attempts:      1,
		Interval:      24 * time.Hour,
		Delay:         5 * time.Second,
		DelayFunction: "",
		MaxDelay:      1 * time.Hour,
		Unlimited:     false,
	}
	job.TaskGroups[0].Update = noCanaryUpdate

	// Create 5 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 5; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}

	// Mark two as failed
	allocs[0].ClientStatus = structs.AllocClientStatusFailed

	// Mark one of them as already rescheduled once
	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: uuid.Generate(),
			PrevNodeID:  uuid.Generate(),
		},
	}}
	// Set fail time to 4 seconds ago which falls within the reschedule window
	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now.Add(-4 * time.Second)}}
	allocs[1].ClientStatus = structs.AllocClientStatusFailed

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	reconciler.now = now
	r := reconciler.Compute()

	// Verify that no follow up evals were created
	evals := r.desiredFollowupEvals[tgName]
	require.Nil(evals)

	// Verify that one rescheduled alloc was placed
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  1,
				Ignore: 4,
			},
		},
	})

	// Rescheduled allocs should have previous allocs
	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
	assertPlacementsAreRescheduled(t, 1, r.place)
}

// Tests rescheduling failed service allocations when the eval ID matches and there's a large clock drift
func TestReconciler_RescheduleNow_EvalIDMatch(t *testing.T) {
	require := require.New(t)

	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5
	tgName := job.TaskGroups[0].Name
	now := time.Now()

	// Set up reschedule policy and update stanza
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
		Attempts:      1,
		Interval:      24 * time.Hour,
		Delay:         5 * time.Second,
		DelayFunction: "",
		MaxDelay:      1 * time.Hour,
		Unlimited:     false,
	}
	job.TaskGroups[0].Update = noCanaryUpdate

	// Create 5 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 5; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1881 allocs = append(allocs, alloc) 1882 alloc.ClientStatus = structs.AllocClientStatusRunning 1883 } 1884 1885 // Mark one as failed 1886 allocs[0].ClientStatus = structs.AllocClientStatusFailed 1887 1888 // Mark one of them as already rescheduled once 1889 allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ 1890 {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), 1891 PrevAllocID: uuid.Generate(), 1892 PrevNodeID: uuid.Generate(), 1893 }, 1894 }} 1895 // Set fail time to 5 seconds ago and eval ID 1896 evalID := uuid.Generate() 1897 allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", 1898 StartedAt: now.Add(-1 * time.Hour), 1899 FinishedAt: now.Add(-5 * time.Second)}} 1900 allocs[1].ClientStatus = structs.AllocClientStatusFailed 1901 allocs[1].FollowupEvalID = evalID 1902 1903 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, evalID) 1904 reconciler.now = now.Add(-30 * time.Second) 1905 r := reconciler.Compute() 1906 1907 // Verify that no follow up evals were created 1908 evals := r.desiredFollowupEvals[tgName] 1909 require.Nil(evals) 1910 1911 // Verify that one rescheduled alloc was placed 1912 assertResults(t, r, &resultExpectation{ 1913 createDeployment: nil, 1914 deploymentUpdates: nil, 1915 place: 1, 1916 inplace: 0, 1917 stop: 0, 1918 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1919 job.TaskGroups[0].Name: { 1920 Place: 1, 1921 Ignore: 4, 1922 }, 1923 }, 1924 }) 1925 1926 // Rescheduled allocs should have previous allocs 1927 assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place)) 1928 assertPlaceResultsHavePreviousAllocs(t, 1, r.place) 1929 assertPlacementsAreRescheduled(t, 1, r.place) 1930 } 1931 1932 // Tests rescheduling failed service allocations when there are canaries 1933 func TestReconciler_RescheduleNow_Service_WithCanaries(t *testing.T) { 1934 require := require.New(t) 1935 1936 // Set desired 5 1937 job := mock.Job() 1938 job.TaskGroups[0].Count = 5 1939 tgName := job.TaskGroups[0].Name 1940 now := time.Now() 1941 1942 // Set up reschedule policy and update stanza 1943 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ 1944 Attempts: 1, 1945 Interval: 24 * time.Hour, 1946 Delay: 5 * time.Second, 1947 DelayFunction: "", 1948 MaxDelay: 1 * time.Hour, 1949 Unlimited: false, 1950 } 1951 job.TaskGroups[0].Update = canaryUpdate 1952 1953 job2 := job.Copy() 1954 job2.Version++ 1955 1956 d := structs.NewDeployment(job2) 1957 d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion 1958 s := &structs.DeploymentState{ 1959 DesiredCanaries: 2, 1960 DesiredTotal: 5, 1961 } 1962 d.TaskGroups[job.TaskGroups[0].Name] = s 1963 1964 // Create 5 existing allocations 1965 var allocs []*structs.Allocation 1966 for i := 0; i < 5; i++ { 1967 alloc := mock.Alloc() 1968 alloc.Job = job 1969 alloc.JobID = job.ID 1970 alloc.NodeID = uuid.Generate() 1971 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1972 allocs = append(allocs, alloc) 1973 alloc.ClientStatus = structs.AllocClientStatusRunning 1974 } 1975 1976 // Mark three as failed 1977 allocs[0].ClientStatus = structs.AllocClientStatusFailed 1978 1979 // Mark one of them as already rescheduled once 1980 allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ 1981 {RescheduleTime: time.Now().Add(-1 * 
time.Hour).UTC().UnixNano(),
			PrevAllocID: uuid.Generate(),
			PrevNodeID:  uuid.Generate(),
		},
	}}
	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now.Add(-10 * time.Second)}}
	allocs[1].ClientStatus = structs.AllocClientStatusFailed

	// Mark a third as failed
	allocs[4].ClientStatus = structs.AllocClientStatusFailed

	// Create 2 canary allocations
	for i := 0; i < 2; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.ClientStatus = structs.AllocClientStatusRunning
		alloc.DeploymentID = d.ID
		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
			Canary:  true,
			Healthy: helper.BoolToPtr(false),
		}
		s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job2, d, allocs, nil, "")
	r := reconciler.Compute()

	// Verify that no follow up evals were created
	evals := r.desiredFollowupEvals[tgName]
	require.Nil(evals)

	// Verify that two rescheduled allocs were placed
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             2,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  2,
				Ignore: 5,
			},
		},
	})

	// Rescheduled allocs should have previous allocs
	assertNamesHaveIndexes(t, intRange(1, 1, 4, 4), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 2, r.place)
	assertPlacementsAreRescheduled(t, 2, r.place)
}

// Tests rescheduling failed canary service allocations
func TestReconciler_RescheduleNow_Service_Canaries(t *testing.T) {
	require := require.New(t)

	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5
	tgName := job.TaskGroups[0].Name
	now := time.Now()

	// Set up reschedule policy and update stanza
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
		Delay:         5 * time.Second,
		DelayFunction: "constant",
		MaxDelay:      1 * time.Hour,
		Unlimited:     true,
	}
	job.TaskGroups[0].Update = canaryUpdate

	job2 := job.Copy()
	job2.Version++

	d := structs.NewDeployment(job2)
	d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
	s := &structs.DeploymentState{
		DesiredCanaries: 2,
		DesiredTotal:    5,
	}
	d.TaskGroups[job.TaskGroups[0].Name] = s

	// Create 5 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 5; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}

	// Create 2 healthy canary allocations
	for i := 0; i < 2; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
alloc.ClientStatus = structs.AllocClientStatusRunning 2089 alloc.DeploymentID = d.ID 2090 alloc.DeploymentStatus = &structs.AllocDeploymentStatus{ 2091 Canary: true, 2092 Healthy: helper.BoolToPtr(false), 2093 } 2094 s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID) 2095 allocs = append(allocs, alloc) 2096 } 2097 2098 // Mark the canaries as failed 2099 allocs[5].ClientStatus = structs.AllocClientStatusFailed 2100 allocs[5].DesiredTransition.Reschedule = helper.BoolToPtr(true) 2101 2102 // Mark one of them as already rescheduled once 2103 allocs[5].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ 2104 {RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), 2105 PrevAllocID: uuid.Generate(), 2106 PrevNodeID: uuid.Generate(), 2107 }, 2108 }} 2109 2110 allocs[6].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", 2111 StartedAt: now.Add(-1 * time.Hour), 2112 FinishedAt: now.Add(-10 * time.Second)}} 2113 allocs[6].ClientStatus = structs.AllocClientStatusFailed 2114 allocs[6].DesiredTransition.Reschedule = helper.BoolToPtr(true) 2115 2116 // Create 4 unhealthy canary allocations that have already been replaced 2117 for i := 0; i < 4; i++ { 2118 alloc := mock.Alloc() 2119 alloc.Job = job 2120 alloc.JobID = job.ID 2121 alloc.NodeID = uuid.Generate() 2122 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%2)) 2123 alloc.ClientStatus = structs.AllocClientStatusFailed 2124 alloc.DeploymentID = d.ID 2125 alloc.DeploymentStatus = &structs.AllocDeploymentStatus{ 2126 Canary: true, 2127 Healthy: helper.BoolToPtr(false), 2128 } 2129 s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID) 2130 allocs = append(allocs, alloc) 2131 } 2132 2133 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job2, d, allocs, nil, "") 2134 reconciler.now = now 2135 r := reconciler.Compute() 2136 2137 // Verify that no follow up evals were created 2138 evals := r.desiredFollowupEvals[tgName] 2139 require.Nil(evals) 2140 2141 // Verify that one rescheduled alloc and one replacement for terminal alloc were placed 2142 assertResults(t, r, &resultExpectation{ 2143 createDeployment: nil, 2144 deploymentUpdates: nil, 2145 place: 2, 2146 inplace: 0, 2147 stop: 0, 2148 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2149 job.TaskGroups[0].Name: { 2150 Place: 2, 2151 Ignore: 9, 2152 }, 2153 }, 2154 }) 2155 2156 // Rescheduled allocs should have previous allocs 2157 assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) 2158 assertPlaceResultsHavePreviousAllocs(t, 2, r.place) 2159 assertPlacementsAreRescheduled(t, 2, r.place) 2160 } 2161 2162 // Tests rescheduling failed canary service allocations when one has reached its 2163 // reschedule limit 2164 func TestReconciler_RescheduleNow_Service_Canaries_Limit(t *testing.T) { 2165 require := require.New(t) 2166 2167 // Set desired 5 2168 job := mock.Job() 2169 job.TaskGroups[0].Count = 5 2170 tgName := job.TaskGroups[0].Name 2171 now := time.Now() 2172 2173 // Set up reschedule policy and update stanza 2174 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ 2175 Attempts: 1, 2176 Interval: 24 * time.Hour, 2177 Delay: 5 * time.Second, 2178 DelayFunction: "", 2179 MaxDelay: 1 * time.Hour, 2180 Unlimited: false, 2181 } 2182 job.TaskGroups[0].Update = canaryUpdate 2183 2184 job2 := job.Copy() 2185 job2.Version++ 2186 2187 d := structs.NewDeployment(job2) 2188 d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion 2189 s := 
&structs.DeploymentState{
		DesiredCanaries: 2,
		DesiredTotal:    5,
	}
	d.TaskGroups[job.TaskGroups[0].Name] = s

	// Create 5 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 5; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}

	// Create 2 healthy canary allocations
	for i := 0; i < 2; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.ClientStatus = structs.AllocClientStatusRunning
		alloc.DeploymentID = d.ID
		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
			Canary:  true,
			Healthy: helper.BoolToPtr(false),
		}
		s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
		allocs = append(allocs, alloc)
	}

	// Mark the canaries as failed
	allocs[5].ClientStatus = structs.AllocClientStatusFailed
	allocs[5].DesiredTransition.Reschedule = helper.BoolToPtr(true)

	// Mark one of them as already rescheduled once
	allocs[5].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: uuid.Generate(),
			PrevNodeID:  uuid.Generate(),
		},
	}}

	allocs[6].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now.Add(-10 * time.Second)}}
	allocs[6].ClientStatus = structs.AllocClientStatusFailed
	allocs[6].DesiredTransition.Reschedule = helper.BoolToPtr(true)

	// Create 4 unhealthy canary allocations that have already been replaced
	for i := 0; i < 4; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%2))
		alloc.ClientStatus = structs.AllocClientStatusFailed
		alloc.DeploymentID = d.ID
		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
			Canary:  true,
			Healthy: helper.BoolToPtr(false),
		}
		s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job2, d, allocs, nil, "")
	reconciler.now = now
	r := reconciler.Compute()

	// Verify that no follow up evals were created
	evals := r.desiredFollowupEvals[tgName]
	require.Nil(evals)

	// Verify that only one alloc was placed; the first canary has reached its
	// reschedule limit
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  1,
				Ignore: 10,
			},
		},
	})

	// Rescheduled allocs should have previous allocs
	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
	assertPlacementsAreRescheduled(t, 1, r.place)
}
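// Taken together, the RescheduleNow tests above pin down two independent
// gates that must both pass before a failed alloc is replaced in the same
// eval: the policy's attempt limit within its interval, and the delay
// window, with roughly a second of allowance for client/scheduler clock
// drift (the EvalIDMatch variant takes a separate path: a matching
// FollowupEvalID satisfies the delay check even under large drift). A
// minimal sketch of those gates, assuming the field names used in this
// file; this is a hypothetical helper, not the reconciler's actual code:
func rescheduleNowEligibleSketch(tracker *structs.RescheduleTracker, policy *structs.ReschedulePolicy, finishedAt, now time.Time) bool {
	// Gate 1: unless the policy is unlimited, prior reschedule events inside
	// the interval count against Attempts.
	if !policy.Unlimited {
		attempts := 0
		if tracker != nil {
			windowStart := now.Add(-policy.Interval).UnixNano()
			for _, ev := range tracker.Events {
				if ev.RescheduleTime >= windowStart {
					attempts++
				}
			}
		}
		if attempts >= policy.Attempts {
			return false
		}
	}
	// Gate 2: the reschedule time (failure time plus delay) must have
	// arrived, within ~1s of tolerance for clock drift.
	return !finishedAt.Add(policy.Delay).After(now.Add(time.Second))
}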
// Tests failed service allocations that were already rescheduled won't be
// rescheduled again
func TestReconciler_DontReschedule_PreviouslyRescheduled(t *testing.T) {
	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5

	// Set up reschedule policy
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 5, Interval: 24 * time.Hour}

	// Create 7 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 7; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}
	// Mark two as failed and rescheduled
	allocs[0].ClientStatus = structs.AllocClientStatusFailed
	allocs[0].ID = allocs[1].ID
	allocs[1].ClientStatus = structs.AllocClientStatusFailed
	allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: uuid.Generate(),
			PrevNodeID:  uuid.Generate(),
		},
	}}
	allocs[1].NextAllocation = allocs[2].ID

	// Mark one as desired state stop
	allocs[4].DesiredStatus = structs.AllocDesiredStatusStop

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Should place 1 - one is a new placement to make up the desired count of 5
	// failing allocs are not rescheduled
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  1,
				Ignore: 4,
			},
		},
	})

	// Name index 0 is used for the replacement because allocs[0] was given
	// allocs[1]'s ID above, leaving index 0 unused
	assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place))
}

// Tests the reconciler cancels an old deployment when the job is being stopped
func TestReconciler_CancelDeployment_JobStop(t *testing.T) {
	job := mock.Job()
	job.Stop = true

	running := structs.NewDeployment(job)
	failed := structs.NewDeployment(job)
	failed.Status = structs.DeploymentStatusFailed

	cases := []struct {
		name             string
		job              *structs.Job
		jobID, taskGroup string
		deployment       *structs.Deployment
		cancel           bool
	}{
		{
			name:       "stopped job, running deployment",
			job:        job,
			jobID:      job.ID,
			taskGroup:  job.TaskGroups[0].Name,
			deployment: running,
			cancel:     true,
		},
		{
			name:       "nil job, running deployment",
			job:        nil,
			jobID:      "foo",
			taskGroup:  "bar",
			deployment: running,
			cancel:     true,
		},
		{
			name:       "stopped job, failed deployment",
			job:        job,
			jobID:      job.ID,
			taskGroup:  job.TaskGroups[0].Name,
			deployment: failed,
			cancel:     false,
		},
		{
			name:       "nil job, failed deployment",
			job:        nil,
			jobID:      "foo",
			taskGroup:  "bar",
			deployment: failed,
			cancel:     false,
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			// Create 10 allocations
			var allocs []*structs.Allocation
			for i := 0; i < 10; i++ {
				alloc := mock.Alloc()
				alloc.Job = c.job
				alloc.JobID = c.jobID
				alloc.NodeID = uuid.Generate()
				alloc.Name =
structs.AllocName(c.jobID, c.taskGroup, uint(i)) 2406 alloc.TaskGroup = c.taskGroup 2407 allocs = append(allocs, alloc) 2408 } 2409 2410 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, c.jobID, c.job, c.deployment, allocs, nil, "") 2411 r := reconciler.Compute() 2412 2413 var updates []*structs.DeploymentStatusUpdate 2414 if c.cancel { 2415 updates = []*structs.DeploymentStatusUpdate{ 2416 { 2417 DeploymentID: c.deployment.ID, 2418 Status: structs.DeploymentStatusCancelled, 2419 StatusDescription: structs.DeploymentStatusDescriptionStoppedJob, 2420 }, 2421 } 2422 } 2423 2424 // Assert the correct results 2425 assertResults(t, r, &resultExpectation{ 2426 createDeployment: nil, 2427 deploymentUpdates: updates, 2428 place: 0, 2429 inplace: 0, 2430 stop: 10, 2431 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2432 c.taskGroup: { 2433 Stop: 10, 2434 }, 2435 }, 2436 }) 2437 2438 assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop)) 2439 }) 2440 } 2441 } 2442 2443 // Tests the reconciler cancels an old deployment when the job is updated 2444 func TestReconciler_CancelDeployment_JobUpdate(t *testing.T) { 2445 // Create a base job 2446 job := mock.Job() 2447 2448 // Create two deployments 2449 running := structs.NewDeployment(job) 2450 failed := structs.NewDeployment(job) 2451 failed.Status = structs.DeploymentStatusFailed 2452 2453 // Make the job newer than the deployment 2454 job.Version += 10 2455 2456 cases := []struct { 2457 name string 2458 deployment *structs.Deployment 2459 cancel bool 2460 }{ 2461 { 2462 name: "running deployment", 2463 deployment: running, 2464 cancel: true, 2465 }, 2466 { 2467 name: "failed deployment", 2468 deployment: failed, 2469 cancel: false, 2470 }, 2471 } 2472 2473 for _, c := range cases { 2474 t.Run(c.name, func(t *testing.T) { 2475 // Create 10 allocations 2476 var allocs []*structs.Allocation 2477 for i := 0; i < 10; i++ { 2478 alloc := mock.Alloc() 2479 alloc.Job = job 2480 alloc.JobID = job.ID 2481 alloc.NodeID = uuid.Generate() 2482 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2483 alloc.TaskGroup = job.TaskGroups[0].Name 2484 allocs = append(allocs, alloc) 2485 } 2486 2487 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, c.deployment, allocs, nil, "") 2488 r := reconciler.Compute() 2489 2490 var updates []*structs.DeploymentStatusUpdate 2491 if c.cancel { 2492 updates = []*structs.DeploymentStatusUpdate{ 2493 { 2494 DeploymentID: c.deployment.ID, 2495 Status: structs.DeploymentStatusCancelled, 2496 StatusDescription: structs.DeploymentStatusDescriptionNewerJob, 2497 }, 2498 } 2499 } 2500 2501 // Assert the correct results 2502 assertResults(t, r, &resultExpectation{ 2503 createDeployment: nil, 2504 deploymentUpdates: updates, 2505 place: 0, 2506 inplace: 0, 2507 stop: 0, 2508 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2509 job.TaskGroups[0].Name: { 2510 Ignore: 10, 2511 }, 2512 }, 2513 }) 2514 }) 2515 } 2516 } 2517 2518 // Tests the reconciler creates a deployment and does a rolling upgrade with 2519 // destructive changes 2520 func TestReconciler_CreateDeployment_RollingUpgrade_Destructive(t *testing.T) { 2521 job := mock.Job() 2522 job.TaskGroups[0].Update = noCanaryUpdate 2523 2524 // Create 10 allocations from the old job 2525 var allocs []*structs.Allocation 2526 for i := 0; i < 10; i++ { 2527 alloc := mock.Alloc() 2528 alloc.Job = job 2529 alloc.JobID = job.ID 2530 alloc.NodeID = uuid.Generate() 2531 alloc.Name = 
structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2532 alloc.TaskGroup = job.TaskGroups[0].Name 2533 allocs = append(allocs, alloc) 2534 } 2535 2536 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "") 2537 r := reconciler.Compute() 2538 2539 d := structs.NewDeployment(job) 2540 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 2541 DesiredTotal: 10, 2542 } 2543 2544 // Assert the correct results 2545 assertResults(t, r, &resultExpectation{ 2546 createDeployment: d, 2547 deploymentUpdates: nil, 2548 destructive: 4, 2549 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2550 job.TaskGroups[0].Name: { 2551 DestructiveUpdate: 4, 2552 Ignore: 6, 2553 }, 2554 }, 2555 }) 2556 2557 assertNamesHaveIndexes(t, intRange(0, 3), destructiveResultsToNames(r.destructiveUpdate)) 2558 } 2559 2560 // Tests the reconciler creates a deployment for inplace updates 2561 func TestReconciler_CreateDeployment_RollingUpgrade_Inplace(t *testing.T) { 2562 jobOld := mock.Job() 2563 job := jobOld.Copy() 2564 job.Version++ 2565 job.TaskGroups[0].Update = noCanaryUpdate 2566 2567 // Create 10 allocations from the old job 2568 var allocs []*structs.Allocation 2569 for i := 0; i < 10; i++ { 2570 alloc := mock.Alloc() 2571 alloc.Job = jobOld 2572 alloc.JobID = job.ID 2573 alloc.NodeID = uuid.Generate() 2574 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2575 alloc.TaskGroup = job.TaskGroups[0].Name 2576 allocs = append(allocs, alloc) 2577 } 2578 2579 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "") 2580 r := reconciler.Compute() 2581 2582 d := structs.NewDeployment(job) 2583 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 2584 DesiredTotal: 10, 2585 } 2586 2587 // Assert the correct results 2588 assertResults(t, r, &resultExpectation{ 2589 createDeployment: d, 2590 deploymentUpdates: nil, 2591 place: 0, 2592 inplace: 10, 2593 stop: 0, 2594 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2595 job.TaskGroups[0].Name: { 2596 InPlaceUpdate: 10, 2597 }, 2598 }, 2599 }) 2600 } 2601 2602 // Tests the reconciler creates a deployment when the job has a newer create index 2603 func TestReconciler_CreateDeployment_NewerCreateIndex(t *testing.T) { 2604 jobOld := mock.Job() 2605 job := jobOld.Copy() 2606 job.TaskGroups[0].Update = noCanaryUpdate 2607 job.CreateIndex += 100 2608 2609 // Create 5 allocations from the old job 2610 var allocs []*structs.Allocation 2611 for i := 0; i < 5; i++ { 2612 alloc := mock.Alloc() 2613 alloc.Job = jobOld 2614 alloc.JobID = jobOld.ID 2615 alloc.NodeID = uuid.Generate() 2616 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2617 alloc.TaskGroup = job.TaskGroups[0].Name 2618 allocs = append(allocs, alloc) 2619 } 2620 2621 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "") 2622 r := reconciler.Compute() 2623 2624 d := structs.NewDeployment(job) 2625 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 2626 DesiredTotal: 5, 2627 } 2628 2629 // Assert the correct results 2630 assertResults(t, r, &resultExpectation{ 2631 createDeployment: d, 2632 deploymentUpdates: nil, 2633 place: 5, 2634 destructive: 0, 2635 inplace: 0, 2636 stop: 0, 2637 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2638 job.TaskGroups[0].Name: { 2639 InPlaceUpdate: 0, 2640 Ignore: 5, 2641 Place: 5, 2642 DestructiveUpdate: 0, 2643 }, 2644 }, 
	})
}

// Tests the reconciler doesn't create a deployment if there are no changes
func TestReconciler_DontCreateDeployment_NoChanges(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = noCanaryUpdate

	// Create 10 allocations from the job
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				DestructiveUpdate: 0,
				Ignore:            10,
			},
		},
	})
}

// Tests the reconciler doesn't place any more canaries when the deployment is
// paused or failed
func TestReconciler_PausedOrFailedDeployment_NoMoreCanaries(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = canaryUpdate

	cases := []struct {
		name             string
		deploymentStatus string
		stop             uint64
	}{
		{
			name:             "paused deployment",
			deploymentStatus: structs.DeploymentStatusPaused,
			stop:             0,
		},
		{
			name:             "failed deployment",
			deploymentStatus: structs.DeploymentStatusFailed,
			stop:             1,
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			// Create a deployment that is paused/failed and has placed some canaries
			d := structs.NewDeployment(job)
			d.Status = c.deploymentStatus
			d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
				Promoted:        false,
				DesiredCanaries: 2,
				DesiredTotal:    10,
				PlacedAllocs:    1,
			}

			// Create 10 allocations for the original job
			var allocs []*structs.Allocation
			for i := 0; i < 10; i++ {
				alloc := mock.Alloc()
				alloc.Job = job
				alloc.JobID = job.ID
				alloc.NodeID = uuid.Generate()
				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
				alloc.TaskGroup = job.TaskGroups[0].Name
				allocs = append(allocs, alloc)
			}

			// Create one canary
			canary := mock.Alloc()
			canary.Job = job
			canary.JobID = job.ID
			canary.NodeID = uuid.Generate()
			canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, 0)
			canary.TaskGroup = job.TaskGroups[0].Name
			canary.DeploymentID = d.ID
			allocs = append(allocs, canary)
			d.TaskGroups[canary.TaskGroup].PlacedCanaries = []string{canary.ID}

			mockUpdateFn := allocUpdateFnMock(map[string]allocUpdateType{canary.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive)
			reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
			r := reconciler.Compute()

			// Assert the correct results
			assertResults(t, r, &resultExpectation{
				createDeployment:  nil,
				deploymentUpdates: nil,
				place:             0,
				inplace:           0,
				stop:              int(c.stop),
				desiredTGUpdates: map[string]*structs.DesiredUpdates{
					job.TaskGroups[0].Name: {
						Ignore: 11 - c.stop,
						Stop:   c.stop,
					},
				},
			})
		})
	}
}
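// The paused/failed deployment tests above and below all rely on the same
// short-circuit: once a deployment is paused or failed, the reconciler may
// still stop allocs, but it must not start new placements, canaries, or
// destructive updates. A minimal sketch of that gate; this is a
// hypothetical helper, not the reconciler's actual code:
func deploymentAllowsNewPlacementsSketch(d *structs.Deployment) bool {
	if d == nil {
		// No deployment in flight, so nothing restricts placement.
		return true
	}
	switch d.Status {
	case structs.DeploymentStatusPaused, structs.DeploymentStatusFailed:
		return false
	default:
		return true
	}
}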
// Tests the reconciler doesn't place any more allocs when the deployment is
// paused or failed
func TestReconciler_PausedOrFailedDeployment_NoMorePlacements(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = noCanaryUpdate
	job.TaskGroups[0].Count = 15

	cases := []struct {
		name             string
		deploymentStatus string
	}{
		{
			name:             "paused deployment",
			deploymentStatus: structs.DeploymentStatusPaused,
		},
		{
			name:             "failed deployment",
			deploymentStatus: structs.DeploymentStatusFailed,
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			// Create a deployment that is paused/failed and has placed some allocs
			d := structs.NewDeployment(job)
			d.Status = c.deploymentStatus
			d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
				Promoted:     false,
				DesiredTotal: 15,
				PlacedAllocs: 10,
			}

			// Create 10 allocations for the new job
			var allocs []*structs.Allocation
			for i := 0; i < 10; i++ {
				alloc := mock.Alloc()
				alloc.Job = job
				alloc.JobID = job.ID
				alloc.NodeID = uuid.Generate()
				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
				alloc.TaskGroup = job.TaskGroups[0].Name
				allocs = append(allocs, alloc)
			}

			reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "")
			r := reconciler.Compute()

			// Assert the correct results
			assertResults(t, r, &resultExpectation{
				createDeployment:  nil,
				deploymentUpdates: nil,
				place:             0,
				inplace:           0,
				stop:              0,
				desiredTGUpdates: map[string]*structs.DesiredUpdates{
					job.TaskGroups[0].Name: {
						Ignore: 10,
					},
				},
			})
		})
	}
}

// Tests the reconciler doesn't do any more destructive updates when the
// deployment is paused or failed
func TestReconciler_PausedOrFailedDeployment_NoMoreDestructiveUpdates(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = noCanaryUpdate

	cases := []struct {
		name             string
		deploymentStatus string
	}{
		{
			name:             "paused deployment",
			deploymentStatus: structs.DeploymentStatusPaused,
		},
		{
			name:             "failed deployment",
			deploymentStatus: structs.DeploymentStatusFailed,
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			// Create a deployment that is paused/failed and has placed an alloc
			d := structs.NewDeployment(job)
			d.Status = c.deploymentStatus
			d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
				Promoted:     false,
				DesiredTotal: 10,
				PlacedAllocs: 1,
			}

			// Create 9 allocations for the original job
			var allocs []*structs.Allocation
			for i := 1; i < 10; i++ {
				alloc := mock.Alloc()
				alloc.Job = job
				alloc.JobID = job.ID
				alloc.NodeID = uuid.Generate()
				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
				alloc.TaskGroup = job.TaskGroups[0].Name
				allocs = append(allocs, alloc)
			}

			// Create one for the new job
			newAlloc := mock.Alloc()
			newAlloc.Job = job
			newAlloc.JobID = job.ID
			newAlloc.NodeID = uuid.Generate()
			newAlloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, 0)
			newAlloc.TaskGroup = job.TaskGroups[0].Name
			newAlloc.DeploymentID = d.ID
			allocs = append(allocs, newAlloc)

			mockUpdateFn := allocUpdateFnMock(map[string]allocUpdateType{newAlloc.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive)
			reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
			r := reconciler.Compute()

			// Assert the correct results
			assertResults(t, r, &resultExpectation{
				createDeployment:  nil,
				deploymentUpdates: nil,
				place:             0,
				inplace:           0,
				stop:              0,
				desiredTGUpdates: map[string]*structs.DesiredUpdates{
					job.TaskGroups[0].Name: {
						Ignore: 10,
					},
				},
			})
		})
	}
}
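// The DrainNode/LostNode tests below exercise how tainted nodes are
// classified: allocs on a down node are stopped and replaced, while allocs
// on a draining node are migrated once the drainer has set
// DesiredTransition.Migrate. A minimal sketch of that classification, using
// only the node fields these tests set; this is a hypothetical helper, not
// the reconciler's actual code:
func taintedAllocActionSketch(n *structs.Node, alloc *structs.Allocation) string {
	switch {
	case n == nil:
		return "ignore" // node is not tainted
	case n.Status == structs.NodeStatusDown:
		return "stop-and-replace" // the alloc is lost
	case n.Drain && alloc.DesiredTransition.Migrate != nil && *alloc.DesiredTransition.Migrate:
		return "migrate"
	default:
		return "ignore"
	}
}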
// Tests the reconciler handles migrating a canary correctly on a draining node
func TestReconciler_DrainNode_Canary(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = canaryUpdate

	// Create a deployment that has placed some canaries
	d := structs.NewDeployment(job)
	s := &structs.DeploymentState{
		Promoted:        false,
		DesiredTotal:    10,
		DesiredCanaries: 2,
		PlacedAllocs:    2,
	}
	d.TaskGroups[job.TaskGroups[0].Name] = s

	// Create 10 allocations from the old job
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		allocs = append(allocs, alloc)
	}

	// Create two canaries for the new job
	handled := make(map[string]allocUpdateType)
	for i := 0; i < 2; i++ {
		// Create one canary
		canary := mock.Alloc()
		canary.Job = job
		canary.JobID = job.ID
		canary.NodeID = uuid.Generate()
		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		canary.TaskGroup = job.TaskGroups[0].Name
		canary.DeploymentID = d.ID
		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
		allocs = append(allocs, canary)
		handled[canary.ID] = allocUpdateFnIgnore
	}

	// Build a map of tainted nodes that contains the last canary
	tainted := make(map[string]*structs.Node, 1)
	n := mock.Node()
	n.ID = allocs[11].NodeID
	allocs[11].DesiredTransition.Migrate = helper.BoolToPtr(true)
	n.Drain = true
	tainted[n.ID] = n

	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
	reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		inplace:           0,
		stop:              1,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Canary: 1,
				Ignore: 11,
			},
		},
	})
	assertNamesHaveIndexes(t, intRange(1, 1), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
}

// Tests the reconciler handles migrating a canary correctly on a lost node
func TestReconciler_LostNode_Canary(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = canaryUpdate

	// Create a deployment that has placed some canaries
	d := structs.NewDeployment(job)
	s := &structs.DeploymentState{
		Promoted:        false,
		DesiredTotal:    10,
		DesiredCanaries:
2, 2986 PlacedAllocs: 2, 2987 } 2988 d.TaskGroups[job.TaskGroups[0].Name] = s 2989 2990 // Create 10 allocations from the old job 2991 var allocs []*structs.Allocation 2992 for i := 0; i < 10; i++ { 2993 alloc := mock.Alloc() 2994 alloc.Job = job 2995 alloc.JobID = job.ID 2996 alloc.NodeID = uuid.Generate() 2997 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2998 alloc.TaskGroup = job.TaskGroups[0].Name 2999 allocs = append(allocs, alloc) 3000 } 3001 3002 // Create two canaries for the new job 3003 handled := make(map[string]allocUpdateType) 3004 for i := 0; i < 2; i++ { 3005 // Create one canary 3006 canary := mock.Alloc() 3007 canary.Job = job 3008 canary.JobID = job.ID 3009 canary.NodeID = uuid.Generate() 3010 canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3011 canary.TaskGroup = job.TaskGroups[0].Name 3012 s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) 3013 canary.DeploymentID = d.ID 3014 allocs = append(allocs, canary) 3015 handled[canary.ID] = allocUpdateFnIgnore 3016 } 3017 3018 // Build a map of tainted nodes that contains the last canary 3019 tainted := make(map[string]*structs.Node, 1) 3020 n := mock.Node() 3021 n.ID = allocs[11].NodeID 3022 n.Status = structs.NodeStatusDown 3023 tainted[n.ID] = n 3024 3025 mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 3026 reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "") 3027 r := reconciler.Compute() 3028 3029 // Assert the correct results 3030 assertResults(t, r, &resultExpectation{ 3031 createDeployment: nil, 3032 deploymentUpdates: nil, 3033 place: 1, 3034 inplace: 0, 3035 stop: 1, 3036 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3037 job.TaskGroups[0].Name: { 3038 Canary: 1, 3039 Ignore: 11, 3040 }, 3041 }, 3042 }) 3043 3044 assertNamesHaveIndexes(t, intRange(1, 1), stopResultsToNames(r.stop)) 3045 assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place)) 3046 } 3047 3048 // Tests the reconciler handles stopping canaries from older deployments 3049 func TestReconciler_StopOldCanaries(t *testing.T) { 3050 job := mock.Job() 3051 job.TaskGroups[0].Update = canaryUpdate 3052 3053 // Create an old deployment that has placed some canaries 3054 d := structs.NewDeployment(job) 3055 s := &structs.DeploymentState{ 3056 Promoted: false, 3057 DesiredTotal: 10, 3058 DesiredCanaries: 2, 3059 PlacedAllocs: 2, 3060 } 3061 d.TaskGroups[job.TaskGroups[0].Name] = s 3062 3063 // Update the job 3064 job.Version += 10 3065 3066 // Create 10 allocations from the old job 3067 var allocs []*structs.Allocation 3068 for i := 0; i < 10; i++ { 3069 alloc := mock.Alloc() 3070 alloc.Job = job 3071 alloc.JobID = job.ID 3072 alloc.NodeID = uuid.Generate() 3073 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3074 alloc.TaskGroup = job.TaskGroups[0].Name 3075 allocs = append(allocs, alloc) 3076 } 3077 3078 // Create canaries 3079 for i := 0; i < 2; i++ { 3080 // Create one canary 3081 canary := mock.Alloc() 3082 canary.Job = job 3083 canary.JobID = job.ID 3084 canary.NodeID = uuid.Generate() 3085 canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3086 canary.TaskGroup = job.TaskGroups[0].Name 3087 s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) 3088 canary.DeploymentID = d.ID 3089 allocs = append(allocs, canary) 3090 } 3091 3092 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "") 3093 r := 
reconciler.Compute() 3094 3095 newD := structs.NewDeployment(job) 3096 newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion 3097 newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3098 DesiredCanaries: 2, 3099 DesiredTotal: 10, 3100 } 3101 3102 // Assert the correct results 3103 assertResults(t, r, &resultExpectation{ 3104 createDeployment: newD, 3105 deploymentUpdates: []*structs.DeploymentStatusUpdate{ 3106 { 3107 DeploymentID: d.ID, 3108 Status: structs.DeploymentStatusCancelled, 3109 StatusDescription: structs.DeploymentStatusDescriptionNewerJob, 3110 }, 3111 }, 3112 place: 2, 3113 inplace: 0, 3114 stop: 2, 3115 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3116 job.TaskGroups[0].Name: { 3117 Canary: 2, 3118 Stop: 2, 3119 Ignore: 10, 3120 }, 3121 }, 3122 }) 3123 3124 assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) 3125 assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) 3126 } 3127 3128 // Tests the reconciler creates new canaries when the job changes 3129 func TestReconciler_NewCanaries(t *testing.T) { 3130 job := mock.Job() 3131 job.TaskGroups[0].Update = canaryUpdate 3132 3133 // Create 10 allocations from the old job 3134 var allocs []*structs.Allocation 3135 for i := 0; i < 10; i++ { 3136 alloc := mock.Alloc() 3137 alloc.Job = job 3138 alloc.JobID = job.ID 3139 alloc.NodeID = uuid.Generate() 3140 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3141 alloc.TaskGroup = job.TaskGroups[0].Name 3142 allocs = append(allocs, alloc) 3143 } 3144 3145 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "") 3146 r := reconciler.Compute() 3147 3148 newD := structs.NewDeployment(job) 3149 newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion 3150 newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3151 DesiredCanaries: 2, 3152 DesiredTotal: 10, 3153 } 3154 3155 // Assert the correct results 3156 assertResults(t, r, &resultExpectation{ 3157 createDeployment: newD, 3158 deploymentUpdates: nil, 3159 place: 2, 3160 inplace: 0, 3161 stop: 0, 3162 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3163 job.TaskGroups[0].Name: { 3164 Canary: 2, 3165 Ignore: 10, 3166 }, 3167 }, 3168 }) 3169 3170 assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) 3171 } 3172 3173 // Tests the reconciler creates new canaries when the job changes and the 3174 // canary count is greater than the task group count 3175 func TestReconciler_NewCanaries_CountGreater(t *testing.T) { 3176 job := mock.Job() 3177 job.TaskGroups[0].Count = 3 3178 job.TaskGroups[0].Update = canaryUpdate.Copy() 3179 job.TaskGroups[0].Update.Canary = 7 3180 3181 // Create 3 allocations from the old job 3182 var allocs []*structs.Allocation 3183 for i := 0; i < 3; i++ { 3184 alloc := mock.Alloc() 3185 alloc.Job = job 3186 alloc.JobID = job.ID 3187 alloc.NodeID = uuid.Generate() 3188 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3189 alloc.TaskGroup = job.TaskGroups[0].Name 3190 allocs = append(allocs, alloc) 3191 } 3192 3193 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "") 3194 r := reconciler.Compute() 3195 3196 newD := structs.NewDeployment(job) 3197 newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion 3198 state := &structs.DeploymentState{ 3199 DesiredCanaries: 7, 3200 DesiredTotal: 3, 
3201 } 3202 newD.TaskGroups[job.TaskGroups[0].Name] = state 3203 3204 // Assert the correct results 3205 assertResults(t, r, &resultExpectation{ 3206 createDeployment: newD, 3207 deploymentUpdates: nil, 3208 place: 7, 3209 inplace: 0, 3210 stop: 0, 3211 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3212 job.TaskGroups[0].Name: { 3213 Canary: 7, 3214 Ignore: 3, 3215 }, 3216 }, 3217 }) 3218 3219 assertNamesHaveIndexes(t, intRange(0, 2, 3, 6), placeResultsToNames(r.place)) 3220 } 3221 3222 // Tests the reconciler creates new canaries when the job changes for multiple 3223 // task groups 3224 func TestReconciler_NewCanaries_MultiTG(t *testing.T) { 3225 job := mock.Job() 3226 job.TaskGroups[0].Update = canaryUpdate 3227 job.TaskGroups = append(job.TaskGroups, job.TaskGroups[0].Copy()) 3228 job.TaskGroups[0].Name = "tg2" 3229 3230 // Create 10 allocations from the old job for each tg 3231 var allocs []*structs.Allocation 3232 for j := 0; j < 2; j++ { 3233 for i := 0; i < 10; i++ { 3234 alloc := mock.Alloc() 3235 alloc.Job = job 3236 alloc.JobID = job.ID 3237 alloc.NodeID = uuid.Generate() 3238 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[j].Name, uint(i)) 3239 alloc.TaskGroup = job.TaskGroups[j].Name 3240 allocs = append(allocs, alloc) 3241 } 3242 } 3243 3244 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "") 3245 r := reconciler.Compute() 3246 3247 newD := structs.NewDeployment(job) 3248 newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion 3249 state := &structs.DeploymentState{ 3250 DesiredCanaries: 2, 3251 DesiredTotal: 10, 3252 } 3253 newD.TaskGroups[job.TaskGroups[0].Name] = state 3254 newD.TaskGroups[job.TaskGroups[1].Name] = state.Copy() 3255 3256 // Assert the correct results 3257 assertResults(t, r, &resultExpectation{ 3258 createDeployment: newD, 3259 deploymentUpdates: nil, 3260 place: 4, 3261 inplace: 0, 3262 stop: 0, 3263 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3264 job.TaskGroups[0].Name: { 3265 Canary: 2, 3266 Ignore: 10, 3267 }, 3268 job.TaskGroups[1].Name: { 3269 Canary: 2, 3270 Ignore: 10, 3271 }, 3272 }, 3273 }) 3274 3275 assertNamesHaveIndexes(t, intRange(0, 1, 0, 1), placeResultsToNames(r.place)) 3276 } 3277 3278 // Tests the reconciler creates new canaries when the job changes and scales up 3279 func TestReconciler_NewCanaries_ScaleUp(t *testing.T) { 3280 // Scale the job up to 15 3281 job := mock.Job() 3282 job.TaskGroups[0].Update = canaryUpdate 3283 job.TaskGroups[0].Count = 15 3284 3285 // Create 10 allocations from the old job 3286 var allocs []*structs.Allocation 3287 for i := 0; i < 10; i++ { 3288 alloc := mock.Alloc() 3289 alloc.Job = job 3290 alloc.JobID = job.ID 3291 alloc.NodeID = uuid.Generate() 3292 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3293 alloc.TaskGroup = job.TaskGroups[0].Name 3294 allocs = append(allocs, alloc) 3295 } 3296 3297 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "") 3298 r := reconciler.Compute() 3299 3300 newD := structs.NewDeployment(job) 3301 newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion 3302 newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3303 DesiredCanaries: 2, 3304 DesiredTotal: 15, 3305 } 3306 3307 // Assert the correct results 3308 assertResults(t, r, &resultExpectation{ 3309 createDeployment: newD, 3310 deploymentUpdates: nil, 3311 place: 2, 3312 inplace: 0, 
3313 stop: 0, 3314 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3315 job.TaskGroups[0].Name: { 3316 Canary: 2, 3317 Ignore: 10, 3318 }, 3319 }, 3320 }) 3321 3322 assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) 3323 } 3324 3325 // Tests the reconciler creates new canaries when the job changes and scales 3326 // down 3327 func TestReconciler_NewCanaries_ScaleDown(t *testing.T) { 3328 // Scale the job down to 5 3329 job := mock.Job() 3330 job.TaskGroups[0].Update = canaryUpdate 3331 job.TaskGroups[0].Count = 5 3332 3333 // Create 10 allocations from the old job 3334 var allocs []*structs.Allocation 3335 for i := 0; i < 10; i++ { 3336 alloc := mock.Alloc() 3337 alloc.Job = job 3338 alloc.JobID = job.ID 3339 alloc.NodeID = uuid.Generate() 3340 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3341 alloc.TaskGroup = job.TaskGroups[0].Name 3342 allocs = append(allocs, alloc) 3343 } 3344 3345 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "") 3346 r := reconciler.Compute() 3347 3348 newD := structs.NewDeployment(job) 3349 newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion 3350 newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3351 DesiredCanaries: 2, 3352 DesiredTotal: 5, 3353 } 3354 3355 // Assert the correct results 3356 assertResults(t, r, &resultExpectation{ 3357 createDeployment: newD, 3358 deploymentUpdates: nil, 3359 place: 2, 3360 inplace: 0, 3361 stop: 5, 3362 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3363 job.TaskGroups[0].Name: { 3364 Canary: 2, 3365 Stop: 5, 3366 Ignore: 5, 3367 }, 3368 }, 3369 }) 3370 3371 assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) 3372 assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop)) 3373 } 3374 3375 // Tests the reconciler handles filling the names of partially placed canaries 3376 func TestReconciler_NewCanaries_FillNames(t *testing.T) { 3377 job := mock.Job() 3378 job.TaskGroups[0].Update = &structs.UpdateStrategy{ 3379 Canary: 4, 3380 MaxParallel: 2, 3381 HealthCheck: structs.UpdateStrategyHealthCheck_Checks, 3382 MinHealthyTime: 10 * time.Second, 3383 HealthyDeadline: 10 * time.Minute, 3384 } 3385 3386 // Create an existing deployment that has placed some canaries 3387 d := structs.NewDeployment(job) 3388 s := &structs.DeploymentState{ 3389 Promoted: false, 3390 DesiredTotal: 10, 3391 DesiredCanaries: 4, 3392 PlacedAllocs: 2, 3393 } 3394 d.TaskGroups[job.TaskGroups[0].Name] = s 3395 3396 // Create 10 allocations from the old job 3397 var allocs []*structs.Allocation 3398 for i := 0; i < 10; i++ { 3399 alloc := mock.Alloc() 3400 alloc.Job = job 3401 alloc.JobID = job.ID 3402 alloc.NodeID = uuid.Generate() 3403 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3404 alloc.TaskGroup = job.TaskGroups[0].Name 3405 allocs = append(allocs, alloc) 3406 } 3407 3408 // Create canaries but pick names at the ends 3409 for i := 0; i < 4; i += 3 { 3410 // Create one canary 3411 canary := mock.Alloc() 3412 canary.Job = job 3413 canary.JobID = job.ID 3414 canary.NodeID = uuid.Generate() 3415 canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3416 canary.TaskGroup = job.TaskGroups[0].Name 3417 s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) 3418 canary.DeploymentID = d.ID 3419 allocs = append(allocs, canary) 3420 } 3421 3422 reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, 
job.ID, job, d, allocs, nil, "") 3423 r := reconciler.Compute() 3424 3425 // Assert the correct results 3426 assertResults(t, r, &resultExpectation{ 3427 createDeployment: nil, 3428 deploymentUpdates: nil, 3429 place: 2, 3430 inplace: 0, 3431 stop: 0, 3432 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3433 job.TaskGroups[0].Name: { 3434 Canary: 2, 3435 Ignore: 12, 3436 }, 3437 }, 3438 }) 3439 3440 assertNamesHaveIndexes(t, intRange(1, 2), placeResultsToNames(r.place)) 3441 } 3442 3443 // Tests the reconciler handles canary promotion by unblocking max_parallel 3444 func TestReconciler_PromoteCanaries_Unblock(t *testing.T) { 3445 job := mock.Job() 3446 job.TaskGroups[0].Update = canaryUpdate 3447 3448 // Create an existing deployment that has placed some canaries and mark them 3449 // promoted 3450 d := structs.NewDeployment(job) 3451 s := &structs.DeploymentState{ 3452 Promoted: true, 3453 DesiredTotal: 10, 3454 DesiredCanaries: 2, 3455 PlacedAllocs: 2, 3456 } 3457 d.TaskGroups[job.TaskGroups[0].Name] = s 3458 3459 // Create 10 allocations from the old job 3460 var allocs []*structs.Allocation 3461 for i := 0; i < 10; i++ { 3462 alloc := mock.Alloc() 3463 alloc.Job = job 3464 alloc.JobID = job.ID 3465 alloc.NodeID = uuid.Generate() 3466 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3467 alloc.TaskGroup = job.TaskGroups[0].Name 3468 allocs = append(allocs, alloc) 3469 } 3470 3471 // Create the canaries 3472 handled := make(map[string]allocUpdateType) 3473 for i := 0; i < 2; i++ { 3474 // Create one canary 3475 canary := mock.Alloc() 3476 canary.Job = job 3477 canary.JobID = job.ID 3478 canary.NodeID = uuid.Generate() 3479 canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3480 canary.TaskGroup = job.TaskGroups[0].Name 3481 s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) 3482 canary.DeploymentID = d.ID 3483 canary.DeploymentStatus = &structs.AllocDeploymentStatus{ 3484 Healthy: helper.BoolToPtr(true), 3485 } 3486 allocs = append(allocs, canary) 3487 handled[canary.ID] = allocUpdateFnIgnore 3488 } 3489 3490 mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 3491 reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil, "") 3492 r := reconciler.Compute() 3493 3494 // Assert the correct results 3495 assertResults(t, r, &resultExpectation{ 3496 createDeployment: nil, 3497 deploymentUpdates: nil, 3498 destructive: 2, 3499 stop: 2, 3500 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3501 job.TaskGroups[0].Name: { 3502 Stop: 2, 3503 DestructiveUpdate: 2, 3504 Ignore: 8, 3505 }, 3506 }, 3507 }) 3508 3509 assertNoCanariesStopped(t, d, r.stop) 3510 assertNamesHaveIndexes(t, intRange(2, 3), destructiveResultsToNames(r.destructiveUpdate)) 3511 assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) 3512 } 3513 3514 // Tests the reconciler handles canary promotion when the canary count equals 3515 // the total correctly 3516 func TestReconciler_PromoteCanaries_CanariesEqualCount(t *testing.T) { 3517 job := mock.Job() 3518 job.TaskGroups[0].Update = canaryUpdate 3519 job.TaskGroups[0].Count = 2 3520 3521 // Create an existing deployment that has placed some canaries and mark them 3522 // promoted 3523 d := structs.NewDeployment(job) 3524 s := &structs.DeploymentState{ 3525 Promoted: true, 3526 DesiredTotal: 2, 3527 DesiredCanaries: 2, 3528 PlacedAllocs: 2, 3529 HealthyAllocs: 2, 3530 } 3531 d.TaskGroups[job.TaskGroups[0].Name] = s 3532 3533 // Create 2 allocations 
from the old job 3534 var allocs []*structs.Allocation 3535 for i := 0; i < 2; i++ { 3536 alloc := mock.Alloc() 3537 alloc.Job = job 3538 alloc.JobID = job.ID 3539 alloc.NodeID = uuid.Generate() 3540 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3541 alloc.TaskGroup = job.TaskGroups[0].Name 3542 allocs = append(allocs, alloc) 3543 } 3544 3545 // Create the canaries 3546 handled := make(map[string]allocUpdateType) 3547 for i := 0; i < 2; i++ { 3548 // Create one canary 3549 canary := mock.Alloc() 3550 canary.Job = job 3551 canary.JobID = job.ID 3552 canary.NodeID = uuid.Generate() 3553 canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3554 canary.TaskGroup = job.TaskGroups[0].Name 3555 s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) 3556 canary.DeploymentID = d.ID 3557 canary.DeploymentStatus = &structs.AllocDeploymentStatus{ 3558 Healthy: helper.BoolToPtr(true), 3559 } 3560 allocs = append(allocs, canary) 3561 handled[canary.ID] = allocUpdateFnIgnore 3562 } 3563 3564 mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 3565 reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil, "") 3566 r := reconciler.Compute() 3567 3568 updates := []*structs.DeploymentStatusUpdate{ 3569 { 3570 DeploymentID: d.ID, 3571 Status: structs.DeploymentStatusSuccessful, 3572 StatusDescription: structs.DeploymentStatusDescriptionSuccessful, 3573 }, 3574 } 3575 3576 // Assert the correct results 3577 assertResults(t, r, &resultExpectation{ 3578 createDeployment: nil, 3579 deploymentUpdates: updates, 3580 place: 0, 3581 inplace: 0, 3582 stop: 2, 3583 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3584 job.TaskGroups[0].Name: { 3585 Stop: 2, 3586 Ignore: 2, 3587 }, 3588 }, 3589 }) 3590 3591 assertNoCanariesStopped(t, d, r.stop) 3592 assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) 3593 } 3594 3595 // Tests the reconciler checks the health of placed allocs to determine the 3596 // limit 3597 func TestReconciler_DeploymentLimit_HealthAccounting(t *testing.T) { 3598 job := mock.Job() 3599 job.TaskGroups[0].Update = noCanaryUpdate 3600 3601 cases := []struct { 3602 healthy int 3603 }{ 3604 { 3605 healthy: 0, 3606 }, 3607 { 3608 healthy: 1, 3609 }, 3610 { 3611 healthy: 2, 3612 }, 3613 { 3614 healthy: 3, 3615 }, 3616 { 3617 healthy: 4, 3618 }, 3619 } 3620 3621 for _, c := range cases { 3622 t.Run(fmt.Sprintf("%d healthy", c.healthy), func(t *testing.T) { 3623 // Create an existing deployment that has placed some canaries and mark them 3624 // promoted 3625 d := structs.NewDeployment(job) 3626 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3627 Promoted: true, 3628 DesiredTotal: 10, 3629 PlacedAllocs: 4, 3630 } 3631 3632 // Create 6 allocations from the old job 3633 var allocs []*structs.Allocation 3634 for i := 4; i < 10; i++ { 3635 alloc := mock.Alloc() 3636 alloc.Job = job 3637 alloc.JobID = job.ID 3638 alloc.NodeID = uuid.Generate() 3639 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3640 alloc.TaskGroup = job.TaskGroups[0].Name 3641 allocs = append(allocs, alloc) 3642 } 3643 3644 // Create the new allocs 3645 handled := make(map[string]allocUpdateType) 3646 for i := 0; i < 4; i++ { 3647 new := mock.Alloc() 3648 new.Job = job 3649 new.JobID = job.ID 3650 new.NodeID = uuid.Generate() 3651 new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3652 new.TaskGroup = job.TaskGroups[0].Name 3653 new.DeploymentID = d.ID 3654 if i 
< c.healthy { 3655 new.DeploymentStatus = &structs.AllocDeploymentStatus{ 3656 Healthy: helper.BoolToPtr(true), 3657 } 3658 } 3659 allocs = append(allocs, new) 3660 handled[new.ID] = allocUpdateFnIgnore 3661 } 3662 3663 mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 3664 reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil, "") 3665 r := reconciler.Compute() 3666 3667 // Assert the correct results 3668 assertResults(t, r, &resultExpectation{ 3669 createDeployment: nil, 3670 deploymentUpdates: nil, 3671 destructive: c.healthy, 3672 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3673 job.TaskGroups[0].Name: { 3674 DestructiveUpdate: uint64(c.healthy), 3675 Ignore: uint64(10 - c.healthy), 3676 }, 3677 }, 3678 }) 3679 3680 if c.healthy != 0 { 3681 assertNamesHaveIndexes(t, intRange(4, 3+c.healthy), destructiveResultsToNames(r.destructiveUpdate)) 3682 } 3683 }) 3684 } 3685 } 3686 3687 // Tests the reconciler handles an alloc on a tainted node during a rolling 3688 // update 3689 func TestReconciler_TaintedNode_RollingUpgrade(t *testing.T) { 3690 job := mock.Job() 3691 job.TaskGroups[0].Update = noCanaryUpdate 3692 3693 // Create an existing deployment that has some placed allocs 3694 d := structs.NewDeployment(job) 3695 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3696 Promoted: true, 3697 DesiredTotal: 10, 3698 PlacedAllocs: 7, 3699 } 3700 3701 // Create 2 allocations from the old job 3702 var allocs []*structs.Allocation 3703 for i := 8; i < 10; i++ { 3704 alloc := mock.Alloc() 3705 alloc.Job = job 3706 alloc.JobID = job.ID 3707 alloc.NodeID = uuid.Generate() 3708 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3709 alloc.TaskGroup = job.TaskGroups[0].Name 3710 allocs = append(allocs, alloc) 3711 } 3712 3713 // Create the healthy replacements 3714 handled := make(map[string]allocUpdateType) 3715 for i := 0; i < 8; i++ { 3716 new := mock.Alloc() 3717 new.Job = job 3718 new.JobID = job.ID 3719 new.NodeID = uuid.Generate() 3720 new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3721 new.TaskGroup = job.TaskGroups[0].Name 3722 new.DeploymentID = d.ID 3723 new.DeploymentStatus = &structs.AllocDeploymentStatus{ 3724 Healthy: helper.BoolToPtr(true), 3725 } 3726 allocs = append(allocs, new) 3727 handled[new.ID] = allocUpdateFnIgnore 3728 } 3729 3730 // Build a map of tainted nodes 3731 tainted := make(map[string]*structs.Node, 3) 3732 for i := 0; i < 3; i++ { 3733 n := mock.Node() 3734 n.ID = allocs[2+i].NodeID 3735 if i == 0 { 3736 n.Status = structs.NodeStatusDown 3737 } else { 3738 n.Drain = true 3739 allocs[2+i].DesiredTransition.Migrate = helper.BoolToPtr(true) 3740 } 3741 tainted[n.ID] = n 3742 } 3743 3744 mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 3745 reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "") 3746 r := reconciler.Compute() 3747 3748 // Assert the correct results 3749 assertResults(t, r, &resultExpectation{ 3750 createDeployment: nil, 3751 deploymentUpdates: nil, 3752 place: 3, 3753 destructive: 2, 3754 stop: 3, 3755 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3756 job.TaskGroups[0].Name: { 3757 Place: 1, // Place the lost 3758 Stop: 1, // Stop the lost 3759 Migrate: 2, // Migrate the tainted 3760 DestructiveUpdate: 2, 3761 Ignore: 5, 3762 }, 3763 }, 3764 }) 3765 3766 assertNamesHaveIndexes(t, intRange(8, 9), destructiveResultsToNames(r.destructiveUpdate)) 3767 
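// The two destructive updates should land on the remaining old-job allocs
// (name indexes 8-9), while the stop/place results below cover the three
// tainted allocs (indexes 0-2): one lost and two migrated.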
	assertNamesHaveIndexes(t, intRange(0, 2), placeResultsToNames(r.place))
	assertNamesHaveIndexes(t, intRange(0, 2), stopResultsToNames(r.stop))
}

// Tests the reconciler handles a failed deployment with allocs on tainted
// nodes
func TestReconciler_FailedDeployment_TaintedNodes(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = noCanaryUpdate

	// Create an existing failed deployment that has some placed allocs
	d := structs.NewDeployment(job)
	d.Status = structs.DeploymentStatusFailed
	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		Promoted:     true,
		DesiredTotal: 10,
		PlacedAllocs: 4,
	}

	// Create 6 allocations from the old job
	var allocs []*structs.Allocation
	for i := 4; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		allocs = append(allocs, alloc)
	}

	// Create the healthy replacements
	handled := make(map[string]allocUpdateType)
	for i := 0; i < 4; i++ {
		newAlloc := mock.Alloc()
		newAlloc.Job = job
		newAlloc.JobID = job.ID
		newAlloc.NodeID = uuid.Generate()
		newAlloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		newAlloc.TaskGroup = job.TaskGroups[0].Name
		newAlloc.DeploymentID = d.ID
		newAlloc.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: helper.BoolToPtr(true),
		}
		allocs = append(allocs, newAlloc)
		handled[newAlloc.ID] = allocUpdateFnIgnore
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 2)
	for i := 0; i < 2; i++ {
		n := mock.Node()
		n.ID = allocs[6+i].NodeID
		if i == 0 {
			n.Status = structs.NodeStatusDown
		} else {
			n.Drain = true
			allocs[6+i].DesiredTransition.Migrate = helper.BoolToPtr(true)
		}
		tainted[n.ID] = n
	}

	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
	reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             2,
		inplace:           0,
		stop:              2,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:   1,
				Migrate: 1,
				Stop:    1,
				Ignore:  8,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
}

// Tests the reconciler handles a run after a deployment is complete
// successfully.
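// With every alloc healthy and the deployment already marked successful, the
// run should be a no-op: no placements, stops, or deployment status updates.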
func TestReconciler_CompleteDeployment(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = canaryUpdate

	d := structs.NewDeployment(job)
	d.Status = structs.DeploymentStatusSuccessful
	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		Promoted:        true,
		DesiredTotal:    10,
		DesiredCanaries: 2,
		PlacedAllocs:    10,
		HealthyAllocs:   10,
	}

	// Create the healthy allocations tied to the deployment
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		alloc.DeploymentID = d.ID
		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: helper.BoolToPtr(true),
		}
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Ignore: 10,
			},
		},
	})
}

// Tests that the reconciler marks a deployment as complete once there is
// nothing left to place even if there are failed allocations that are part of
// the deployment.
func TestReconciler_MarkDeploymentComplete_FailedAllocations(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = noCanaryUpdate

	d := structs.NewDeployment(job)
	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		DesiredTotal:  10,
		PlacedAllocs:  20,
		HealthyAllocs: 10,
	}

	// Create 10 healthy allocs and 10 allocs that are failed
	var allocs []*structs.Allocation
	for i := 0; i < 20; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%10))
		alloc.TaskGroup = job.TaskGroups[0].Name
		alloc.DeploymentID = d.ID
		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{}
		if i < 10 {
			alloc.ClientStatus = structs.AllocClientStatusRunning
			alloc.DeploymentStatus.Healthy = helper.BoolToPtr(true)
		} else {
			alloc.DesiredStatus = structs.AllocDesiredStatusStop
			alloc.ClientStatus = structs.AllocClientStatusFailed
			alloc.DeploymentStatus.Healthy = helper.BoolToPtr(false)
		}

		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "")
	r := reconciler.Compute()

	updates := []*structs.DeploymentStatusUpdate{
		{
			DeploymentID:      d.ID,
			Status:            structs.DeploymentStatusSuccessful,
			StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
		},
	}

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: updates,
		place:             0,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Ignore: 10,
			},
		},
	})
}

// Test that a failed deployment cancels non-promoted canaries
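// Group one has been promoted, so its canaries survive; group two has not,
// so its two placed canaries should be stopped while the rest of its allocs
// are ignored.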
func TestReconciler_FailedDeployment_CancelCanaries(t *testing.T) {
	// Create a job with two task groups
	job := mock.Job()
	job.TaskGroups[0].Update = canaryUpdate
	job.TaskGroups = append(job.TaskGroups, job.TaskGroups[0].Copy())
	job.TaskGroups[1].Name = "two"

	// Create an existing failed deployment that has promoted one task group
	d := structs.NewDeployment(job)
	d.Status = structs.DeploymentStatusFailed
	s0 := &structs.DeploymentState{
		Promoted:        true,
		DesiredTotal:    10,
		DesiredCanaries: 2,
		PlacedAllocs:    4,
	}
	s1 := &structs.DeploymentState{
		Promoted:        false,
		DesiredTotal:    10,
		DesiredCanaries: 2,
		PlacedAllocs:    2,
	}
	d.TaskGroups[job.TaskGroups[0].Name] = s0
	d.TaskGroups[job.TaskGroups[1].Name] = s1

	// For each group, create the healthy replacements (tracking the first two
	// as canaries) and fill the remainder with allocs from the old job
	var allocs []*structs.Allocation
	handled := make(map[string]allocUpdateType)
	for _, group := range []int{0, 1} {
		replacements := 4
		state := s0
		if group == 1 {
			replacements = 2
			state = s1
		}

		// Create the healthy replacements
		for i := 0; i < replacements; i++ {
			newAlloc := mock.Alloc()
			newAlloc.Job = job
			newAlloc.JobID = job.ID
			newAlloc.NodeID = uuid.Generate()
			newAlloc.Name = structs.AllocName(job.ID, job.TaskGroups[group].Name, uint(i))
			newAlloc.TaskGroup = job.TaskGroups[group].Name
			newAlloc.DeploymentID = d.ID
			newAlloc.DeploymentStatus = &structs.AllocDeploymentStatus{
				Healthy: helper.BoolToPtr(true),
			}
			allocs = append(allocs, newAlloc)
			handled[newAlloc.ID] = allocUpdateFnIgnore

			// Add the alloc to the canary list
			if i < 2 {
				state.PlacedCanaries = append(state.PlacedCanaries, newAlloc.ID)
			}
		}
		for i := replacements; i < 10; i++ {
			alloc := mock.Alloc()
			alloc.Job = job
			alloc.JobID = job.ID
			alloc.NodeID = uuid.Generate()
			alloc.Name = structs.AllocName(job.ID, job.TaskGroups[group].Name, uint(i))
			alloc.TaskGroup = job.TaskGroups[group].Name
			allocs = append(allocs, alloc)
		}
	}

	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
	reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              2,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Ignore: 10,
			},
			job.TaskGroups[1].Name: {
				Stop:   2,
				Ignore: 8,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
}

// Test that a failed deployment and updated job works
func TestReconciler_FailedDeployment_NewJob(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = noCanaryUpdate

	// Create an existing failed deployment that has some placed allocs
	d := structs.NewDeployment(job)
	d.Status = structs.DeploymentStatusFailed
	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		Promoted:     true,
		DesiredTotal: 10,
		PlacedAllocs: 4,
	}
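	// Submitting a new job version on top of the failed deployment should
	// create a fresh deployment and roll the old allocs destructively.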
	// Create 6 allocations from the old job
	var allocs []*structs.Allocation
	for i := 4; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		allocs = append(allocs, alloc)
	}

	// Create the healthy replacements
	for i := 0; i < 4; i++ {
		newAlloc := mock.Alloc()
		newAlloc.Job = job
		newAlloc.JobID = job.ID
		newAlloc.NodeID = uuid.Generate()
		newAlloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		newAlloc.TaskGroup = job.TaskGroups[0].Name
		newAlloc.DeploymentID = d.ID
		newAlloc.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: helper.BoolToPtr(true),
		}
		allocs = append(allocs, newAlloc)
	}

	// Bump the job version
	jobNew := job.Copy()
	jobNew.Version += 100

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, jobNew, d, allocs, nil, "")
	r := reconciler.Compute()

	dnew := structs.NewDeployment(jobNew)
	dnew.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		DesiredTotal: 10,
	}

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  dnew,
		deploymentUpdates: nil,
		destructive:       4,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				DestructiveUpdate: 4,
				Ignore:            6,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 3), destructiveResultsToNames(r.destructiveUpdate))
}

// Tests the reconciler marks a deployment as complete
func TestReconciler_MarkDeploymentComplete(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = noCanaryUpdate

	d := structs.NewDeployment(job)
	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		Promoted:      true,
		DesiredTotal:  10,
		PlacedAllocs:  10,
		HealthyAllocs: 10,
	}

	// Create the healthy allocations tied to the deployment
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		alloc.DeploymentID = d.ID
		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: helper.BoolToPtr(true),
		}
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "")
	r := reconciler.Compute()

	updates := []*structs.DeploymentStatusUpdate{
		{
			DeploymentID:      d.ID,
			Status:            structs.DeploymentStatusSuccessful,
			StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
		},
	}

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: updates,
		place:             0,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Ignore: 10,
			},
		},
	})
}

// Tests the reconciler handles changing a job such that a deployment is created
// while doing a scale up but as the second eval.
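// The first eval has already placed the 20 new allocs; since none of them
// have been marked healthy yet, this second eval should change nothing.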
func TestReconciler_JobChange_ScaleUp_SecondEval(t *testing.T) {
	// Scale the job up to 30
	job := mock.Job()
	job.TaskGroups[0].Update = noCanaryUpdate
	job.TaskGroups[0].Count = 30

	// Create an existing deployment that has placed 20 of the new allocs
	d := structs.NewDeployment(job)
	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		Promoted:     false,
		DesiredTotal: 30,
		PlacedAllocs: 20,
	}

	// Create 10 allocations from the old job
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		allocs = append(allocs, alloc)
	}

	// Create 20 allocations from the new job
	handled := make(map[string]allocUpdateType)
	for i := 10; i < 30; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.DeploymentID = d.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		allocs = append(allocs, alloc)
		handled[alloc.ID] = allocUpdateFnIgnore
	}

	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
	reconciler := NewAllocReconciler(testLogger(), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				// All should be ignored because nothing has been marked as
				// healthy.
				Ignore: 30,
			},
		},
	})
}

// Tests the reconciler doesn't stop allocations when doing a rolling upgrade
// where the count of the old job allocs is < desired count.
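// With 7 old allocs for a desired count of 10, the reconciler should place 3
// new allocs rather than stop anything; under MaxParallel=4 that leaves room
// for only one destructive update, and the remaining 6 old allocs are
// ignored for now.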
func TestReconciler_RollingUpgrade_MissingAllocs(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = noCanaryUpdate

	// Create 7 allocations from the old job
	var allocs []*structs.Allocation
	for i := 0; i < 7; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	d := structs.NewDeployment(job)
	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		DesiredTotal: 10,
	}

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  d,
		deploymentUpdates: nil,
		place:             3,
		destructive:       1,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:             3,
				DestructiveUpdate: 1,
				Ignore:            6,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(7, 9), placeResultsToNames(r.place))
	assertNamesHaveIndexes(t, intRange(0, 0), destructiveResultsToNames(r.destructiveUpdate))
}

// Tests that the reconciler handles rerunning a batch job in the case that the
// allocations are from an older instance of the job.
func TestReconciler_Batch_Rerun(t *testing.T) {
	job := mock.Job()
	job.Type = structs.JobTypeBatch
	job.TaskGroups[0].Update = nil

	// Create 10 allocations from the old job and have them be complete
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		alloc.ClientStatus = structs.AllocClientStatusComplete
		alloc.DesiredStatus = structs.AllocDesiredStatusStop
		allocs = append(allocs, alloc)
	}

	// Create a copy of the job that is "new"
	job2 := job.Copy()
	job2.CreateIndex++

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, true, job2.ID, job2, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             10,
		destructive:       0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:             10,
				DestructiveUpdate: 0,
				Ignore:            10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place))
}

// Test that a failed deployment will not result in rescheduling failed
// allocations
func TestReconciler_FailedDeployment_DontReschedule(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = noCanaryUpdate

	tgName := job.TaskGroups[0].Name
	now := time.Now()

	// Create an existing failed deployment that has some placed allocs
	d := structs.NewDeployment(job)
	d.Status = structs.DeploymentStatusFailed
	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		Promoted:     true,
		DesiredTotal: 5,
		PlacedAllocs: 4,
	}
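	// Because the deployment has failed, the failed allocs created below must
	// not be rescheduled; only the two still-running allocs should be
	// ignored.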
	// Create 4 allocations and mark two as failed
	var allocs []*structs.Allocation
	for i := 0; i < 4; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		alloc.DeploymentID = d.ID
		allocs = append(allocs, alloc)
	}

	// Mark two allocations as failed recently enough that they would
	// otherwise be reschedulable now
	allocs[2].ClientStatus = structs.AllocClientStatusFailed
	allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {
		State:      "start",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now.Add(-10 * time.Second),
	}}

	allocs[3].ClientStatus = structs.AllocClientStatusFailed
	allocs[3].TaskStates = map[string]*structs.TaskState{tgName: {
		State:      "start",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now.Add(-10 * time.Second),
	}}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
	r := reconciler.Compute()

	// Assert that no rescheduled placements were created
	assertResults(t, r, &resultExpectation{
		place:             0,
		createDeployment:  nil,
		deploymentUpdates: nil,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Ignore: 2,
			},
		},
	})
}

// Test that a running deployment with failed allocs will not result in
// rescheduling failed allocations unless they are marked as reschedulable.
func TestReconciler_DeploymentWithFailedAllocs_DontReschedule(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = noCanaryUpdate
	tgName := job.TaskGroups[0].Name
	now := time.Now()

	// Mock a deployment with failed allocs that the deployment watcher hasn't
	// marked as failed yet
	d := structs.NewDeployment(job)
	d.Status = structs.DeploymentStatusRunning
	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		Promoted:     false,
		DesiredTotal: 10,
		PlacedAllocs: 10,
	}

	// Create 10 failed allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		alloc.DeploymentID = d.ID
		alloc.ClientStatus = structs.AllocClientStatusFailed
		alloc.TaskStates = map[string]*structs.TaskState{tgName: {
			State:      "start",
			StartedAt:  now.Add(-1 * time.Hour),
			FinishedAt: now.Add(-10 * time.Second),
		}}
		allocs = append(allocs, alloc)
	}

	// Mark half of them as reschedulable
	for i := 0; i < 5; i++ {
		allocs[i].DesiredTransition.Reschedule = helper.BoolToPtr(true)
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
	r := reconciler.Compute()

	// Assert that only the five allocs marked as reschedulable are replaced
	assertResults(t, r, &resultExpectation{
		place:             5,
		createDeployment:  nil,
		deploymentUpdates: nil,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  5,
				Ignore: 5,
			},
		},
	})
}

// Test that a promoted, fully healthy deployment that followed an auto-revert
// is marked successful and the stopped allocs from the failed version are
// left untouched
func TestReconciler_FailedDeployment_AutoRevert_CancelCanaries(t *testing.T) {
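	// Scenario: the allocs from jobv1 failed and were stopped, and jobv2 now
	// runs under a promoted, fully healthy deployment. The reconciler should
	// only need to mark that deployment successful.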
	// Create a job
	job := mock.Job()
	job.TaskGroups[0].Count = 3
	job.TaskGroups[0].Update = &structs.UpdateStrategy{
		Canary:          3,
		MaxParallel:     2,
		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
		MinHealthyTime:  10 * time.Second,
		HealthyDeadline: 10 * time.Minute,
		Stagger:         31 * time.Second,
	}

	// Create v1 of the job
	jobv1 := job.Copy()
	jobv1.Version = 1
	jobv1.TaskGroups[0].Meta = map[string]string{"version": "1"}

	// Create v2 of the job
	jobv2 := job.Copy()
	jobv2.Version = 2
	jobv2.TaskGroups[0].Meta = map[string]string{"version": "2"}

	d := structs.NewDeployment(jobv2)
	state := &structs.DeploymentState{
		Promoted:      true,
		DesiredTotal:  3,
		PlacedAllocs:  3,
		HealthyAllocs: 3,
	}
	d.TaskGroups[job.TaskGroups[0].Name] = state

	// Create the running, healthy allocs from v2
	var allocs []*structs.Allocation
	for i := 0; i < 3; i++ {
		newAlloc := mock.Alloc()
		newAlloc.Job = jobv2
		newAlloc.JobID = job.ID
		newAlloc.NodeID = uuid.Generate()
		newAlloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		newAlloc.TaskGroup = job.TaskGroups[0].Name
		newAlloc.DeploymentID = d.ID
		newAlloc.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: helper.BoolToPtr(true),
		}
		newAlloc.ClientStatus = structs.AllocClientStatusRunning
		allocs = append(allocs, newAlloc)
	}

	// Create the failed, stopped allocs from v1
	for i := 0; i < 3; i++ {
		old := mock.Alloc()
		old.Job = jobv1
		old.JobID = jobv1.ID
		old.NodeID = uuid.Generate()
		old.Name = structs.AllocName(jobv1.ID, jobv1.TaskGroups[0].Name, uint(i))
		old.TaskGroup = job.TaskGroups[0].Name
		old.DeploymentID = uuid.Generate()
		old.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: helper.BoolToPtr(false),
		}
		old.DesiredStatus = structs.AllocDesiredStatusStop
		old.ClientStatus = structs.AllocClientStatusFailed
		allocs = append(allocs, old)
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, jobv2, d, allocs, nil, "")
	r := reconciler.Compute()

	updates := []*structs.DeploymentStatusUpdate{
		{
			DeploymentID:      d.ID,
			Status:            structs.DeploymentStatusSuccessful,
			StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
		},
	}

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: updates,
		place:             0,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Stop:          0,
				InPlaceUpdate: 0,
				Ignore:        3,
			},
		},
	})
}

// Test that a successful deployment with failed allocs will result in
// rescheduling failed allocations
func TestReconciler_SuccessfulDeploymentWithFailedAllocs_Reschedule(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = noCanaryUpdate
	tgName := job.TaskGroups[0].Name
	now := time.Now()

	// Mock a deployment that has been marked successful even though some of
	// its allocs have since failed
	d := structs.NewDeployment(job)
	d.Status = structs.DeploymentStatusSuccessful
	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		Promoted:     false,
		DesiredTotal: 10,
		PlacedAllocs: 10,
	}
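	// A terminal (successful) deployment no longer blocks rescheduling, so
	// all ten failed allocs below should be replaced, with each placement
	// linked to its previous alloc.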
	// Create 10 failed allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		alloc.DeploymentID = d.ID
		alloc.ClientStatus = structs.AllocClientStatusFailed
		alloc.TaskStates = map[string]*structs.TaskState{tgName: {
			State:      "start",
			StartedAt:  now.Add(-1 * time.Hour),
			FinishedAt: now.Add(-10 * time.Second),
		}}
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
	r := reconciler.Compute()

	// Assert that rescheduled placements were created
	assertResults(t, r, &resultExpectation{
		place:             10,
		createDeployment:  nil,
		deploymentUpdates: nil,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  10,
				Ignore: 0,
			},
		},
	})
	assertPlaceResultsHavePreviousAllocs(t, 10, r.place)
}

// Tests force rescheduling a failed alloc that is past its reschedule limit
func TestReconciler_ForceReschedule_Service(t *testing.T) {
	require := require.New(t)

	// Set the desired count to 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5
	tgName := job.TaskGroups[0].Name

	// Set up the reschedule policy and update stanza
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
		Attempts:      1,
		Interval:      24 * time.Hour,
		Delay:         5 * time.Second,
		DelayFunction: "",
		MaxDelay:      1 * time.Hour,
		Unlimited:     false,
	}
	job.TaskGroups[0].Update = noCanaryUpdate

	// Create 5 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 5; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.ClientStatus = structs.AllocClientStatusRunning
		allocs = append(allocs, alloc)
	}

	// Mark one as failed and past its reschedule limit so it is not eligible
	// to reschedule
	allocs[0].ClientStatus = structs.AllocClientStatusFailed
	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{
			RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID:    uuid.Generate(),
			PrevNodeID:     uuid.Generate(),
		},
	}}

	// Mark DesiredTransition ForceReschedule
	allocs[0].DesiredTransition = structs.DesiredTransition{ForceReschedule: helper.BoolToPtr(true)}

	reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Verify that no follow-up evals were created
	evals := r.desiredFollowupEvals[tgName]
	require.Nil(evals)

	// Verify that one rescheduled alloc was created because of the forced
	// reschedule
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  1,
				Ignore: 4,
			},
		},
	})

	// Rescheduled allocs should have previous allocs
	assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
	assertPlacementsAreRescheduled(t, 1, r.place)
}